reporead: implement delayed parsing of files data
This gives us some large memory savings in Python due to the internal storage of Unicode strings vs. byte strings, and it also saves us up-front processing time for file list data that we are never actually going to use. Signed-off-by: Dan McGee <dan@archlinux.org>
This commit is contained in:
parent
8e8e3d7aa7
commit
ac157895f1
@ -82,8 +82,7 @@ class RepoPackage(object):
|
||||
'md5sum', 'sha256sum', 'url', 'packager' )
|
||||
number = ( 'csize', 'isize' )
|
||||
collections = ( 'depends', 'optdepends', 'makedepends', 'checkdepends',
|
||||
'conflicts', 'provides', 'replaces', 'groups', 'license',
|
||||
'files' )
|
||||
'conflicts', 'provides', 'replaces', 'groups', 'license')
|
||||
|
||||
def __init__(self, repo):
|
||||
self.repo = repo
|
||||
@ -98,7 +97,6 @@ def __init__(self, repo):
|
||||
setattr(self, k, ())
|
||||
self.builddate = None
|
||||
self.files = None
|
||||
self.has_files = False
|
||||
|
||||
def populate(self, values):
|
||||
for k, v in values.iteritems():
|
||||
@ -120,13 +118,21 @@ def populate(self, values):
|
||||
logger.warning(
|
||||
'Package %s had unparsable build date %s',
|
||||
self.name, v[0])
|
||||
elif k == 'files':
|
||||
self.files = tuple(v)
|
||||
self.has_files = True
|
||||
else:
|
||||
# anything left in collections
|
||||
setattr(self, k, tuple(v))
|
||||
|
||||
@property
def files_list(self):
    '''Decode and parse the raw files data held in ``self.files``.

    The data is wrapped in a UTF-8 text reader and handed to
    ``parse_info`` only when this property is read, so packages whose
    file lists are never needed are never decoded.

    Returns the ``'files'`` entry of the mapping produced by
    ``parse_info``, or ``None`` if the data is not valid UTF-8.
    '''
    data_file = io.TextIOWrapper(io.BytesIO(self.files), encoding='UTF-8')
    try:
        info = parse_info(data_file)
    except UnicodeDecodeError:
        # a broken file list should not abort processing of the package
        logger.warning("Could not correctly decode files list for %s",
                self.name)
        return None
    finally:
        # release the wrapper and its byte buffer as soon as parsing is done
        data_file.close()
    return info['files']
|
||||
|
||||
@property
|
||||
def full_version(self):
|
||||
'''Very similar to the main.models.Package method.'''
|
||||
@ -291,15 +297,18 @@ def populate_files(dbpkg, repopkg, force=False):
|
||||
return
|
||||
|
||||
# only delete files if we are reading a DB that contains them
|
||||
if repopkg.has_files:
|
||||
if repopkg.files:
|
||||
files = repopkg.files_list
|
||||
# we had files data, but it couldn't be parsed, so skip
|
||||
if not files:
|
||||
return
|
||||
delete_pkg_files(dbpkg)
|
||||
logger.info("adding %d files for package %s",
|
||||
len(repopkg.files), dbpkg.pkgname)
|
||||
len(files), dbpkg.pkgname)
|
||||
pkg_files = []
|
||||
# sort in normal alpha-order that pacman uses, rather than makepkg's
|
||||
# default breadth-first, directory-first ordering
|
||||
files = sorted(repopkg.files)
|
||||
for f in files:
|
||||
for f in sorted(files):
|
||||
if '/' in f:
|
||||
dirname, filename = f.rsplit('/', 1)
|
||||
dirname += '/'
|
||||
@ -507,24 +516,27 @@ def parse_repo(repopath):
|
||||
|
||||
repodb = tarfile.open(repopath, "r")
|
||||
logger.debug("Starting package parsing")
|
||||
dbfiles = ('desc', 'depends', 'files')
|
||||
newpkg = lambda: RepoPackage(reponame)
|
||||
pkgs = defaultdict(newpkg)
|
||||
for tarinfo in repodb.getmembers():
|
||||
if tarinfo.isreg():
|
||||
pkgid, fname = os.path.split(tarinfo.name)
|
||||
if fname not in dbfiles:
|
||||
continue
|
||||
data_file = repodb.extractfile(tarinfo)
|
||||
data_file = io.TextIOWrapper(io.BytesIO(data_file.read()),
|
||||
encoding='UTF-8')
|
||||
try:
|
||||
pkgs[pkgid].populate(parse_info(data_file))
|
||||
except UnicodeDecodeError:
|
||||
logger.warn("Could not correctly decode %s, skipping file",
|
||||
tarinfo.name)
|
||||
data_file.close()
|
||||
del data_file
|
||||
if fname == 'files':
|
||||
# don't parse yet for speed and memory consumption reasons
|
||||
files_data = repodb.extractfile(tarinfo)
|
||||
pkgs[pkgid].files = files_data.read()
|
||||
del files_data
|
||||
elif fname in ('desc', 'depends'):
|
||||
data_file = repodb.extractfile(tarinfo)
|
||||
data_file = io.TextIOWrapper(io.BytesIO(data_file.read()),
|
||||
encoding='UTF-8')
|
||||
try:
|
||||
pkgs[pkgid].populate(parse_info(data_file))
|
||||
except UnicodeDecodeError:
|
||||
logger.warn("Could not correctly decode %s, skipping file",
|
||||
tarinfo.name)
|
||||
data_file.close()
|
||||
del data_file
|
||||
|
||||
logger.debug("Done parsing file %s/%s", pkgid, fname)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user