Skip to content

Commit

Permalink
portage.cache: write md5 instead of mtime (bug 568934)
Browse files Browse the repository at this point in the history
Change cache modules to write md5 in cache entries, instead of mtime.
Since portage-2.2.27, the relevant cache modules have had the ability
to read cache entries containing either md5 or mtime, therefore this
change is backward-compatible with portage-2.2.27 and later.

Also fix the reconstruct_eclasses function to raise CacheCorruption
when the specified chf_type is md5 and the cache entry contains mtime
data, and optimize __getitem__ to skip reconstruct_eclasses calls when
the entry appears to have a different chf_type.

X-Gentoo-Bug: 568934
X-Gentoo-Bug-url: https://bugs.gentoo.org/show_bug.cgi?id=568934
Acked-by: Alexander Berntsen <bernalex@gentoo.org>
  • Loading branch information
zmedico committed Jul 13, 2016
1 parent e2d88ef commit 9abbda7
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 10 deletions.
4 changes: 2 additions & 2 deletions pym/portage/cache/anydbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@

class database(fs_template.FsBased):

validation_chf = 'mtime'
chf_types = ('mtime', 'md5')
validation_chf = 'md5'
chf_types = ('md5', 'mtime')

autocommits = True
cleanse_keys = True
Expand Down
4 changes: 2 additions & 2 deletions pym/portage/cache/flat_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,5 +163,5 @@ class md5_database(database):


class mtime_md5_database(database):
validation_chf = 'mtime'
chf_types = ('mtime', 'md5')
validation_chf = 'md5'
chf_types = ('md5', 'mtime')
4 changes: 2 additions & 2 deletions pym/portage/cache/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@

class database(fs_template.FsBased):

validation_chf = 'mtime'
chf_types = ('mtime', 'md5')
validation_chf = 'md5'
chf_types = ('md5', 'mtime')

autocommits = False
synchronous = False
Expand Down
36 changes: 32 additions & 4 deletions pym/portage/cache/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,15 @@ def __getitem__(self, cpv):

if self.serialize_eclasses and "_eclasses_" in d:
for chf_type in chf_types:
if '_%s_' % chf_type not in d:
# Skip the reconstruct_eclasses call, since it's
# a waste of time if it contains a different chf_type
# than the current one. In the past, it was possible
# for reconstruct_eclasses called with chf_type='md5'
# to "successfully" return invalid data here, because
# it was unable to distinguish between md5 data and
# mtime data.
continue
try:
d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"],
chf_type, paths=self.store_eclass_paths)
Expand All @@ -62,6 +71,9 @@ def __getitem__(self, cpv):
raise
else:
break
else:
raise cache_errors.CacheCorruption(cpv,
'entry does not contain a recognized chf_type')

elif "_eclasses_" not in d:
d["_eclasses_"] = {}
Expand Down Expand Up @@ -310,16 +322,31 @@ def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True):
for k, v in sorted(eclass_dict.items(), key=_keysorter))


def _md5_deserializer(md5):
"""
Without this validation, it's possible for reconstruct_eclasses to
mistakenly interpret mtime data as md5 data, and return an invalid
data structure containing strings where ints are expected.
"""
if len(md5) != 32:
raise ValueError('expected 32 hex digits')
return md5


# Dispatch table mapping a chf_type name to the callable that converts a
# serialized value from a cache entry back into its in-memory form.
# The 'md5' converter raises ValueError when the value does not look like
# an md5 digest.
_chf_deserializers = {
'md5': _md5_deserializer,
# NOTE(review): 'long' is not a Python 3 builtin; presumably it is a
# compatibility alias defined elsewhere in this module -- confirm.
'mtime': long,
}


def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
"""returns a dict when handed a string generated by serialize_eclasses"""
eclasses = eclass_string.rstrip().lstrip().split("\t")
if eclasses == [""]:
# occasionally this occurs in the fs backends. they suck.
return {}

converter = _unicode
if chf_type == 'mtime':
converter = long
converter = _chf_deserializers.get(chf_type, lambda x: x)

if paths:
if len(eclasses) % 3 != 0:
Expand All @@ -340,6 +367,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
raise cache_errors.CacheCorruption(cpv,
"_eclasses_ was of invalid len %i" % len(eclasses))
except ValueError:
raise cache_errors.CacheCorruption(cpv, "_eclasses_ mtime conversion to long failed")
raise cache_errors.CacheCorruption(cpv,
"_eclasses_ not valid for chf_type {}".format(chf_type))
del eclasses
return d

0 comments on commit 9abbda7

Please sign in to comment.