Skip to content

Commit

Permalink
Implement collect_related_packages() / `deb-pkg-tools --collect'
Browse files Browse the repository at this point in the history
  • Loading branch information
xolox committed May 18, 2014
1 parent 0a85866 commit 7d6fbe6
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 45 deletions.
2 changes: 1 addition & 1 deletion deb_pkg_tools/__init__.py
Expand Up @@ -11,7 +11,7 @@
logger = logging.getLogger(__name__)

# Semi-standard module versioning.
__version__ = '1.16'
__version__ = '1.17'

# The following non-essential Debian packages need to be
# installed in order for deb-pkg-tools to work properly.
Expand Down
54 changes: 46 additions & 8 deletions deb_pkg_tools/cli.py
@@ -1,15 +1,18 @@
# Debian packaging tools: Command line interface
#
# Author: Peter Odding <peter@peterodding.com>
# Last Change: May 10, 2014
# Last Change: May 18, 2014
# URL: https://github.com/xolox/python-deb-pkg-tools

"""
Usage: deb-pkg-tools [OPTIONS]
Usage: deb-pkg-tools [OPTIONS] ...
Supported options:
-i, --inspect=FILE inspect the metadata in a *.deb archive
-c, --collect=DIR copy the package archive(s) given as positional
arguments and all package archives required by
the given package archives into a directory
-p, --patch=FILE patch fields into an existing control file
-s, --set=LINE a line to patch into the control file
(syntax: "Name: Value")
Expand All @@ -32,15 +35,17 @@
import getopt
import logging
import os.path
import shutil
import sys

# External dependencies.
import coloredlogs
from humanfriendly import format_path, format_size
from humanfriendly import format_path, format_size, pluralize

# Modules included in our package.
from deb_pkg_tools.control import patch_control_file
from deb_pkg_tools.package import inspect_package, build_package
from deb_pkg_tools.package import (build_package, collect_related_packages,
inspect_package, parse_filename)
from deb_pkg_tools.repo import (update_repository,
activate_repository,
deactivate_repository,
Expand All @@ -61,13 +66,17 @@ def main():
control_fields = {}
# Parse the command line options.
try:
long_options = ['inspect=', 'patch=', 'set=', 'build=', 'update-repo=',
'activate-repo=', 'deactivate-repo=', 'with-repo=',
'verbose', 'help']
options, arguments = getopt.getopt(sys.argv[1:], 'i:p:s:b:u:a:d:w:vh', long_options)
options, arguments = getopt.getopt(sys.argv[1:], 'i:c:p:s:b:u:a:d:w:vh', [
'inspect=', 'collect=', 'patch=', 'set=', 'build=', 'update-repo=',
'activate-repo=', 'deactivate-repo=', 'with-repo=', 'verbose',
'help'
])
for option, value in options:
if option in ('-i', '--inspect'):
actions.append(functools.partial(show_package_metadata, value))
elif option in ('-c', '--collect'):
actions.append(functools.partial(collect_packages, arguments, check_directory(value)))
arguments = []
elif option in ('-p', '--patch'):
control_file = os.path.abspath(value)
assert os.path.isfile(control_file), "Control file does not exist!"
Expand Down Expand Up @@ -125,6 +134,35 @@ def show_package_metadata(archive):
pathname += ' -> ' + entry.target
print(entry.permissions, '%s/%s' % (entry.owner, entry.group), size, entry.modified, pathname)

def collect_packages(archives, directory):
    """
    Copy the given package archives and their related archives to a directory.

    The related archives of each given archive are found using
    :py:func:`collect_related_packages()`. The resulting list of archives is
    printed and the operator is asked for confirmation before anything is
    copied.

    :param archives: The pathnames of the ``*.deb`` archives given on the
                     command line (an iterable of strings).
    :param directory: The pathname of the directory to copy the archives
                      to (a string).
    """
    related_archives = set()
    for given_filename in archives:
        related_archives.add(parse_filename(given_filename))
        related_archives.update(collect_related_packages(given_filename))
    if not related_archives:
        # Nothing was given on the command line, so there's nothing to copy.
        return
    related_archives = sorted(related_archives)
    pluralized = pluralize(len(related_archives), "package archive", "package archives")
    print("Found %s:" % pluralized)
    for file_to_collect in related_archives:
        print(" - %s" % format_path(file_to_collect.filename))
    # Python 2 calls the line reading function raw_input(), Python 3 calls
    # it input(). The fallback is only reached on Python 3, where input()
    # doesn't evaluate what the operator typed.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    copied = False
    try:
        # Ask permission to copy the file(s). This is an explicit check
        # instead of an `assert' statement because assertions are stripped
        # when Python runs with -O, which would silently skip the
        # confirmation and copy the files regardless of the answer.
        prompt = "Copy %s to %s? [Y/n] " % (pluralized, format_path(directory))
        if read_line(prompt).lower() in ('', 'y', 'yes'):
            # Copy the file(s).
            for file_to_collect in related_archives:
                copy_from = file_to_collect.filename
                copy_to = os.path.join(directory, os.path.basename(copy_from))
                logger.debug("Copying %s -> %s ..", format_path(copy_from), format_path(copy_to))
                shutil.copy(copy_from, copy_to)
            logger.info("Done! Copied %s to %s.", pluralized, format_path(directory))
            copied = True
    except KeyboardInterrupt:
        # Control-C interrupts the prompt without emitting a newline. We'll
        # print one manually so the console output doesn't look funny.
        sys.stderr.write('\n')
    if not copied:
        logger.warning("Not copying archive(s) to %s! (aborted by user)", format_path(directory))

def check_directory(argument):
"""
Make sure a command line argument points to an existing directory.
Expand Down
19 changes: 7 additions & 12 deletions deb_pkg_tools/deps.py
Expand Up @@ -15,8 +15,8 @@
:py:func:`RelationshipSet.matches()` function can be used to evaluate
relationship expressions. The relationship parsing is implemented in pure
Python (no external dependencies) but relationship evaluation uses the external
command ``dpkg --compare-versions`` to ensure compatibility with apt's version
comparison algorithm.
command ``dpkg --compare-versions`` to ensure compatibility with Debian's
package version comparison algorithm.
To give you an impression of how to use this module:
Expand Down Expand Up @@ -56,11 +56,9 @@
import logging
import re

# External dependencies.
from executor import execute

# Modules included in our package.
from deb_pkg_tools.utils import OrderedObject, str_compatible, unicode
from deb_pkg_tools.utils import (dpkg_compare_versions, OrderedObject,
str_compatible, unicode)

# Initialize a logger.
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -216,12 +214,12 @@ def __init__(self, name, operator, version):
self.name = name
self.operator = operator
self.version = version
self._evaluation_cache = {}

def matches(self, name, version=None):
"""
Check if the relationship matches a given package and version. Uses the
external command ``dpkg --compare-versions`` to compare versions.
external command ``dpkg --compare-versions`` to ensure compatibility
with Debian's package version comparison algorithm.
:param name: The name of a package (a string).
:param version: The version number of a package (a string, optional).
Expand All @@ -230,10 +228,7 @@ def matches(self, name, version=None):
"""
if self.name == name:
if version:
key = (name, version)
if key not in self._evaluation_cache:
self._evaluation_cache[key] = execute('dpkg', '--compare-versions', version, self.operator, self.version, check=False, logger=logger)
return self._evaluation_cache[key]
return dpkg_compare_versions(version, self.operator, self.version)
else:
return False

Expand Down
130 changes: 117 additions & 13 deletions deb_pkg_tools/package.py
@@ -1,7 +1,7 @@
# Debian packaging tools: Package manipulation.
#
# Author: Peter Odding <peter@peterodding.com>
# Last Change: May 16, 2014
# Last Change: May 18, 2014
# URL: https://github.com/xolox/python-deb-pkg-tools

"""
Expand All @@ -15,6 +15,8 @@
# Standard library modules.
import collections
import fnmatch
import functools
import glob
import logging
import os.path
import pipes
Expand All @@ -36,7 +38,8 @@
from humanfriendly import format_path, pluralize

# Modules included in our package.
from deb_pkg_tools.control import patch_control_file
from deb_pkg_tools.control import parse_control_fields, patch_control_file
from deb_pkg_tools.utils import dpkg_compare_versions

# Initialize a logger.
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -72,24 +75,35 @@ def parse_filename(filename):
>>> from deb_pkg_tools.package import parse_filename
>>> components = parse_filename('/var/cache/apt/archives/python2.7_2.7.3-0ubuntu3.4_amd64.deb')
>>> print components
PackageFile(name='python2.7', version='2.7.3-0ubuntu3.4', architecture='amd64')
PackageFile(filename='/var/cache/apt/archives/python2.7_2.7.3-0ubuntu3.4_amd64.deb',
name='python2.7', version='2.7.3-0ubuntu3.4', architecture='amd64')
:param filename: The pathname of a ``*.deb`` archive (a string).
:returns: A :py:class:`PackageFile` object.
"""
basename, extension = os.path.splitext(os.path.basename(filename))
if isinstance(filename, PackageFile):
return filename
pathname = os.path.abspath(filename)
filename = os.path.basename(pathname)
basename, extension = os.path.splitext(filename)
if extension != '.deb':
raise ValueError("Refusing to parse filename that doesn't have `.deb' extension! (%r)" % filename)
raise ValueError("Refusing to parse filename that doesn't have `.deb' extension! (%r)" % pathname)
components = basename.split('_')
if len(components) != 3:
raise ValueError("Filename doesn't have three underscore separated components! (%r)" % filename)
return PackageFile(*components)
raise ValueError("Filename doesn't have three underscore separated components! (%r)" % pathname)
return PackageFile(pathname, *components)

@functools.total_ordering
class PackageFile(collections.namedtuple('PackageFile', 'filename, name, version, architecture')):

class PackageFile(collections.namedtuple('PackageFile', 'name, version, architecture')):
"""
The function :py:func:`parse_filename()` reports the fields of a package
archive's filename as a named tuple. Here are the fields supported by those
named tuples:
archive's filename as a :py:class:`PackageFile` object (a named tuple).
Here are the fields supported by these named tuples:
.. py:attribute:: filename
The absolute pathname of the package archive (a string).
.. py:attribute:: name
Expand All @@ -102,7 +116,97 @@ class PackageFile(collections.namedtuple('PackageFile', 'name, version, architec
.. py:attribute:: architecture
The architecture of the package (a string).
:py:class:`PackageFile` objects support sorting according to Debian's
package version comparison algorithm as implemented in ``dpkg
--compare-versions``.
"""

def __lt__(self, other):
    """
    Enables rich comparison between :py:class:`PackageFile` objects.

    Objects are ordered by package name first. When the names are equal the
    versions are compared using :py:func:`dpkg_compare_versions()` with the
    ``<<`` (strictly earlier) operator.

    :param other: The object to compare with.
    :returns: The result of the comparison, or :py:data:`NotImplemented`
              when ``other`` is not of the same type so that Python can
              fall back to its default handling.
    """
    if type(self) is not type(other):
        # Signal "don't know how to compare" instead of implicitly
        # returning None (which is not a valid comparison result).
        return NotImplemented
    if self.name != other.name:
        # Always an explicit boolean, also when our name sorts last.
        return self.name < other.name
    return dpkg_compare_versions(self.version, '<<', other.version)

def collect_related_packages(filename):
    """
    Collect the package archive(s) related to the given package archive. This
    works by parsing and resolving the dependencies of the given package to
    filenames of package archives, then parsing and resolving the dependencies
    of those package archives, etc. until no more relationships can be resolved
    to existing package archives.

    Only ``*.deb`` archives located in the same directory as the given
    archive are considered.

    :param filename: The filename of an existing ``*.deb`` archive (a string).
    :returns: A list of :py:class:`PackageFile` objects.

    This function is used to implement the ``deb-pkg-tools --collect`` command:

    .. code-block:: sh

       $ deb-pkg-tools -c /tmp python-deb-pkg-tools_1.13-1_all.deb
       2014-05-18 08:33:42 deb_pkg_tools.package INFO Collecting packages related to ~/python-deb-pkg-tools_1.13-1_all.deb ..
       2014-05-18 08:33:42 deb_pkg_tools.package INFO Scanning ~/python-deb-pkg-tools_1.13-1_all.deb ..
       2014-05-18 08:33:42 deb_pkg_tools.package INFO Scanning ~/python-coloredlogs_0.4.8-1_all.deb ..
       2014-05-18 08:33:42 deb_pkg_tools.package INFO Scanning ~/python-chardet_2.2.1-1_all.deb ..
       2014-05-18 08:33:42 deb_pkg_tools.package INFO Scanning ~/python-humanfriendly_1.7.1-1_all.deb ..
       2014-05-18 08:33:42 deb_pkg_tools.package INFO Scanning ~/python-debian_0.1.21-1_all.deb ..
       Found 5 package archives:
        - ~/python-chardet_2.2.1-1_all.deb
        - ~/python-coloredlogs_0.4.8-1_all.deb
        - ~/python-deb-pkg-tools_1.13-1_all.deb
        - ~/python-humanfriendly_1.7.1-1_all.deb
        - ~/python-debian_0.1.21-1_all.deb
       Copy 5 package archives to /tmp? [Y/n] y
       2014-05-18 08:33:44 deb_pkg_tools.cli INFO Done! Copied 5 package archives to /tmp.

    .. note:: The implementation of this function can be somewhat slow when
              you're dealing with a lot of packages, but this function is meant
              to be used interactively so I don't think it will be a big issue.
    """
    filename = os.path.abspath(filename)
    logger.info("Collecting packages related to %s ..", format_path(filename))
    # Internal state.
    relationship_sets = []
    packages_to_scan = collections.deque([filename])
    related_packages = collections.defaultdict(list)
    # Preparations. The available packages are materialized as a list because
    # they are iterated once for every scanned package; a lazy map() object
    # (Python 3) would be exhausted after the first pass.
    pattern = os.path.join(os.path.dirname(filename), '*.deb')
    available_packages = [parse_filename(pathname) for pathname in glob.glob(pattern)]
    # Loop to collect the related packages.
    while packages_to_scan:
        filename = packages_to_scan.popleft()
        logger.info("Scanning %s ..", format_path(filename))
        # Find the relationships of the given package (the listing of the
        # package's contents is not needed here).
        fields, _ = inspect_package(filename)
        if 'Depends' in fields:
            relationship_sets.append(fields['Depends'])
        # Collect all related packages from the given directory.
        for package in available_packages:
            logger.debug("Checking %s ..", package.filename)
            results = [r.matches(package.name, package.version) for r in relationship_sets]
            # Relationship sets that answer None are left out of the verdict.
            matches = [r for r in results if r is not None]
            if matches and all(matches):
                logger.debug("Package archive matched all relationships: %s", package.filename)
                if package not in related_packages[package.name]:
                    related_packages[package.name].append(package)
                    packages_to_scan.append(package.filename)
    # Pick the latest version of the collected packages (always a real list,
    # as documented, regardless of the Python version).
    return [find_latest_version(candidates) for candidates in related_packages.values()]

def find_latest_version(packages):
    """
    Select the package archive that has the highest version number.

    The candidates are ordered using the comparison support of
    :py:class:`PackageFile` (which relies on ``dpkg --compare-versions ...``)
    and the last one is returned.

    :param packages: A list of filenames (strings) and/or
                     :py:class:`PackageFile` objects.
    :returns: The :py:class:`PackageFile` with
              the highest version number.
    """
    candidates = [parse_filename(entry) for entry in packages]
    candidates.sort()
    return candidates[-1]

def inspect_package(archive):
r"""
Expand All @@ -111,8 +215,8 @@ def inspect_package(archive):
:param archive: The pathname of an existing ``*.deb`` archive.
:returns: A tuple with two dictionaries:
1. A dictionary with control file fields (an instance of
:py:func:`debian.deb822.Deb822`).
1. A dictionary with control file fields (the result of
:py:func:`deb_pkg_tools.control.parse_control_fields()`).
2. A dictionary with the directories and files contained in the
package. The dictionary keys are the absolute pathnames and
the dictionary values are :py:class:`ArchiveEntry` objects
Expand Down Expand Up @@ -146,7 +250,7 @@ def inspect_package(archive):
'/usr/lib/python2.7/uuid.py': ArchiveEntry(permissions='-rw-r--r--', owner='root', group='root', size=21095, modified='2013-09-26 22:28'),
...}
"""
metadata = Deb822(StringIO(execute('dpkg-deb', '-f', archive, logger=logger, capture=True)))
metadata = parse_control_fields(Deb822(StringIO(execute('dpkg-deb', '-f', archive, logger=logger, capture=True))))
contents = {}
for line in execute('dpkg-deb', '-c', archive, logger=logger, capture=True).splitlines():
# Example output of dpkg-deb -c archive.deb:
Expand Down

0 comments on commit 7d6fbe6

Please sign in to comment.