In [1]:
# Old method
from pkg_resources import working_set

unknown_license = "UNKNOWN"
all_packages_old = []


def parse_license(license_str: str):
    # Get license
    if not license_str:
        return None
    license_str = license_str.strip()

    if "copyright (c) " in license_str:
        license_str = license_str.lower().split("copyright (c) ")[0]

    if len(license_str) > 300:
        license_str = license_str[:300]

    license_str = license_str if license_str else unknown_license
    return license_str


for key in working_set.normalized_to_canonical_keys:
    """
    The metadata of a package is stored in "metadata" or "PKG-INFO"
    """
    try:
        package_name = working_set.normalized_to_canonical_keys[key]
        generic_name = package_name.replace("-", "_").lower()
        package = working_set.by_key[package_name]
        metadata_lines = package.get_metadata("METADATA").split("\n")
    except:
        try:
            metadata_lines = package.get_metadata("PKG-INFO").split("\n")
        except:
            all_packages_old.append([package_name, unknown_license, generic_name])
            continue

    license_arg_raw = None
    license_classifier_raw = None
    for line in metadata_lines:
        line: str = line
        if line.startswith("License-Expression: "):
            license_arg_raw = line.replace("License-Expression: ", "")
        if line.startswith("License: "):
            license_arg_raw = line.replace("License: ", "")
        if line.startswith("Classifier: License ::"):
            license_classifier_raw = line.split(" :: ")[-1]
            if license_classifier_raw.lower() in ["osi approved"]:
                license_classifier_raw = None

    general_license = "NOT FOUND"
    license_arg = parse_license(license_arg_raw)
    license_classifier = parse_license(license_classifier_raw)

    # You can get the license from license argument or classifier
    if license_arg is not None and license_arg != unknown_license:
        general_license = license_arg
    elif license_classifier:
        general_license = license_classifier
    else:
        general_license = unknown_license

    all_packages_old.append([package_name, general_license, generic_name])

all_packages_old.sort()
all_packages_old

  from pkg_resources import working_set


[['absl-py', 'Apache-2.0', 'absl_py'],
 ['accelerate', 'Apache', 'accelerate'],
 ['acme', 'Apache-2.0', 'acme'],
 ['adal', 'MIT', 'adal'],
 ['adlfs', 'BSD', 'adlfs'],
 ['aenum', 'BSD License', 'aenum'],
 ['agate', 'MIT', 'agate'],
 ['aioboto3', 'Apache-2.0', 'aioboto3'],
 ['aiobotocore', 'Apache-2.0', 'aiobotocore'],
 ['aiodns', 'MIT', 'aiodns'],
 ['aiofiles', 'Apache-2.0', 'aiofiles'],
 ['aiohappyeyeballs', 'PSF-2.0', 'aiohappyeyeballs'],
 ['aiohttp', 'Apache-2.0 AND MIT', 'aiohttp'],
 ['aiohttp-cors', 'Apache License, Version 2.0', 'aiohttp_cors'],
 ['aiohttp-retry', 'MIT', 'aiohttp_retry'],
 ['aioitertools', 'MIT License', 'aioitertools'],
 ['aioresponses', 'MIT License', 'aioresponses'],
 ['aiosignal', 'Apache 2.0', 'aiosignal'],
 ['aiosqlite', 'MIT License', 'aiosqlite'],
 ['alabaster', 'BSD License', 'alabaster'],
 ['alembic', 'MIT', 'alembic'],
 ['aliyun-python-sdk-core', 'Apache License 2.0', 'aliyun_python_sdk_core'],
 ['altair', 'Copyright (c) 2015-2023, Vega-Altair Developer

In [2]:
import importlib_metadata as im


def get_license(name: str):
    try:
        metas = im.metadata(name)
    except im.PackageNotFoundError:
        return None
    all_metas = str(metas).split("\n")
    for m in all_metas:
        if m.startswith("License-Expression: "):
            return m.replace("License-Expression: ", "")
        if m.startswith("License: "):
            return m.replace("License: ", "")
        if m.startswith("Classifier: License ::"):
            return m.split("::")[-1]

In [3]:
import pkgutil

all_packages_new = []
for i in pkgutil.iter_modules():
    try:
        if i.ispkg:
            name = i.name
            generic_name = name.replace("-", "_").lower()
            all_packages_new.append([name, get_license(name), generic_name])
    except:
        continue

all_packages_new.sort()
all_packages_new

[['Crypto', None, 'crypto'],
 ['Cryptodome', None, 'cryptodome'],
 ['Cython', 'Apache-2.0', 'cython'],
 ['DateTime', 'ZPL 2.1', 'datetime'],
 ['IPython', 'BSD-3-Clause', 'ipython'],
 ['Levenshtein', 'GPL-2.0-or-later', 'levenshtein'],
 ['OpenSSL', None, 'openssl'],
 ['PIL', None, 'pil'],
 ['PyPDF2', ' BSD License', 'pypdf2'],
 ['TCLIService', None, 'tcliservice'],
 ['__phello__', None, '__phello__'],
 ['_argon2_cffi_bindings', None, '_argon2_cffi_bindings'],
 ['_distutils_hack', None, '_distutils_hack'],
 ['_multiprocess', None, '_multiprocess'],
 ['_plotly_utils', None, '_plotly_utils'],
 ['_pyrepl', None, '_pyrepl'],
 ['_pytest', None, '_pytest'],
 ['_soundfile_data', None, '_soundfile_data'],
 ['_yaml', None, '_yaml'],
 ['absl', None, 'absl'],
 ['accelerate', 'Apache', 'accelerate'],
 ['acme', 'Apache-2.0', 'acme'],
 ['adal', 'MIT', 'adal'],
 ['adlfs', 'BSD', 'adlfs'],
 ['aenum', 'BSD License', 'aenum'],
 ['agate', 'MIT', 'agate'],
 ['aioboto3', 'Apache-2.0', 'aioboto3'],
 ['aioboto

In [4]:
import pandas as pd

all_packages = pd.DataFrame(
    columns=[
        "package_name_old",
        "package_name_new",
        "package_name_general",
        "license_new",
        "license_old",
    ]
)

for i in all_packages_old:
    new_package = []
    for j in all_packages_new:
        if j[2] == i[2]:
            new_package = j

    new_row = pd.Series(
        {
            "package_name_old": i[0],
            "package_name_new": new_package[0] if new_package else "",
            "package_name_general": i[2],
            "license_new": new_package[1] if new_package else "",
            "license_old": i[1],
        }
    )
    all_packages = pd.concat([all_packages, new_row.to_frame().T])

all_packages.head()

Unnamed: 0,package_name_old,package_name_new,package_name_general,license_new,license_old
0,absl-py,,absl_py,,Apache-2.0
0,accelerate,accelerate,accelerate,Apache,Apache
0,acme,acme,acme,Apache-2.0,Apache-2.0
0,adal,adal,adal,MIT,MIT
0,adlfs,adlfs,adlfs,BSD,BSD


In [6]:
all_packages.to_csv("test_1.csv")