In [None]:
# Old method
from pkg_resources import working_set

unknown_license = "UNKNOWN"
all_packages_old = []


def parse_license(license_str: str):
    # Get license
    if not license_str:
        return None
    license_str = license_str.strip()

    if "copyright (c) " in license_str:
        license_str = license_str.lower().split("copyright (c) ")[0]

    if len(license_str) > 300:
        license_str = license_str[:300]

    license_str = license_str if license_str else unknown_license
    return license_str


for key in working_set.normalized_to_canonical_keys:
    """
    The metadata of a package is stored in "metadata" or "PKG-INFO"
    """
    try:
        package_name = working_set.normalized_to_canonical_keys[key]
        generic_name = package_name.replace("-", "_").lower()
        package = working_set.by_key[package_name]
        metadata_lines = package.get_metadata("METADATA").split("\n")
    except:
        try:
            metadata_lines = package.get_metadata("PKG-INFO").split("\n")
        except:
            all_packages_old.append([package_name, unknown_license, generic_name])
            continue

    license_arg_raw = None
    license_classifier_raw = None
    for line in metadata_lines:
        line: str = line
        if line.startswith("License-Expression: "):
            license_arg_raw = line.replace("License-Expression: ", "")
        if line.startswith("License: "):
            license_arg_raw = line.replace("License: ", "")
        if line.startswith("Classifier: License ::"):
            license_classifier_raw = line.split(" :: ")[-1]
            if license_classifier_raw.lower() in ["osi approved"]:
                license_classifier_raw = None

    general_license = "NOT FOUND"
    license_arg = parse_license(license_arg_raw)
    license_classifier = parse_license(license_classifier_raw)

    # You can get the license from license argument or classifier
    if license_arg is not None and license_arg != unknown_license:
        general_license = license_arg
    elif license_classifier:
        general_license = license_classifier
    else:
        general_license = unknown_license

    all_packages_old.append([package_name, general_license, generic_name])

all_packages_old.sort()
all_packages_old

[['asttokens', 'Apache 2.0', 'asttokens'],
 ['autocommand', 'LGPLv3', 'autocommand'],
 ['backports.tarfile', 'MIT License', 'backports.tarfile'],
 ['comm', 'BSD 3-Clause License', 'comm'],
 ['debugpy', 'MIT', 'debugpy'],
 ['decorator', 'BSD-2-Clause', 'decorator'],
 ['exceptiongroup', 'MIT License', 'exceptiongroup'],
 ['executing', 'MIT', 'executing'],
 ['importlib-metadata', 'Apache Software License', 'importlib_metadata'],
 ['inflect', 'MIT License', 'inflect'],
 ['iniconfig', 'MIT', 'iniconfig'],
 ['ipykernel', 'BSD 3-Clause License', 'ipykernel'],
 ['ipython', 'BSD-3-Clause', 'ipython'],
 ['ipython-pygments-lexers', 'BSD License', 'ipython_pygments_lexers'],
 ['jaraco.collections', 'MIT License', 'jaraco.collections'],
 ['jaraco.context', 'MIT License', 'jaraco.context'],
 ['jaraco.functools', 'MIT License', 'jaraco.functools'],
 ['jaraco.text', 'MIT License', 'jaraco.text'],
 ['jedi', 'MIT', 'jedi'],
 ['jupyter-client', 'BSD 3-Clause License', 'jupyter_client'],
 ['jupyter-core',

In [47]:
import importlib_metadata as im


def get_license(name: str):
    try:
        metas = im.metadata(name)
    except im.PackageNotFoundError:
        return None
    all_metas = str(metas).split("\n")
    for m in all_metas:
        if m.startswith("License-Expression: "):
            return m.replace("License-Expression: ", "")
        if m.startswith("License: "):
            return m.replace("License: ", "")
        if m.startswith("Classifier: License ::"):
            return m.split("::")[-1]

In [48]:
import pkgutil

all_packages_new = []
for i in pkgutil.iter_modules():
    try:
        if i.ispkg:
            name = i.name
            generic_name = name.replace("-", "_").lower()
            all_packages_new.append([name, get_license(name), generic_name])
    except:
        continue

all_packages_new.sort()
all_packages_new

[['IPython', 'BSD-3-Clause', 'ipython'],
 ['__phello__', None, '__phello__'],
 ['_distutils_hack', None, '_distutils_hack'],
 ['_pyrepl', None, '_pyrepl'],
 ['_pytest', None, '_pytest'],
 ['asttokens', 'Apache 2.0', 'asttokens'],
 ['asyncio', None, 'asyncio'],
 ['autocommand', 'LGPLv3', 'autocommand'],
 ['backports', None, 'backports'],
 ['collections', None, 'collections'],
 ['comm', 'BSD 3-Clause License', 'comm'],
 ['concurrent', None, 'concurrent'],
 ['ctypes', None, 'ctypes'],
 ['curses', None, 'curses'],
 ['dateutil', None, 'dateutil'],
 ['dbm', None, 'dbm'],
 ['debugpy', 'MIT', 'debugpy'],
 ['email', None, 'email'],
 ['encodings', None, 'encodings'],
 ['ensurepip', None, 'ensurepip'],
 ['exceptiongroup', ' MIT License', 'exceptiongroup'],
 ['executing', 'MIT', 'executing'],
 ['html', None, 'html'],
 ['http', None, 'http'],
 ['idlelib', None, 'idlelib'],
 ['importlib', None, 'importlib'],
 ['importlib_metadata', ' Apache Software License', 'importlib_metadata'],
 ['inflect', ' MI

In [None]:
import pandas as pd

all_packages = pd.DataFrame(
    columns=[
        "package_name_old",
        "package_name_new",
        "package_name_general",
        "license_new",
        "license_old",
    ]
)

for i in all_packages_old:
    new_package = []
    for j in all_packages_new:
        if j[2] == i[2]:
            new_package = j

    new_row = pd.Series(
        {
            "package_name_old": i[0],
            "package_name_new": new_package[0] if new_package else "",
            "package_name_general": i[2],
            "license_new": new_package[1] if new_package else "",
            "license_old": i[1],
        }
    )
    all_packages = pd.concat([all_packages, new_row.to_frame().T])

all_packages.head()

Unnamed: 0,package_name_old,package_name_new,package_name_general,license_new,license_old
0,asttokens,asttokens,asttokens,Apache 2.0,Apache 2.0
0,autocommand,autocommand,autocommand,LGPLv3,LGPLv3
0,backports.tarfile,,backports.tarfile,,MIT License
0,comm,comm,comm,BSD 3-Clause License,BSD 3-Clause License
0,debugpy,debugpy,debugpy,MIT,MIT


In [62]:
all_packages

Unnamed: 0,package_name_old,package_name_new,package_name_general,license_new,license_old
0,asttokens,asttokens,asttokens,Apache 2.0,Apache 2.0
0,autocommand,autocommand,autocommand,LGPLv3,LGPLv3
0,backports.tarfile,,backports.tarfile,,MIT License
0,comm,comm,comm,BSD 3-Clause License,BSD 3-Clause License
0,debugpy,debugpy,debugpy,MIT,MIT
0,decorator,,decorator,,BSD-2-Clause
0,exceptiongroup,exceptiongroup,exceptiongroup,MIT License,MIT License
0,executing,executing,executing,MIT,MIT
0,importlib-metadata,importlib_metadata,importlib_metadata,Apache Software License,Apache Software License
0,inflect,inflect,inflect,MIT License,MIT License


In [65]:
import requests
import subprocess

top_packages_url = (
    r"https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
)

top_packages = requests.get(top_packages_url).json()

all_packages = [i["project"] for i in top_packages["rows"]]

for package in all_packages:
    subprocess.run(f"python3 -m pip install {package}")

FileNotFoundError: [Errno 2] No such file or directory: 'python3 -m pip install boto3'