In [1]:
import ast
import json
import os

import pandas as pd
from fastapi import FastAPI
from py2neo import Graph

In [2]:
# File the query result is written to and later re-read from.
csv_file_name = "results.csv"

# Neo4j connection settings. Each value comes from the environment and falls
# back to the literal default shown here when the variable is unset.
url = os.getenv("NEO4J_URL", "bolt://localhost:7687")
user = os.getenv("NEO4J_USER", "neo4j")
pswd = os.getenv("NEO4J_PSWD", "password")

# Graph handle that the query cell calls `.run(...)` on.
graph = Graph(url, auth=(user, pswd))

In [7]:
# Find license mismatches: start from every package whose `license` property
# contains one of the two GPL/AGPL substrings, follow DEPENDS_ON edges backwards
# over one or more hops to the packages that depend on it, and keep the
# dependents whose own `license` contains neither substring. Each result row is
# a dependent package, its license, and the distinct GPL packages it reaches.
#
# The query is a plain string on purpose: nothing is interpolated, and an
# f-string would misparse any literal `{...}` map added to the Cypher later.
response = graph.run(
    """
MATCH (n:package)
WHERE n.license CONTAINS "GNU general public license" OR n.license CONTAINS "GNU Affero general public license"

MATCH (n)<-[:DEPENDS_ON*1..]-(m)
WHERE NOT m.license CONTAINS "GNU general public license" AND NOT m.license CONTAINS "GNU Affero general public license"
RETURN m.name as name, m.license AS license, collect( DISTINCT n.name) as GPL_dependencies ORDER BY license
    """
).data()

# Name the columns explicitly so the frame (and the CSV header) keeps the same
# schema when the query returns no rows; an empty, column-less frame would
# otherwise produce a CSV without the `license` column the later cells read.
all_miss_matches = pd.DataFrame(
    response, columns=["name", "license", "GPL_dependencies"]
)
all_miss_matches.to_csv(csv_file_name, index=False)

In [8]:
# Reload the persisted result from disk. CSV stores each GPL_dependencies list
# as its Python repr (e.g. "['pyqt5', 'pyqtwebengine']"), which would come back
# as a plain string; parse it so the column holds real lists again.
all_miss_matches = pd.read_csv(
    csv_file_name, converters={"GPL_dependencies": ast.literal_eval}
)
all_miss_matches

Unnamed: 0,name,license,GPL_dependencies
0,apache-airflow-providers-mysql,Apache license,"['mysql-connector-python', 'mysqlclient']"
1,jiwer,Apache license,['levenshtein']
2,optbinning,Apache license,['ecos']
3,ropwr,Apache license,['ecos']
4,rpaframework,Apache license,"['pyqt5', 'pyqtwebengine']"
5,rpaframework-dialogs,Apache license,"['pyqt5', 'pyqtwebengine']"
6,causalml,Apache license,['pygam']
7,cmreshandler,Apache license,['enum']
8,dvc,Apache license 2.0,['grandalf']
9,dvc-http,Apache license 2.0,['grandalf']


In [9]:
# Tally the flagged packages per declared license (most frequent first) and
# show the result as a plain dict.
all_miss_matches["license"].value_counts().to_dict()

{'MIT license': 28,
 'Apache license': 8,
 'BSD license': 7,
 'Apache license 2.0': 4,
 'UNKNOWN': 4,
 'GNU lesser general public license v3 (lgplv3)': 1,
 'Public Domain': 1,
 'other/proprietary license': 1}

: 