In [60]:
import os
from fnmatch import fnmatch

root = 'advisory-database-main/advisories/github-reviewed'
pattern = "*.json"

all_urls = []

for path, subdirs, files in os.walk(root):
    for name in files:
        if fnmatch(name, pattern):
            all_urls.append(os.path.join(path, name))

In [49]:
import pandas as pd
import json

advisory_data_list = []

for url in all_urls:
    f = open(url)

    data = json.load(f)
    
    advisory_data = {}
    
    advisory_data['published'] = data['published']
    
    for arr in data['affected']:
        if 'package' in arr:
            if 'ecosystem' in arr['package']:
                advisory_data['ecosystem'] = arr['package']['ecosystem']

        if 'ranges' in arr:
            for ran in arr['ranges']:
                if ran['type'] == 'ECOSYSTEM' and 'events' in ran:
                    for event in ran['events']:
                        if 'fixed' in event:
                            advisory_data['fixed'] = 1
                    if 'fixed' not in advisory_data:
                        advisory_data['fixed'] = 0

    if 'severity' in data['database_specific']:
        advisory_data['severity'] = data['database_specific']['severity']
        
    advisory_data_list.append(advisory_data)
    
    f.close()
    
df = pd.DataFrame(advisory_data_list)

df.to_csv('advisory_data.csv', index=False)

Unnamed: 0,published,ecosystem,fixed,severity
0,2022-03-24T00:04:03Z,Go,1.0,HIGH
1,2022-03-01T18:58:23Z,Maven,1.0,HIGH
2,2022-03-12T00:00:36Z,npm,1.0,MODERATE
3,2022-03-11T00:02:36Z,Packagist,1.0,MODERATE
4,2022-03-14T23:22:27Z,npm,1.0,LOW
...,...,...,...,...
7038,2018-12-19T19:23:52Z,PyPI,1.0,MODERATE
7039,2018-12-21T17:46:03Z,RubyGems,1.0,MODERATE
7040,2018-12-21T17:51:42Z,Maven,1.0,HIGH
7041,2018-12-19T19:25:14Z,PyPI,0.0,CRITICAL


In [59]:
print("Not fixed -----------------")
print(df[df['fixed'] == 0].groupby('ecosystem').size())

print("Fixed -----------------")
print(df[df['fixed'] == 1].groupby('ecosystem').size())

Not fixed -----------------
ecosystem
Go            21
Maven         93
NuGet          9
Packagist     98
PyPI          63
RubyGems      33
crates.io    109
npm          951
dtype: int64
Fixed -----------------
ecosystem
Go            428
Hex            12
Maven        1117
NuGet         161
Packagist     749
PyPI          999
RubyGems      416
crates.io     290
npm          1446
dtype: int64


In [116]:
import functools
import numpy as np
import json

language_dict = {}

for language in df.ecosystem.unique():
    print(language, "--------------------")
    df_npm = df[df.ecosystem == language]
    df_npm.published = pd.to_datetime(df_npm.published)
    totals = df_npm.resample('1M', on='published').size()

    totals_accumulate = []
    def accumulate(x, y):
        total = x + y
        totals_accumulate.append(total)
        return total

    totals_accumulate.append(totals.iloc[0])
    functools.reduce(accumulate, totals)

    totals = totals.reset_index()
    totals[0] = totals_accumulate
    totals.rename(columns={'published': 'x', 0: 'y'}, inplace=True)

    language_dict[language] = totals.to_json(orient='records')
    
with open('language_data.json', 'w') as outfile:
    outfile.write(json.dumps(language_dict))

Go --------------------
Maven --------------------
npm --------------------
Packagist --------------------
PyPI --------------------
RubyGems --------------------
crates.io --------------------
NuGet --------------------
Hex --------------------


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the d

In [129]:
df.groupby(['ecosystem', 'severity']).size()

df[df['severity'] == 'CRITICAL']

Unnamed: 0,published,ecosystem,fixed,severity
12,2022-03-15 00:00:55+00:00,Packagist,1.0,CRITICAL
24,2022-03-02 21:30:54+00:00,Maven,1.0,CRITICAL
26,2022-03-11 00:02:17+00:00,Go,1.0,CRITICAL
32,2022-03-18 17:56:08+00:00,Maven,1.0,CRITICAL
45,2022-03-31 00:00:24+00:00,crates.io,0.0,CRITICAL
...,...,...,...,...
7029,2018-12-26 17:45:19+00:00,PyPI,0.0,CRITICAL
7031,2018-12-21 17:50:20+00:00,Maven,1.0,CRITICAL
7033,2018-12-19 19:24:03+00:00,npm,1.0,CRITICAL
7035,2018-12-20 22:02:02+00:00,Maven,1.0,CRITICAL
