Code that downloads the information about charm uploads.

In [None]:
import json
import sys
import requests
from datetime import datetime
from datetime import timedelta

# Base URL of the charm store API (v5).
CHARMSTORE_URL = 'https://api.jujucharms.com/charmstore/v5'
# Path of the published-changes endpoint; the trailing '?' starts the query
# string that download_stats appends to it.
CHANGES_URL = '/changes/published?'

def download_stats(start_date_str, end_date_str, interval, arg_output):
    """Download published-charm changes window by window and save them as JSON.

    The date range is walked in consecutive windows of ``interval`` days. One
    request is sent to the changes endpoint per window, and the decoded JSON
    response is stored together with the window's start date. The collected
    list of ``{"date": ..., "charms": ...}`` entries is written to
    ``arg_output``.

    Note: a new window starts as long as its first day is not after the end
    date, so with ``interval`` > 1 the final window can extend past it.

    Parameters
    ----------
    start_date_str : str
        First day to query, formatted as ``YYYY-MM-DD``.
    end_date_str : str
        Last day on which a window may start, formatted as ``YYYY-MM-DD``.
    interval : int
        Window length in days; must be positive.
    arg_output : str
        Path of the JSON file to write (overwritten if it exists).

    Raises
    ------
    ValueError
        If ``interval`` is not positive, or a date string does not match
        ``YYYY-MM-DD``.
    """
    if interval <= 0:
        # A non-positive interval never advances start_date, so the loop
        # below would run forever.
        raise ValueError("interval must be a positive number of days")

    start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
    end_date = datetime.strptime(end_date_str, '%Y-%m-%d')

    data = []

    while start_date <= end_date:
        # Last day (inclusive) of the current window.
        eow = start_date + timedelta(days=interval-1)
        param1 = start_date.strftime('%Y-%m-%d')
        param2 = eow.strftime('%Y-%m-%d')
        print(param1, param2)

        url = CHARMSTORE_URL+CHANGES_URL+"start="+param1+"&stop="+param2
        r = requests.get(url)
        data.append({"date": param1, "charms": r.json()})

        # The next window begins the day after this one ends.
        start_date = eow + timedelta(days=1)

    # The context manager closes (and flushes) the file even if writing fails.
    with open(arg_output, "w") as f:
        json.dump(data, f)


Let's get data for two days, October 18th and 19th 2018. Write it out to test-changes.json.

In [None]:
# Fetch a two-day sample, one request per day.
download_stats(
    start_date_str="2018-10-18",
    end_date_str="2018-10-19",
    interval=1,
    arg_output="test-changes.json",
)

In [None]:
# Read the sample file back and decode its JSON content.
with open('test-changes.json') as f:
    raw_text = f.read()
data = json.loads(raw_text)

In [None]:
# Print the decoded JSON held in `data`.
print(data)

In [None]:
# Fetch the data from 2018-01-01 through 2018-10-19, one request per day.
download_stats(
    start_date_str="2018-01-01",
    end_date_str="2018-10-19",
    interval=1,
    arg_output="uploads.json",
)

In [None]:
# Read the downloaded file back and decode its JSON content.
with open("uploads.json") as f:
    raw_text = f.read()
data = json.loads(raw_text)

Now we have JSON data for uploads. For each item, map the date to the number of uploads for that date, getting two arrays: array of dates, and array of number of uploads.

In [None]:
# One entry per downloaded window: its start date, the raw "charms" payload,
# and the length of that payload.
dates = [entry['date'] for entry in data]
uploads = [entry['charms'] for entry in data]
num_uploads = [len(day_charms) for day_charms in uploads]

print(num_uploads)

Let's export the data, so that we can load it later with pandas.

In [None]:
import pandas as pd
import numpy as np

# Pair each date with its upload count and write the table to CSV.
df = pd.DataFrame({
    "date": np.array(dates),
    "num_uploads": np.array(num_uploads),
})
df.to_csv("num-uploads.csv", index=False)

Let's load the data and print out some info about it.

In [None]:
# Reload the exported counts and preview the first five rows.
uploads_data = pd.read_csv(filepath_or_buffer="num-uploads.csv")
uploads_data.head(n=5)

In [None]:
# info must be called: the bare attribute only displays the bound method's
# repr instead of printing the column/dtype summary.
uploads_data.info()

In [None]:
# Bar chart with one bar per row of the num_uploads column.
uploads_data['num_uploads'].plot.bar(figsize=(50, 25))

In [None]:
import matplotlib.pyplot as plt
# Histogram (50 bins) of the numeric columns of uploads_data.
uploads_data.hist(bins=50, figsize=(20,15))
plt.show()

In [None]:
# Summary statistics of the upload counts. String names are used because the
# pd.np alias was deprecated in pandas 1.0 and removed in 2.0.
uploads_data['num_uploads'].agg(["min", "max", "mean", "sum"])

In [None]:
def split_name(charm):
    """Split a charm ID into its leading path segment and its application name.

    Every 'cs:~' marker is removed from the ID first. The first '/'-separated
    segment of what remains is returned as the charmer. The application name
    is the last segment with everything from its final '-' onwards dropped
    (presumably the revision number, e.g. 'my-app-12' -> 'my-app').

    A last segment without any '-' yields an empty application name.

    Returns
    -------
    tuple of (str, str)
        ``(charmer, charm_name)``.
    """
    path = charm.replace('cs:~', '')
    owner = path.partition('/')[0]
    versioned_name = path.rpartition('/')[2]
    # Keep only the text before the final '-'.
    charm_name = versioned_name.rpartition('-')[0]
    return owner, charm_name

# Split the 'Id' of every upload, across all downloaded windows, into its
# (charmer, application) pair.
name_pairs = [split_name(upload['Id']) for day in uploads for upload in day]
charmers = [owner for owner, _ in name_pairs]
charms = [app_name for _, app_name in name_pairs]

# One row per upload, written out as CSV.
c = np.array(charmers)
d = np.array(charms)
df2 = pd.DataFrame({"charmer" : c, "application" : d})
df2.to_csv("charm-uploads.csv", index=False)

In [None]:
# Distinct charmer names, in order of first appearance.
charmer_names = df2["charmer"].unique()
print(charmer_names)

Number of distinct charmers who uploaded charms in the time period.

In [None]:
# dropna=False counts every distinct value, the same as len(unique()).
df2["charmer"].nunique(dropna=False)

In [None]:
# Number of uploads per charmer.
df2.groupby("charmer").count()

In [None]:
# Number of uploads per charmer, saved to disk.
charmer_count = df2.groupby(["charmer"]).count()
# Fixed the ".cvs" typo so the file gets the same .csv extension as the
# notebook's other exports.
charmer_count.to_csv("charmer-count.csv")

In [None]:
# Bar chart of the number of uploads per charmer.
df2.groupby("charmer").count().plot.bar(figsize=(30, 15))

In [None]:
# Histogram (200 bins) of the per-charmer counts.
df2.groupby(["charmer"]).count().hist(bins=200, figsize=(20,15))
plt.show()

In [None]:
# Summary statistics of the per-charmer counts. String names are used because
# the pd.np alias was deprecated in pandas 1.0 and removed in 2.0.
df2.groupby(["charmer"]).count().agg(["min", "max", "mean", "sum"])

In [None]:
# Distinct application names, in order of first appearance.
application_names = df2["application"].unique()
print(application_names)

Number of distinct applications uploaded in the time period.

In [None]:
# dropna=False counts every distinct value, the same as len(unique()).
df2["application"].nunique(dropna=False)

In [None]:
# Number of uploads per application.
df2.groupby("application").count()

In [None]:
# Bar chart of the number of uploads per application.
df2.groupby("application").count().plot.bar(figsize=(50, 15))

In [None]:
# Histogram (200 bins) of the per-application counts.
df2.groupby(["application"]).count().hist(bins=200, figsize=(20,15))
plt.show()

In [None]:
# Summary statistics of the per-application counts. String names are used
# because the pd.np alias was deprecated in pandas 1.0 and removed in 2.0.
df2.groupby(["application"]).count().agg(["min", "max", "mean", "sum"])

In [None]:
# Number of uploads per application, saved to disk.
app_count = df2.groupby("application").count()
app_count.to_csv(path_or_buf="app-count.csv")

In [None]:
# size() counts the rows in each (charmer, application) group. count() has
# nothing to count here because both columns of df2 are used as group keys,
# so it returns a frame with no columns.
df2.groupby(["charmer", "application"]).size().to_frame("count")

In [None]:
# Uploads per (charmer, application) pair. size() is used instead of count():
# both columns of df2 are group keys, so count() would produce a frame with no
# columns and the exported files would contain no counts.
grp = df2.groupby(["charmer", "application"]).size().to_frame("count")
grp.to_csv("charmer-app-count.csv")

# Same counts, indexed by application first.
grp2 = df2.groupby(["application", "charmer"]).size().to_frame("count")
grp2.to_csv("app-charmer-count.csv")

In [None]:
# Keep the rows whose index label contains "docker".
grp2.filter(like="docker", axis="index")

In [None]:
# Keep the rows whose index label contains "yellow".
grp.filter(like="yellow", axis="index")

In [None]:
# Per-charmer upload counts, restricted to charmers whose name contains "yellow".
per_charmer = df2.groupby("charmer").count()
per_charmer.filter(like="yellow", axis="index")