# Use pyquery-ql.py with files to find most recent issues
Send a graphql query to GitHub
and work with files for reports.

Supports Python 3.6+

In [22]:
import csv
import json
import os
import pprint

import requests

In [23]:
# get api token and set authorization
api_token = os.environ['GITHUB_API_TOKEN']
headers = {'Authorization': f'token {api_token}'}

In [24]:
# set url to a graphql endpoint
url = 'https://api.github.com/graphql'

In [80]:
# add a json query
query = """
{
  organization(login: "jupyterhub") {
    repositories(first: 30) {
      nodes {
        name
        issueConnection(first: 4) {
          nodes {
            number
            title
            createdAt
          }
        }
      }
    }
  }
}
"""

In [81]:
# submit the request
r = requests.post(url=url, json={'query': query}, headers=headers)

In [82]:
result = r.json()
result

{'data': None,
 'errors': [{'locations': [{'column': 9, 'line': 7}],
   'message': "Field 'issueConnection' doesn't exist on type 'Repository'"}]}

In [33]:
import pandas as pd

In [52]:
repos = (pd.DataFrame(result['data']['organization']['repositories']['nodes'], columns=['name','issues'])).copy()

In [53]:
repos

Unnamed: 0,name,issues
0,jupyterhub,"{'nodes': [{'number': 2, 'title': 'use sqlalch..."
1,configurable-http-proxy,"{'nodes': [{'number': 1, 'title': 'Query by la..."
2,oauthenticator,"{'nodes': [{'number': 8, 'title': 'GithubOAuth..."
3,dockerspawner,"{'nodes': [{'number': 2, 'title': 'Docker layo..."
4,sudospawner,"{'nodes': [{'number': 1, 'title': 'SudoSpawner..."
5,batchspawner,"{'nodes': [{'number': 3, 'title': 'Implement t..."
6,kubespawner,"{'nodes': [{'number': 1, 'title': 'Add a defau..."
7,ldapauthenticator,"{'nodes': [{'number': 1, 'title': 'Support res..."
8,jupyterhub-deploy-docker,"{'nodes': [{'number': 6, 'title': 'Problem wit..."
9,jupyterhub-deploy-teaching,"{'nodes': [{'number': 5, 'title': 'Update cook..."


In [57]:
issues = pd.DataFrame(repos, columns=['number', 'title', 'createdAt'])

In [58]:
issues

Unnamed: 0,number,title,createdAt
0,,,
1,,,
2,,,
3,,,
4,,,
5,,,
6,,,
7,,,
8,,,
9,,,


### Make request and create json and csv files

In [15]:
# create a json file from response
with open('data.json', 'w') as f:
    json.dump(r.json(), f)

In [16]:
r.json()

{'data': {'organization': {'repositories': {'nodes': [{'issues': {'nodes': [{'createdAt': '2014-08-18T18:00:47Z',
         'number': 2,
         'title': 'use sqlalchemy ORM for state'},
        {'createdAt': '2014-08-18T18:01:57Z',
         'number': 3,
         'title': 'make get_current_user async'},
        {'createdAt': '2014-08-18T18:02:32Z',
         'number': 4,
         'title': 'make process launching configurable'},
        {'createdAt': '2014-08-18T18:05:21Z',
         'number': 5,
         'title': 'make authentication configurable'}]},
      'name': 'jupyterhub'},
     {'issues': {'nodes': [{'createdAt': '2014-09-17T13:43:28Z',
         'number': 1,
         'title': 'Query by last activity API'},
        {'createdAt': '2014-10-23T23:13:22Z',
         'number': 18,
         'title': 'Sockets ballooning proportionally to the number of websockets opened by clients.'},
        {'createdAt': '2014-11-12T02:32:31Z',
         'number': 19,
         'title': '"prepend path" opti

In [99]:
# unpack the layers of json
nodes = r.json()['data']['organization']['repositories']['nodes']

unpacked = []
for node in nodes:
    unpacked.append(node)

In [100]:
unpacked

[{'issues': {'nodes': [{'createdAt': '2014-08-18T18:00:47Z',
     'number': 2,
     'title': 'use sqlalchemy ORM for state'},
    {'createdAt': '2014-08-18T18:01:57Z',
     'number': 3,
     'title': 'make get_current_user async'},
    {'createdAt': '2014-08-18T18:02:32Z',
     'number': 4,
     'title': 'make process launching configurable'},
    {'createdAt': '2014-08-18T18:05:21Z',
     'number': 5,
     'title': 'make authentication configurable'}]},
  'name': 'jupyterhub'},
 {'issues': {'nodes': [{'createdAt': '2014-09-17T13:43:28Z',
     'number': 1,
     'title': 'Query by last activity API'},
    {'createdAt': '2014-10-23T23:13:22Z',
     'number': 18,
     'title': 'Sockets ballooning proportionally to the number of websockets opened by clients.'},
    {'createdAt': '2014-11-12T02:32:31Z',
     'number': 19,
     'title': '"prepend path" option doesn\'t work properly'},
    {'createdAt': '2015-04-21T16:42:29Z',
     'number': 23,
     'title': 'host routing'}]},
  'name': 'con

In [101]:
headers = ['name', 'issues']

rows = []
for obj in unpacked:
    issue_list = []
    for item in obj['issues']['nodes']:
        issue_list.append({item['number'], item['title'], item['createdAt']})
    new_dict = {'name':obj['name'], 'issues':issue_list}
    rows.append(new_dict)
rows

[{'issues': [{2, '2014-08-18T18:00:47Z', 'use sqlalchemy ORM for state'},
   {'2014-08-18T18:01:57Z', 3, 'make get_current_user async'},
   {'2014-08-18T18:02:32Z', 4, 'make process launching configurable'},
   {'2014-08-18T18:05:21Z', 5, 'make authentication configurable'}],
  'name': 'jupyterhub'},
 {'issues': [{1, '2014-09-17T13:43:28Z', 'Query by last activity API'},
   {18,
    '2014-10-23T23:13:22Z',
    'Sockets ballooning proportionally to the number of websockets opened by clients.'},
   {'"prepend path" option doesn\'t work properly',
    19,
    '2014-11-12T02:32:31Z'},
   {'2015-04-21T16:42:29Z', 23, 'host routing'}],
  'name': 'configurable-http-proxy'},
 {'issues': [{'2015-07-25T02:07:48Z', 8, 'GithubOAuthenticator gets 403'},
   {15, '2015-08-22T16:07:32Z', 'Ubuntu users should use pip3 and python3'},
   {18, '2015-10-08T20:29:25Z', 'delete CILogon staged certificate'},
   {'2015-11-29T21:33:52Z', 21, 'Add a MediawikiOAuthenticator'}],
  'name': 'oauthenticator'},
 {'iss

In [102]:
with open('mydata.csv', 'w') as f:
    f_csv = csv.DictWriter(f, headers)
    f_csv.writeheader()
    f_csv.writerows(rows)

Check file

In [103]:
%%bash

less mydata.csv

name,issues
jupyterhub,"[{2, 'use sqlalchemy ORM for state', '2014-08-18T18:00:47Z'}, {'make get_current_user async', 3, '2014-08-18T18:01:57Z'}, {'2014-08-18T18:02:32Z', 'make process launching configurable', 4}, {'make authentication configurable', 5, '2014-08-18T18:05:21Z'}]"
configurable-http-proxy,"[{'Query by last activity API', 1, '2014-09-17T13:43:28Z'}, {'Sockets ballooning proportionally to the number of websockets opened by clients.', 18, '2014-10-23T23:13:22Z'}, {19, '2014-11-12T02:32:31Z', '""prepend path"" option doesn\'t work properly'}, {'2015-04-21T16:42:29Z', 'host routing', 23}]"
oauthenticator,"[{8, '2015-07-25T02:07:48Z', 'GithubOAuthenticator gets 403'}, {'2015-08-22T16:07:32Z', 'Ubuntu users should use pip3 and python3', 15}, {18, 'delete CILogon staged certificate', '2015-10-08T20:29:25Z'}, {'Add a MediawikiOAuthenticator', '2015-11-29T21:33:52Z', 21}]"
dockerspawner,"[{2, '2014-09-27T21:08:00Z', 'Docker layout'}, {'jupyterhub_config.py', '2014-11-11T22:18:2

## Bring into pandas

In [105]:
df = pd.read_csv('mydata.csv')

In [106]:
df.columns

Index(['name', 'issues'], dtype='object')

In [107]:
# df.head()

Generate basic report of total open issues

In [108]:
# df.dtypes

In [109]:
# df.index

In [110]:
# df.values

### Reports

In [111]:
# By repo name
sorted_df = df.sort_values(by=['name'])
sorted_df

Unnamed: 0,name,issues
5,batchspawner,"[{3, 'Implement testing', '2016-02-24T22:45:18..."
24,binder,"[{'Discuss nomenclature', 3, '2017-08-18T17:57..."
22,binderhub,"[{'Add a README', 1, '2017-05-06T21:19:55Z'}, ..."
1,configurable-http-proxy,"[{'Query by last activity API', 1, '2014-09-17..."
3,dockerspawner,"[{2, '2014-09-27T21:08:00Z', 'Docker layout'},..."
21,helm-chart,"[{1, '2017-04-18T00:04:26Z', 'Allow customizin..."
17,hubshare,"[{'2017-01-27T13:39:47Z', 'implement storage A..."
0,jupyterhub,"[{2, 'use sqlalchemy ORM for state', '2014-08-..."
8,jupyterhub-deploy-docker,[{'Problem with running/authenticating on an E...
11,jupyterhub-deploy-hpc,"[{'Add a jetstream instance for jupyterhub', 2..."


In [138]:
for x in sorted_df:
    name
    issues

    
   

NameError: name 'issues' is not defined

In [113]:
# by open pr count
df.sort_values(by=['prs'], ascending=False)

KeyError: 'prs'

In [None]:
# output data to a csv
# df.to_csv('issue_report.csv')