# Microtask-6

> Perform any other analysis you may find interesting, based on the Perceval data you collected.



In [3]:
import json
import datetime
import csv

import requests
import matplotlib.pyplot as plt

from collections import defaultdict
from prettytable import from_csv

from pprint import pprint

## Functions to return the summary of the contribution types

In [4]:
def summary_commit(commit):
    """Condense one commit item into a flat summary dict.

    Parameters
    ----------
    commit : dict
        Item with an 'origin' entry and a 'data' dict holding the keys
        'commit', 'Author', 'AuthorDate', 'Commit', 'CommitDate' and 'files'
        (presumably a Perceval git item -- confirm against the data source).

    Returns
    -------
    dict
        Keys: 'repo', 'hash', 'author', 'author_date', 'commit',
        'created_date', 'files_no', 'files_action' and 'merge'.

    Raises
    ------
    KeyError
        If any of the keys listed above is missing.
    ValueError
        If a date string does not match the expected format.
    """
    # Both dates look like "Tue Aug 14 14:30:13 2018 -0700"; %z keeps the UTC
    # offset, so the parsed datetimes are timezone-aware.
    date_format = "%a %b %d %H:%M:%S %Y %z"
    to_datetime = datetime.datetime.strptime

    origin = commit['origin']
    payload = commit['data']
    return {
        'repo': origin,
        'hash': payload['commit'],
        'author': payload['Author'],
        'author_date': to_datetime(payload['AuthorDate'], date_format),
        'commit': payload['Commit'],
        'created_date': to_datetime(payload['CommitDate'], date_format),
        'files_no': len(payload['files']),
        # Only file entries that carry an 'action' key are counted here.
        'files_action': sum(1 for entry in payload['files'] if 'action' in entry),
        # True when the commit data has a 'Merge' key at all.
        'merge': 'Merge' in payload,
    }

In [5]:
def summary_issue(issue):
    """Condense one issue item into a flat summary dict.

    Parameters
    ----------
    issue : dict
        Item with an 'origin' entry and a 'data' dict holding the keys 'id',
        'user' (itself with a 'login'), 'created_at', 'updated_at',
        'comments' and 'state' (presumably a Perceval GitHub issue item --
        confirm against the data source).

    Returns
    -------
    dict
        Keys: 'repo', 'hash', 'author', 'created_date', 'updated_date',
        'comments' and 'state'. 'updated_date' is None when 'updated_at' is
        empty or None.

    Raises
    ------
    KeyError
        If any of the keys listed above is missing.
    ValueError
        If a timestamp does not match "%Y-%m-%dT%H:%M:%SZ".
    """
    def parse_timestamp(text):
        # The trailing "Z" is matched literally, so the result is a naive
        # datetime without tzinfo.
        return datetime.datetime.strptime(text, "%Y-%m-%dT%H:%M:%SZ")

    origin = issue['origin']
    payload = issue['data']
    return {
        'repo': origin,
        'hash': payload['id'],
        'author': payload['user']['login'],
        'created_date': parse_timestamp(payload['created_at']),
        'updated_date': (parse_timestamp(payload['updated_at'])
                         if payload['updated_at'] else None),
        'comments': payload['comments'],
        'state': payload['state'],
    }

In [6]:
def summary_pr(pr):
    """Condense one pull request item into a flat summary dict.

    Parameters
    ----------
    pr : dict
        Item with an 'origin' entry and a 'data' dict holding the keys 'id',
        'user' (itself with a 'login'), 'created_at', 'closed_at',
        'merged_at', 'comments', 'commits', 'additions', 'deletions',
        'changed_files' and 'state' (presumably a Perceval GitHub
        pull_request item -- confirm against the data source).

    Returns
    -------
    dict
        Keys: 'repo', 'hash', 'author', 'created_date', 'closed_date',
        'merged_date', 'comments', 'commits', 'additions', 'deletions',
        'changed_files' and 'state'. 'closed_date' and 'merged_date' are None
        when the corresponding timestamp is empty or None.

    Raises
    ------
    KeyError
        If any of the keys listed above is missing.
    ValueError
        If a timestamp does not match "%Y-%m-%dT%H:%M:%SZ".
    """
    def parse_timestamp(text):
        # The trailing "Z" is matched literally, so the result is a naive
        # datetime without tzinfo.
        return datetime.datetime.strptime(text, "%Y-%m-%dT%H:%M:%SZ")

    def parse_optional(text):
        # A falsy timestamp maps to None (used for 'closed_at'/'merged_at').
        return parse_timestamp(text) if text else None

    origin = pr['origin']
    payload = pr['data']
    return {
        'repo': origin,
        'hash': payload['id'],
        'author': payload['user']['login'],
        'created_date': parse_timestamp(payload['created_at']),
        'closed_date': parse_optional(payload['closed_at']),
        'merged_date': parse_optional(payload['merged_at']),
        'comments': payload['comments'],
        'commits': payload['commits'],
        'additions': payload['additions'],
        'deletions': payload['deletions'],
        'changed_files': payload['changed_files'],
        'state': payload['state'],
    }

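## Dividing the data source into contribution types

Only pull requests are summarized and kept in the cell below; commits and issues are skipped while loading.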

In [13]:
# Load the JSON-lines dump (one JSON document per line) and keep a summary of
# every pull request, grouped under its category name in `content`.
#
# Only categories listed in `summarizers` are kept. Commits and issues are
# deliberately skipped because the analysis below only looks at pull requests.
# Any other category is skipped as well: previously such a record fell through
# every branch and either raised NameError (`summary` unbound) or re-appended
# the previous pull request's summary under the wrong key.
summarizers = {'pull_request': summary_pr}

content = defaultdict(list)
# JSON text is UTF-8; do not depend on the platform's default encoding.
with open('../data/elasticsearch-py.json', encoding='utf-8') as datasrc:
    for line in datasrc:
        if not line.strip():
            continue  # tolerate blank or trailing empty lines
        record = json.loads(line)
        category = record['category']
        summarize = summarizers.get(category)
        if summarize is None:
            continue  # category not analysed in this notebook
        content[category].append(summarize(record))

In [33]:
# Count pull requests by outcome: open, closed without merge, closed and merged.
# Metric definitions:
# https://github.com/chaoss/wg-gmd/blob/master/metrics/pull-requests-open.md
# https://github.com/chaoss/wg-gmd/blob/master/metrics/pull-requests-closed.md
# https://github.com/chaoss/wg-gmd/blob/master/metrics/pull-requests-merged.md

open_count = 0
closed_count = 0   # closed without being merged
merged_count = 0   # closed and merged

for pr in content["pull_request"]:
    state = pr["state"]
    if state == "open":
        open_count += 1
    elif state == "closed" and pr["closed_date"] is not None:
        # A closed PR with no close date is counted nowhere.
        if pr["merged_date"] is None:
            closed_count += 1
        else:
            merged_count += 1

print("Open PR: ", open_count)
print("Total Closed: ", closed_count + merged_count)
print("\tClosed but not Merged: ", closed_count)
print("\tClosed and Merged: ", merged_count)

Open PR:  8
Total Closed:  282
	Closed but not Merged:  151
	Closed and Merged:  131


In [None]:
for item in content["pull_request"]:
    