# Microtask-6

> Perform any other analysis you may find interesting, based on the Perceval data you collected.



In [1]:
import json
import datetime
import csv

import requests
import matplotlib.pyplot as plt

from collections import defaultdict
from prettytable import from_csv

## Functions that build a summary for each contribution type (commit, issue, pull request)

In [3]:
def summary_commit(commit):
    """Build a flat summary dict from one commit item.

    Args:
        commit: Mapping with an ``'origin'`` entry and a ``'data'`` mapping
            holding ``'commit'``, ``'Author'``, ``'AuthorDate'``,
            ``'Commit'``, ``'CommitDate'`` and ``'files'``.

    Returns:
        A dict with the keys ``repo``, ``hash``, ``author``, ``author_date``,
        ``commit``, ``created_date``, ``files_no``, ``files_action`` and
        ``merge``. Both dates are parsed into timezone-aware ``datetime``
        objects; ``files_action`` counts the file entries that contain an
        ``'action'`` key; ``merge`` tells whether ``'Merge'`` is present in
        the commit data.

    Raises:
        KeyError: If one of the required keys is missing.
        ValueError: If a date string does not match the expected format.
    """
    # Date layout such as "Tue Aug 14 14:30:13 2012 -0700".
    git_stamp = "%a %b %d %H:%M:%S %Y %z"
    to_datetime = datetime.datetime.strptime

    origin = commit['origin']
    payload = commit['data']
    return {
        'repo': origin,
        'hash': payload['commit'],
        'author': payload['Author'],
        'author_date': to_datetime(payload['AuthorDate'], git_stamp),
        'commit': payload['Commit'],
        'created_date': to_datetime(payload['CommitDate'], git_stamp),
        'files_no': len(payload['files']),
        'files_action': sum(
            1 for entry in payload['files'] if 'action' in entry
        ),
        'merge': 'Merge' in payload,
    }

In [4]:
def summary_issue(issue):
    """Build a flat summary dict from one issue item.

    Args:
        issue: Mapping with an ``'origin'`` entry and a ``'data'`` mapping
            holding ``'id'``, ``'user'`` (with a ``'login'``),
            ``'created_at'``, ``'updated_at'``, ``'comments'`` and
            ``'state'``.

    Returns:
        A dict with the keys ``repo``, ``hash``, ``author``,
        ``created_date``, ``updated_date``, ``comments`` and ``state``.
        ``updated_date`` is ``None`` when ``'updated_at'`` is falsy.

    Raises:
        KeyError: If one of the required keys is missing.
        ValueError: If a timestamp does not match the expected format.
    """
    # Timestamp layout such as "2019-03-01T12:00:00Z"; parsed as naive.
    stamp_format = "%Y-%m-%dT%H:%M:%SZ"
    to_datetime = datetime.datetime.strptime

    origin = issue['origin']
    payload = issue['data']

    result = {
        'repo': origin,
        'hash': payload['id'],
        'author': payload['user']['login'],
        'created_date': to_datetime(payload['created_at'], stamp_format),
    }
    if payload['updated_at']:
        result['updated_date'] = to_datetime(payload['updated_at'],
                                             stamp_format)
    else:
        result['updated_date'] = None
    result['comments'] = payload['comments']
    result['state'] = payload['state']
    return result

In [5]:
def summary_pr(pr):
    """Build a flat summary dict from one pull request item.

    Args:
        pr: Mapping with an ``'origin'`` entry and a ``'data'`` mapping
            holding ``'id'``, ``'user'`` (with a ``'login'``),
            ``'created_at'``, ``'closed_at'``, ``'merged_at'``,
            ``'comments'``, ``'commits'``, ``'additions'``, ``'deletions'``,
            ``'changed_files'`` and ``'state'``.

    Returns:
        A dict with the keys ``repo``, ``hash``, ``author``,
        ``created_date``, ``closed_date``, ``merged_date``, ``comments``,
        ``commits``, ``additions``, ``deletions``, ``changed_files`` and
        ``state``. ``closed_date`` and ``merged_date`` are ``None`` when the
        corresponding timestamp is falsy.

    Raises:
        KeyError: If one of the required keys is missing.
        ValueError: If a timestamp does not match the expected format.
    """
    # Timestamp layout such as "2019-03-01T12:00:00Z"; parsed as naive.
    stamp_format = "%Y-%m-%dT%H:%M:%SZ"

    def parse_optional(raw):
        # Closed/merged timestamps may be absent (falsy) for open or
        # unmerged pull requests.
        if raw:
            return datetime.datetime.strptime(raw, stamp_format)
        return None

    origin = pr['origin']
    payload = pr['data']
    return {
        'repo': origin,
        'hash': payload['id'],
        'author': payload['user']['login'],
        'created_date': datetime.datetime.strptime(payload['created_at'],
                                                   stamp_format),
        'closed_date': parse_optional(payload['closed_at']),
        'merged_date': parse_optional(payload['merged_at']),
        'comments': payload['comments'],
        'commits': payload['commits'],
        'additions': payload['additions'],
        'deletions': payload['deletions'],
        'changed_files': payload['changed_files'],
        'state': payload['state'],
    }

## Dividing the data source into contribution types

In [6]:
# Read the data file one JSON document per line and group the summaries of
# the items by their 'category' value.
content = defaultdict(list)

# One summary function per handled category.
summarizers = {
    'commit': summary_commit,
    'issue': summary_issue,
    'pull_request': summary_pr,
}

# Explicit encoding so reading does not depend on the platform's locale.
with open('../elasticsearch-py.json', encoding='utf-8') as datasrc:
    for raw_line in datasrc:
        # A blank line (e.g. a trailing newline) is not a JSON document.
        if not raw_line.strip():
            continue
        item = json.loads(raw_line)
        category = item['category']
        summarize = summarizers.get(category)
        if summarize is None:
            # Skip categories without a summary function. Without this guard
            # the previous item's summary would be stored under this
            # category, or a NameError raised if this were the first item.
            continue
        content[category].append(summarize(item))