# Microtask-1

> Produce a notebook showing (and producing) a list with the activity per quarter: number of new committers, submitters of issues, and submitters of pull/merge requests, number of items (commits, issues, pull/merge requests), number of repositories with new items (all of this per quarter) as a table and as a CSV file using plain python3 (no pandas).



In [1]:
import json
import datetime
import csv

import requests
import matplotlib.pyplot as plt

from collections import defaultdict
from prettytable import from_csv

## Defining Quaters

The calendar year can be divided into four quarters, often abbreviated as Q1, Q2, Q3, and Q4.
- First quarter, Q1: 1 January – 31 March (90 days or 91 days in leap years)
- Second quarter, Q2: 1 April – 30 June (91 days)
- Third quarter, Q3: 1 July – 30 September (92 days)
- Fourth quarter, Q4: 1 October – 31 December (92 days)

Reference: https://en.wikipedia.org/wiki/Calendar_year

Each Quater is represented as **Qi yyyy** where *i* is the quater number and _yyyy_ is the year.

In [2]:
QUARTERS = (
    ({'month':1,'day':1},  {'month':3,'day':31}),
    ({'month':4,'day':1},  {'month':6,'day':30}),
    ({'month':7,'day':1},  {'month':9,'day':30}),
    ({'month':10,'day':1}, {'month':12,'day':31}),
)
ctypes = ('commit','pull_request','issue')

## Functions to return the summary of the contribution types

In [3]:
def summary_commit(commit):
    repo = commit['origin']
    data = commit['data']
    summary ={
            'repo': repo,
            'hash': data['commit'],
            'author': data['Author'],
            'author_date': datetime.datetime.strptime(data['AuthorDate'],
                                                      "%a %b %d %H:%M:%S %Y %z"),
            'commit': data['Commit'],
            'created_date': datetime.datetime.strptime(data['CommitDate'],
                                                      "%a %b %d %H:%M:%S %Y %z"),
            'files_no': len(data['files'])
    }
    actions = 0
    for file in data['files']:
        if 'action' in file:
            actions += 1
    summary['files_action'] = actions
    summary['merge'] = 'Merge' in data
    return summary

In [4]:
def summary_issue(issue):
    repo = issue['origin']
    data = issue['data']
    summary ={
            'repo': repo,
            'hash': data['id'],
            'author': data['user']['login'],
            'created_date': datetime.datetime.strptime(data['created_at'],"%Y-%m-%dT%H:%M:%SZ"),
            'updated_date': datetime.datetime.strptime(data['updated_at'],"%Y-%m-%dT%H:%M:%SZ")
                                        if data['updated_at'] else None,
            'comments': data['comments'],
            'state': data['state']
    }
    return summary

In [5]:
def summary_pr(pr):
    repo = pr['origin']
    data = pr['data']
    summary ={
            'repo': repo,
            'hash': data['id'],
            'author': data['user']['login'],
            'created_date': datetime.datetime.strptime(data['created_at'],"%Y-%m-%dT%H:%M:%SZ"),
            'closed_date': datetime.datetime.strptime(data['closed_at'],"%Y-%m-%dT%H:%M:%SZ")
                                        if data['closed_at'] else None,
            'merged_date': datetime.datetime.strptime(data['merged_at'],"%Y-%m-%dT%H:%M:%SZ")
                                        if data['merged_at'] else None,

            'comments': data['comments'],
            'commits': data['commits'],
            'additions': data['additions'],
            'deletions': data['deletions'],

            'changed_files': data['changed_files'],
            'state': data['state']
    }  
    return summary

## Dividing the data source into contribution types

In [6]:
content = defaultdict(list)
with open('../elasticsearch-py.json') as datasrc:
    for line in datasrc:
        line = json.loads(line)
        if line['category'] == 'commit':
            summary = summary_commit(line)
        elif line['category'] == 'issue':
            summary = summary_issue(line)
        elif line['category'] == 'pull_request':
            summary = summary_pr(line)
        content[line['category']].append(summary)

## Splitting the data quaterly

In [7]:
repodata =json.loads(requests.get("https://api.github.com/repos/elastic/elasticsearch-py").text)

created =datetime.datetime.strptime(repodata['created_at'][:10], "%Y-%m-%d").year
present =datetime.datetime.strptime(repodata['updated_at'][:10], "%Y-%m-%d").year

In [8]:
def quarterwise(first_year,last_year):
    for year in range(first_year, last_year+1):
        for quarter,(start,end) in enumerate(QUARTERS):
            start = datetime.datetime(year,**start)
            end = datetime.datetime(year,**end)
            yield year,quarter,start,end

In [9]:
quarters = []
activities = defaultdict(list)

newcontributors = defaultdict(list)
oldcontributors = defaultdict(set)

for year,quarter,start,end in quarterwise(created,present):
    quarters.append(r"Q%d %d"%(quarter+1,year))  
    for ctype in ctypes:
        activity =  newcontributor =  0 
        for item in content[ctype]:
            if start<=item['created_date'].replace(tzinfo=None)<=end:
                activity+=1
                if item['author'] not in oldcontributors[ctype]:
                    newcontributor+=1
                    oldcontributors[ctype].add(item['author'])
        activities[ctype].append(activity)
        newcontributors[ctype].append(newcontributor)

## Activity

In [10]:
print("Quaterwise Total Activity\n")
for item in dict(activities):
    print (item, dict(activities)[item])

print("Quaterwise New Activity\n")
for item in dict(newcontributors):
    print (item, dict(activities)[item])

Quaterwise Total Activity

commit [0, 78, 154, 94, 126, 33, 30, 38, 45, 37, 27, 80, 28, 29, 21, 34, 27, 28, 31, 57, 44, 26, 10, 37, 4, 0, 0, 0]
pull_request [0, 0, 3, 14, 16, 18, 8, 14, 13, 9, 6, 14, 11, 4, 7, 17, 15, 14, 13, 20, 24, 17, 7, 13, 8, 0, 0, 0]
issue [0, 0, 6, 24, 39, 29, 33, 39, 41, 33, 32, 50, 63, 26, 41, 52, 51, 47, 40, 39, 65, 54, 35, 39, 19, 0, 0, 0]
Quaterwise New Activity

commit [0, 78, 154, 94, 126, 33, 30, 38, 45, 37, 27, 80, 28, 29, 21, 34, 27, 28, 31, 57, 44, 26, 10, 37, 4, 0, 0, 0]
pull_request [0, 0, 3, 14, 16, 18, 8, 14, 13, 9, 6, 14, 11, 4, 7, 17, 15, 14, 13, 20, 24, 17, 7, 13, 8, 0, 0, 0]
issue [0, 0, 6, 24, 39, 29, 33, 39, 41, 33, 32, 50, 63, 26, 41, 52, 51, 47, 40, 39, 65, 54, 35, 39, 19, 0, 0, 0]


In [14]:
header = ['Quarter','Commits','PullRequests','Issues',
           'Committers','IssueSubmitters','PRSubmitters' ]
with open('elasticsearch-py.csv', 'w') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    rows = zip(quarters,activities['commit'],activities['pull_request'],activities['issue'],
              newcontributors['commit'],newcontributors['pull_request'],newcontributors['issue'],
              )
    writer.writerows(rows)

In [15]:
with open("elasticsearch-py.csv", "r") as csvfile: 
    csvtable = from_csv(csvfile)
    
print(csvtable)

+---------+---------+--------------+--------+------------+-----------------+--------------+
| Quarter | Commits | PullRequests | Issues | Committers | IssueSubmitters | PRSubmitters |
+---------+---------+--------------+--------+------------+-----------------+--------------+
| Q1 2013 |    0    |      0       |   0    |     0      |        0        |      0       |
| Q2 2013 |    78   |      0       |   0    |     1      |        0        |      0       |
| Q3 2013 |   154   |      3       |   6    |     2      |        2        |      5       |
| Q4 2013 |    94   |      14      |   24   |     9      |        9        |      16      |
| Q1 2014 |   126   |      16      |   39   |     6      |        16       |      32      |
| Q2 2014 |    33   |      18      |   29   |     11     |        15       |      25      |
| Q3 2014 |    30   |      8       |   33   |     3      |        8        |      25      |
| Q4 2014 |    38   |      14      |   39   |     3      |        11       |    