In [2]:
import sys
import os

import astetik
import pandas as pd

# Put the parent of the current working directory at the front of sys.path
# so that `productivity_analytics` can be imported below.
# NOTE(review): assumes the notebook is started from a direct sub-directory of
# the directory that holds `productivity_analytics` and `data/` — confirm.
parent_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.insert(0, parent_path)

# Must stay below the sys.path change above.
import productivity_analytics

# Build file locations with os.path.join rather than concatenating strings
# with a hard-coded '/' separator.
data_dir = os.path.join(parent_path, 'data')

# Load each CSV and tag every row with the kind of record it holds.
# `.assign` returns the labelled frame in one expression, so no frame is
# mutated after it has been created.
pr_df = pd.read_csv(os.path.join(data_dir, 'pr_data.csv')).assign(type='pr')
review_df = pd.read_csv(os.path.join(data_dir, 'review_data.csv')).assign(type='review')

## Merged PR per Tag

Comparing the number of Conventional Commits tags in the PR titles gives an indication of how many resources go to different kinds of tasks, and can be summarized as "Sustaining Work vs. Transformative Work".

**NOTE:** The data here is limited to the period starting 1 August 2024, which is when we started to actively use Conventional Commits tags in our PR titles.

In [2]:
from productivity_analytics import merged_per_tag

# Build the per-tag dataset once from the PR frame; the two summary cells
# that follow both take `merged_per_tag_data` as their input.
# NOTE(review): merged_per_tag is defined outside this notebook — presumably it
# keeps merged PRs and reads the Conventional Commits tag from the PR title.
# Confirm that it also applies the 1 August 2024 cutoff described in the note
# above; nothing in this cell filters by date.
merged_per_tag_data = merged_per_tag(pr_df)

In [3]:
from productivity_analytics import pr_per_tag

# Per-tag summary (count and relative share) of the dataset prepared in the
# previous cell; the bare name on the last line renders the table.
tag_summary = pr_per_tag(merged_per_tag_data)
tag_summary

Unnamed: 0_level_0,count,relative_share
tag,Unnamed: 1_level_1,Unnamed: 2_level_1
feat,49,34.75
fix,40,28.37
refactor,16,11.35
build,12,8.51
ci,8,5.67
test,7,4.96
docs,5,3.55
style,2,1.42
perf,1,0.71
chore,1,0.71


In [4]:
from productivity_analytics import sustaining_vs_transformative

# Same per-tag dataset, rolled up by type of work; the bare name on the last
# line renders the table.
work_type_summary = sustaining_vs_transformative(merged_per_tag_data)
work_type_summary

Unnamed: 0_level_0,count,relative_share
type_of_work,Unnamed: 1_level_1,Unnamed: 2_level_1
sustaining,75,53.19
transformative,66,46.81


## Merged vs. Not Merged

Comparing the number of merged vs. non-merged PRs per contributor is a complementary indicator of productivity issues at the contributor level. It is summarized as `merge_rate`, where higher values (max 100) indicate that a larger share of the performed work ends up in production.

In [5]:
from productivity_analytics import merged_vs_not

# Per-contributor merged / not-merged counts and merge rate, computed from the
# full PR frame; the bare name on the last line renders the table.
merge_rate_summary = merged_vs_not(pr_df)
merge_rate_summary

Unnamed: 0_level_0,merged_count,not_merged_count,total_prs,merge_rate
user_login,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
wangyaopw,113,4,117,96.58
SanjayVas,452,28,480,94.17
robinsons,15,1,16,93.75
yunyeng,43,3,46,93.48
oliver-amzn,11,1,12,91.67
efoxepstein,51,5,56,91.07
duliomatos1,8,1,9,88.89
tristanvuong2021,210,29,239,87.87
riemanli,69,10,79,87.34
jojijac0b,19,3,22,86.36


## Wasted Reviews

Looking at the number of reviews an author receives for their PRs that never get merged, and contrasting it with the total number of PRs by that author, is another way to identify loss of productivity.

In [6]:
from productivity_analytics import wasted_reviews

# Takes both frames loaded in the setup cell (PRs and reviews); the bare name
# on the last line renders the per-author table.
review_waste_summary = wasted_reviews(pr_df, review_df)
review_waste_summary

Unnamed: 0,user_login,review_comment_count,pr_count,review_waste_ratio
10,jojijac0b,1,20,0.05
2,SanjayVas,25,457,0.05
25,wangyaopw,10,115,0.09
8,iverson52000,3,29,0.1
6,duliomatos1,1,9,0.11
18,riemanli,9,73,0.12
20,robinsons,3,16,0.19
26,wfa-hkawalkar,1,5,0.2
30,yunyeng,9,46,0.2
24,uakyol,14,63,0.22


## Lines Added vs. Deleted

While not always straightforward to interpret, it can sometimes be useful to understand how authors are adding lines versus deleting lines in their PRs.

In [3]:
from productivity_analytics import lines_added_vs_deleted

# Per-author totals of added and deleted lines, computed from the full PR
# frame; the bare name on the last line renders the table.
line_delta_summary = lines_added_vs_deleted(pr_df)
line_delta_summary

Unnamed: 0_level_0,total_prs,total_lines_added,total_lines_deleted,total_delta
user_login,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
wangyaopw,117,203440,36184,167256
tristanvuong2021,239,115981,31014,84967
uakyol,77,88365,6866,81499
riemanli,79,51503,9299,42204
renjiezh,174,53098,15212,37886
ple13,40,42819,5793,37026
SanjayVas,480,293102,258623,34479
Marco-Premier,37,41766,11126,30640
YuhongWang-Amazon,41,38278,8100,30178
bdomen-ggl,35,25055,2589,22466
