In [None]:
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from scipy import signal

In [None]:
# Load the commit export and keep only the columns used by this notebook.
commits_df = pd.read_csv('for_ed.csv', lineterminator='\n')[
    [
        'author', 'date', 'message',
        'stats_insertions', 'stats_deletions', 'stats_lines',
        'hour', 'minute', 'day',
        'message_length',
        'neg', 'neu', 'pos', 'compound',
    ]
]

In [None]:
# Author under analysis; matched against the 'author' column of commits_df.
author = ''

In [None]:
# Commits of the selected author. An explicit copy makes this an independent
# frame, so cells that later add columns to it neither raise pandas'
# SettingWithCopyWarning nor depend on view-vs-copy semantics.
by_author = commits_df.loc[commits_df['author'] == author].copy()

In [None]:
# Display the loaded commit table.
commits_df

In [None]:
# Commits per author (count of non-null 'date' values), busiest author first.
by_author_total = (
    commits_df
    .groupby(['author'])['date']
    .agg(['count'])
    .sort_values(by='count', ascending=False)
)
by_author_total

In [None]:
# Per-author mean of every numeric column, most positive sentiment first.
# numeric_only=True is needed because the frame also carries text columns
# ('date', 'message'); pandas >= 2.0 raises a TypeError when asked to average
# them instead of silently dropping them.
all_means = (
    commits_df
    .groupby(['author'])
    .mean(numeric_only=True)
    .sort_values(by=['pos'], ascending=False)
)
all_means

In [None]:
# Per-author variance of every numeric column, largest 'compound' variance first.
# numeric_only=True keeps the text columns ('date', 'message') out of the
# aggregation; pandas >= 2.0 raises a TypeError on them otherwise.
all_vars = (
    commits_df
    .groupby(['author'])
    .var(numeric_only=True)
    .sort_values(by=['compound'], ascending=False)
)
all_vars

In [None]:
# Histogram of commit-message lengths for the selected author.
hist_message_length = px.histogram(
    by_author,
    x='message_length',
)
hist_message_length.show()

In [None]:
# One-dimensional strip of authors by mean message length: every point is
# placed at y=0 through the constant 'useless' column, and hovering a point
# reveals the author.
all_means['useless'] = 0
m_length = px.scatter(
    all_means.reset_index(),
    x='message_length',
    y='useless',
    hover_data=['author'],
    labels={'message_length': 'Message length'},
)
m_length.update_traces(
    marker={'size': 12, 'line': {'width': 30, 'color': 'DarkSlateGrey'}},
    selector={'mode': 'markers'},
)
# Disabled styling, kept for reference (transparent backgrounds, hidden y axis):
#m_length.update_layout({
    #'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    #'paper_bgcolor': 'rgba(0, 0, 0, 0)',
    #'yaxis':{'visible':False}
#})
m_length.add_annotation(
    x=30,
    y=0,
    text="Text annotation with arrow",
    showarrow=True,
    yshift=30,
    font={'family': "Courier New, monospace", 'size': 16, 'color': "#ffffff"},
    arrowhead=2,
    arrowsize=1,
    arrowwidth=2,
)
m_length.update_xaxes(
    tickangle=45,
    title_text="Message length",
    title_font={'size': 40, 'color': 'white'},
    title_standoff=25,
    tickfont={'color': 'white', 'size': 40},
)
m_length.show()

In [None]:
# Cumulative lines inserted / deleted by the selected author over time.
# - numeric_only=True keeps text columns such as 'message' out of the sum
#   (pandas >= 2.0 would otherwise concatenate the strings of each group).
# - The frame is grouped and sorted by 'date', so the running totals are
#   plotted against 'date'; the summed 'day' column is not a usable x axis.
# NOTE(review): 'date' is sorted as loaded from the CSV (text) - this assumes
# a format that sorts chronologically, confirm.
by_day = (
    by_author
    .groupby(['date'])
    .sum(numeric_only=True)
    .reset_index()
    .sort_values(by=['date'], ascending=True)
)
by_day['cumsum_insertions'] = by_day['stats_insertions'].cumsum()
by_day['cumsum_deletions'] = by_day['stats_deletions'].cumsum()
by_day['rate_of_increase'] = by_day['cumsum_insertions'].diff()
by_day_g = px.line(by_day, x='date', y=['cumsum_insertions', 'cumsum_deletions'])
by_day_g.show()

In [None]:
# Running total of the selected author's commits, accumulated across the
# sorted values of 'day'.
by_day_count = (
    by_author
    .groupby(['day'])
    .size()
    .reset_index()
    .sort_values(by=['day'], ascending=True)
    .rename(columns={0: 'count'})
)
by_day_count['count_commits'] = by_day_count['count'].cumsum()

by_day_count_g = px.line(
    by_day_count,
    x='day',
    y='count_commits',
)
by_day_count_g.show()

In [None]:
# Running total of commits by everyone except the selected author,
# accumulated across the sorted values of 'day'.
by_day_count_all = (
    commits_df[commits_df['author'] != author]
    .groupby(['day'])
    .size()
    .reset_index()
    .sort_values(by=['day'], ascending=True)
    .rename(columns={0: 'count'})
)
by_day_count_all['count_commits'] = by_day_count_all['count'].cumsum()

by_day_count_all_g = px.line(
    by_day_count_all,
    x='day',
    y='count_commits',
)
by_day_count_all_g.show()

In [None]:
# Cumulative lines inserted / deleted per calendar day by everyone except the
# selected author.
# - .assign() derives the calendar day on a new frame instead of writing a
#   column into a filtered slice of commits_df (SettingWithCopyWarning).
# - numeric_only=True keeps text columns ('date', 'message', 'author') out of
#   the per-day sum.
by_day = (
    commits_df[commits_df['author'] != author]
    .assign(day=lambda frame: pd.to_datetime(frame['date']).dt.date)
    .groupby(['day'])
    .sum(numeric_only=True)
    .reset_index()
    .sort_values(by=['day'], ascending=True)
)
by_day['cumsum_insertions'] = by_day['stats_insertions'].cumsum()
by_day['cumsum_deletions'] = by_day['stats_deletions'].cumsum()
by_day['rate_of_increase'] = by_day['cumsum_insertions'].diff()
by_day_g = px.line(by_day, x='day', y=['cumsum_insertions', 'cumsum_deletions'])
by_day_g.show()

In [None]:
# Per-author totals of the numeric columns, largest number of insertions first.
# numeric_only=True prevents pandas >= 2.0 from "summing" the text columns
# ('date', 'message') by concatenating every string of an author.
all_sums = (
    commits_df
    .groupby(['author'])
    .sum(numeric_only=True)
    .sort_values(by=['stats_insertions'], ascending=False)
)
# Insertions per deleted line; authors with zero deletions yield inf (or NaN for 0/0).
all_sums['insertion_ratio'] = all_sums['stats_insertions'] / all_sums['stats_deletions']
all_sums

In [None]:
# Hour-of-day density: all commits versus the selected author's commits.
# The hours are read straight from commits_df / by_author rather than from
# a_and_b, which is only built in a later cell - the original order made this
# cell fail on a fresh "Restart & Run All". The two series are the same ones
# the a_and_b split produced (Group == "All" is every row of commits_df, the
# remainder is by_author).
# NOTE(review): the first series contains every commit, including the selected
# author's, although it is labelled 'Everyone else' - confirm that is intended.
stuff = [commits_df['hour'], by_author['hour']]
fig = ff.create_distplot(stuff, ['Everyone else', 'Ed'], show_rug=False)
fig.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)'
})
fig.show()

In [None]:
# Stack the selected author's commits on top of the full commit table, tagging
# each row with a 'Group' label so both can share one overlaid histogram.
# .assign() returns new frames: the original aliased by_author / commits_df and
# wrote the 'Group' column into them in place, which raised
# SettingWithCopyWarning and silently changed frames used by other cells.
a = by_author.assign(Group=author)
b = commits_df.assign(Group='All')
a_and_b = pd.concat([a, b])
hour_histogram = px.histogram(a_and_b, x='hour', color='Group', barmode='overlay', template='plotly_dark')
hour_histogram.update_xaxes(range=[0, 23])
hour_histogram.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
hour_histogram.show()

In [None]:
# Mean insertions per commit for each hour of day, one bar series per Group.
# numeric_only=True is required because a_and_b still contains text columns;
# pandas >= 2.0 raises a TypeError when averaging them.
f = px.bar(
    a_and_b.groupby(by=['hour', 'Group']).mean(numeric_only=True).reset_index(),
    x='hour',
    y='stats_insertions',
    color="Group",
    barmode="overlay",
    template='plotly_dark',
    labels={
        "stats_insertions": "Insertions",
        "hour": "Hour of day",
    },
)
f.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
f.show()

In [None]:
# Hour-of-day histogram for the frame `b`, coloured by its 'Group' column.
hour_histogram_all = px.histogram(
    b,
    x='hour',
    color='Group',
    barmode='overlay',
    template='plotly_dark',
    color_discrete_sequence=['indianred'],
)
hour_histogram_all.update_layout(
    {
        'plot_bgcolor': 'rgba(0, 0, 0, 0)',
        'paper_bgcolor': 'rgba(0, 0, 0, 0)',
    }
)
hour_histogram_all.show()

In [None]:
# Histogram of the 'minute' column for the selected author's commits.
minute_histogram = px.histogram(
    by_author,
    x='minute',
)
minute_histogram.show()

In [None]:
# Mean of each numeric column per hour of day for the selected author, plotted
# as the mean 'compound' sentiment score by hour.
# numeric_only=True keeps the text columns out of the mean; pandas >= 2.0
# raises a TypeError on them otherwise.
# NOTE(review): the labels mapping names 'message_length', but the plotted y
# column is 'compound', so that entry currently has no effect - confirm intent.
means = by_author.groupby(['hour']).mean(numeric_only=True).reset_index()
msg_length = px.scatter(
    means,
    x='hour',
    y='compound',
    template='plotly_dark',
    labels={
        "message_length": "Message length",
        "hour": "Hour of day",
    },
)
msg_length.update_traces(
    marker=dict(size=12, line=dict(width=30, color='DarkSlateGrey')),
    selector=dict(mode='markers'),
)
msg_length.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
msg_length.show()

In [None]:
# Display the per-hour means computed for the selected author.
means

In [None]:
# Mean insertions per commit by hour of day, across all authors.
# numeric_only=True keeps the text columns out of the mean; pandas >= 2.0
# raises a TypeError on them otherwise.
means_all = commits_df.groupby(['hour']).mean(numeric_only=True).reset_index()
msg_length_all = px.scatter(means_all, x='hour', y='stats_insertions', template='plotly_dark')
msg_length_all.show()

In [None]:
# Mean insertions per commit for each value of 'day', across all authors.
# The original computed means_days but then re-plotted means_all by hour (a
# copy of the previous cell); the chart now uses the per-day frame it builds.
# numeric_only=True keeps the text columns out of the mean.
means_days = commits_df.groupby(['day']).mean(numeric_only=True).reset_index()
msg_length_all = px.scatter(means_days, x='day', y='stats_insertions', template='plotly_dark')
msg_length_all.show()

In [None]:
# Number of commits by the selected author for each value of 'day'.
# plotly.express is already imported as px in the notebook's first cell, so
# the repeated import was dropped.
weekday_histogram = px.histogram(
    by_author,
    x='day',
    template='plotly_dark',
    labels={"day": "Day of the week"},
)
# The `labels` mapping is keyed by column name, so the former "Count" entry
# matched nothing; the y-axis title is set explicitly instead.
weekday_histogram.update_yaxes(title_text="Commits")
weekday_histogram.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
weekday_histogram.show()

In [None]:
# Display the selected author's commits.
by_author

In [None]:
# Number of commits for each value of 'day', across all authors.
weekday_histogram_all = px.histogram(
    commits_df,
    x='day',
)
weekday_histogram_all.show()

In [None]:
# Mean deletions and insertions per commit for each value of 'day', for the
# selected author, as grouped bars.
# numeric_only=True keeps the text columns out of the mean; pandas >= 2.0
# raises a TypeError on them otherwise. The imports that used to sit in this
# cell live in the notebook's import cell.
means_day = by_author.groupby(['day']).mean(numeric_only=True).reset_index()
msg_length_day = px.bar(
    means_day,
    x='day',
    y=['stats_deletions', 'stats_insertions'],
    template='plotly_dark',
    barmode='group',
)
msg_length_day.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
msg_length_day.show()

In [None]:
# Most frequent words in the selected author's commit messages.
by_message = by_author.copy()
# Vectorised lower-casing and splitting on single spaces. Unlike the previous
# Python-level lambda, the .str accessor passes missing messages through as
# NaN instead of raising AttributeError on a float.
by_message['message_processed'] = by_message['message'].str.lower().str.split(' ')
# One row per word, then count rows per word; the 'date' column of the result
# holds the number of occurrences and drives the ordering.
exploded = (
    by_message
    .explode('message_processed')
    .groupby('message_processed')
    .count()
    .reset_index()
    .sort_values('date', ascending=False)
)
# Tokens excluded from the ranking (merge boilerplate, stop words, empty token).
filter_words = ['merge', 'branch', "'master'", 'of', 'github.com:spqt/bidbrain-api', 'for', '','to','the','not','with','it','a','when',"'origin/master'", 'and','in']
exploded.loc[~exploded['message_processed'].isin(filter_words), ['message_processed', 'date']].head(60)

In [None]:
# Mean positive / negative sentiment of the selected author's commit messages
# for each value of 'day'.
# numeric_only=True keeps the text columns out of the mean; pandas >= 2.0
# raises a TypeError on them otherwise.
means_day = (
    by_author
    .groupby(['day'])
    .mean(numeric_only=True)
    .reset_index()
    .rename(columns={'pos': 'Positive', 'neg': 'Negative', 'neu': 'Neutral', 'compound': 'Mood'})
)
mood_per_day = px.line(means_day, x='day', y=['Positive', 'Negative'], template='plotly_dark', line_shape='spline')
mood_per_day.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
mood_per_day.update_traces(line=dict(width=10))
# Replace the x values of every trace with weekday names ("Thuesday" typo fixed).
# NOTE(review): this assumes means_day has exactly five rows ordered
# Monday..Friday - confirm against the encoding of the 'day' column.
for trace in mood_per_day.data:
    trace.x = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
mood_per_day.show()

In [None]:
# Mean of the numeric columns per calendar date (YYYY-MM-DD), all authors.
# The calendar date is attached with .assign() on a temporary frame: the
# original overwrote commits_df['day'] in place, so every cell that runs
# afterwards and groups commits_df by 'day' received date strings instead of
# the column's original values, and re-running earlier cells changed results.
# numeric_only=True keeps the text columns out of the mean.
means_dates = (
    commits_df
    .assign(day=pd.to_datetime(commits_df['date']).dt.strftime('%Y-%m-%d'))
    .groupby(['day'])
    .mean(numeric_only=True)
    .reset_index()
    .rename(columns={'pos': 'Positive', 'neg': 'Negative'})
)
mood_per_date = px.line(means_dates, x='day', y=['stats_insertions', 'Negative'], template='plotly_dark')
mood_per_date.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
#mood_per_date.update_traces(line=dict(width=10))
mood_per_date.show()

In [None]:
# Display the per-date means.
means_dates

In [None]:
# Mean negative-sentiment score of the selected author's commits per 'day'.
# The mean is computed here from by_author: the shared means_day frame has its
# 'neg' column renamed to 'Negative' by an earlier cell, so plotting y='neg'
# from it fails when the notebook is run top to bottom.
msg_length_day = px.bar(
    by_author.groupby(['day'])['neg'].mean().reset_index(),
    x='day',
    y='neg',
    template='plotly_dark',
)
msg_length_day.show()

In [None]:
# Mean neutral-sentiment score of the selected author's commits per 'day'.
# The mean is computed here from by_author: the shared means_day frame has its
# 'neu' column renamed to 'Neutral' by an earlier cell, so plotting y='neu'
# from it fails when the notebook is run top to bottom.
msg_length_day = px.bar(
    by_author.groupby(['day'])['neu'].mean().reset_index(),
    x='day',
    y='neu',
    template='plotly_dark',
)
msg_length_day.show()

In [None]:
# All commits ordered from the highest to the lowest 'pos' score.
commits_df.sort_values(
    by=['pos'],
    ascending=False,
)

In [None]:
# Print the message and hour of the selected author's ten commits with the
# highest 'pos' score, one value per line.
top_positive = by_author.sort_values(by=['pos'], ascending=False).head(10)
for message, hour in zip(top_positive['message'], top_positive['hour']):
    print(message)
    print(hour)

In [None]:
# Number of "Merge pull request" commits by the selected author per 'day'.
# - na=False treats a missing message as "not a merge" (matching the
#   commits_df variants of this filter) instead of producing a NaN mask.
# - The intermediate frame gets its own name; `merges_by_day` is only ever the
#   figure, rather than first a DataFrame and then a Figure.
merge_commits = by_author[by_author['message'].str.contains("Merge pull request", na=False)]
merge_counts = merge_commits.groupby('day').count().reset_index()

merges_by_day = px.bar(merge_counts, x='day', y=['author'], template='plotly_dark')
merges_by_day.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
# Replace the x values of every trace with weekday names ("Thuesday" typo fixed).
# NOTE(review): this assumes one bar per weekday, ordered Monday..Sunday -
# confirm against the encoding of the 'day' column.
for trace in merges_by_day.data:
    trace.x = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
merges_by_day.show()

In [None]:
# Number of "Merge pull request" commits per 'day', across all authors.
merge_counts_all = (
    commits_df[commits_df['message'].str.contains("Merge pull request", na=False)]
    .groupby('day')
    .count()
    .reset_index()
)

merges_by_day_all = px.bar(merge_counts_all, x='day', y=['author'], template='plotly_dark')
merges_by_day_all.update_layout({
    'plot_bgcolor': 'rgba(0, 0, 0, 0)',
    'paper_bgcolor': 'rgba(0, 0, 0, 0)',
})
# Relabel this figure's own traces. The original loop took its length from the
# per-author figure (`merges_by_day`), which only worked while both figures
# happened to have the same number of traces. "Thuesday" typo fixed.
# NOTE(review): the labels assume 'day' holds a weekday with one bar per day,
# ordered Monday..Sunday - confirm commits_df['day'] has not been overwritten
# with calendar dates before this cell runs.
for trace in merges_by_day_all.data:
    trace.x = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
merges_by_day_all.show()

In [None]:
# The selected author's commits that are not "Merge pull request" commits.
# na=False makes a missing message count as "not a merge", consistent with the
# commits_df versions of this filter; without it a NaN in the mask makes the
# `~` negation / boolean indexing fail.
by_author[~by_author['message'].str.contains("Merge pull request", na=False)]

In [None]:
# Every "Merge pull request" commit; rows with a missing message are excluded.
commits_df.loc[
    commits_df['message'].str.contains("Merge pull request", na=False)
]

In [None]:
# Every commit that is not a "Merge pull request" commit; rows with a missing
# message are kept.
commits_df.loc[
    ~commits_df['message'].str.contains("Merge pull request", na=False)
]

In [None]:
# Hand-entered ratio; where the two numbers come from is not shown here.
# NOTE(review): presumably merge commits / total commits for all authors - confirm.
723/10391

In [None]:
# Hand-entered ratio; where the two numbers come from is not shown here.
# NOTE(review): presumably merge commits / total commits for the selected author - confirm.
72/287

In [None]:
# Per-author count of non-null values in each column, restricted to
# "Merge pull request" commits.
(
    commits_df
    .loc[commits_df['message'].str.contains("Merge pull request", na=False)]
    .groupby('author')
    .count()
)