# Data importing

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw event log: one row per recorded activity, with the columns
# category ('bot' / 'human'), contributor, repository, activity and date.
activities = pd.read_csv(filepath_or_buffer='../data/activities.csv')

# Bare last expression -> rich (truncated) preview of the frame.
activities

Unnamed: 0,category,contributor,repository,activity,date
0,bot,AmplabJenkins,apache/spark,Commenting pull request,2022-11-25 09:55:19+00:00
1,bot,AmplabJenkins,apache/spark,Commenting pull request,2022-11-25 09:55:23+00:00
2,bot,AmplabJenkins,apache/spark,Commenting pull request,2022-11-25 09:55:26+00:00
3,bot,analysis-bot,facebook/react-native,Commenting pull request,2022-11-25 09:55:27+00:00
4,bot,neos-bot,neos/neos-ui-compiled,Pushing commits,2022-11-25 09:55:47+00:00
...,...,...,...,...,...
1015418,human,tychoish,neondatabase/autoscaling,Reviewing code,2023-04-15 16:06:15+00:00
1015419,human,tychoish,neondatabase/autoscaling,Reviewing code,2023-04-15 16:07:26+00:00
1015420,human,snakefoot,snakefoot/NLog,Creating branch,2023-04-15 16:07:33+00:00
1015421,human,snakefoot,NLog/NLog,Opening pull request,2023-04-15 16:08:07+00:00


# Contributor clustering

## Taking repositories into account (one row per contributor/repository pair)

In [45]:
# Activity profile of every bot, split by repository: one row per
# (contributor, repository) pair, one column per activity type, and each cell
# holding the number of matching events (0 when the pair has none).
bot_activities = pd.pivot_table(
    activities.loc[activities['category'].eq('bot')],  # keep bot rows only
    index=['contributor', 'repository'],
    columns='activity',
    aggfunc='size',  # count rows per (index, column) combination
    fill_value=0,
).reset_index()  # expose contributor / repository as regular columns again

bot_activities

activity,contributor,repository,Adding collaborator to repository,Closing issue,Closing pull request,Commenting commit,Commenting issue,Commenting pull request,Commenting pull request changes,Creating branch,...,Forking repository,Opening issue,Opening pull request,Publishing a release,Pushing commits,Reopening issue,Reopening pull request,Reviewing code,Starring repository,Transferring issue
0,0crat,cqfn/jpeek,0,0,0,0,0,2,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0crat,jcabi/jcabi-aether,0,0,0,0,0,9,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0crat,jcabi/jcabi-aspects,0,0,0,0,0,3,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0crat,jcabi/jcabi-beanstalk-maven-plugin,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0crat,jcabi/jcabi-dynamo,0,0,0,0,0,58,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34345,yii-bot,yiisoft/yii2-queue,0,0,0,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
34346,zcabot,zeroc-ice/ice,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
34347,zeebe-bors[bot],zeebe-io/flaky-test-extractor-maven-plugin,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
34348,zeebe-bors[bot],zeebe-io/zeebe-cluster-testbench,0,0,79,0,0,83,0,61,...,0,0,0,0,150,0,0,0,0,0


In [46]:
# Activity profile of every human contributor, split by repository: one row
# per (contributor, repository) pair, one column per activity type, and each
# cell holding the number of matching events (0 when the pair has none).
human_activities = pd.pivot_table(
    activities.loc[activities['category'].eq('human')],  # keep human rows only
    index=['contributor', 'repository'],
    columns='activity',
    aggfunc='size',  # count rows per (index, column) combination
    fill_value=0,
).reset_index()  # expose contributor / repository as regular columns again

human_activities

activity,contributor,repository,Adding collaborator to repository,Closing issue,Closing pull request,Commenting commit,Commenting issue,Commenting pull request,Commenting pull request changes,Creating branch,...,Making repository public,Opening issue,Opening pull request,Publishing a release,Pushing commits,Reopening issue,Reopening pull request,Reviewing code,Starring repository,Transferring issue
0,3cp,3cp/browser-do,0,0,0,0,0,0,0,0,...,0,0,0,0,5,0,0,0,0,0
1,3cp,3cp/create-demo,0,0,0,0,0,0,0,0,...,0,0,0,0,4,0,0,0,0,0
2,3cp,3cp/demo,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,3cp,3cp/django-easy-audit,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,3cp,3cp/documentation,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18998,zserge,grafana/grafana,0,2,42,0,2,3,14,50,...,0,1,49,0,79,0,0,65,0,0
18999,zserge,grafana/k6,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
19000,zserge,snesrev/zelda3,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
19001,zserge,zserge/fenster,0,2,6,0,0,0,0,0,...,1,0,0,0,20,0,0,2,6,0


## Without taking repositories into account (one row per contributor)

In [47]:
# Activity profile of every bot aggregated over all repositories: one row per
# contributor, one column per activity type, and each cell holding the number
# of matching events (0 when the bot has none).
#
# NOTE(review): this rebinds `bot_activities`, replacing the
# per-(contributor, repository) table assigned to the same name earlier in the
# notebook. Cells that run after this one see the per-contributor table only.
bot_activities = pd.pivot_table(
    activities.loc[activities['category'].eq('bot')],  # keep bot rows only
    index=['contributor'],
    columns='activity',
    aggfunc='size',  # count rows per (index, column) combination
    fill_value=0,
).reset_index()  # expose contributor as a regular column again

bot_activities

activity,contributor,Adding collaborator to repository,Closing issue,Closing pull request,Commenting commit,Commenting issue,Commenting pull request,Commenting pull request changes,Creating branch,Creating repository,...,Forking repository,Opening issue,Opening pull request,Publishing a release,Pushing commits,Reopening issue,Reopening pull request,Reviewing code,Starring repository,Transferring issue
0,0crat,0,0,0,0,28,732,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,24emebot,0,0,0,0,0,0,0,0,0,...,0,0,0,0,426,0,0,0,0,0
2,5iMON-bot,0,0,0,0,0,4,0,0,0,...,0,0,0,1,61,0,0,0,0,0
3,AmplabJenkins,0,0,0,0,0,209,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,ApiaryBot,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
345,yarnbot,0,7,0,0,37,0,0,0,0,...,0,0,0,0,11,0,0,0,0,0
346,yii-bot,0,4,0,0,14,6,0,0,0,...,0,0,0,0,24,0,0,0,0,0
347,zcabot,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
348,zeebe-bors[bot],0,0,79,0,0,83,0,61,0,...,0,0,0,0,150,0,0,0,0,0
