In [145]:
import re
import git
import math

# Set the path of the git repository and the commit id

repo_name = 'spring-framework'
id = '2ae6a6a3415eebc57babcb9d3e5505887eda6d8a'

repo = git.Repo(repo_name)
commit = repo.commit(id)

In [91]:
print('Title and message')
print('-----------------', end='\n\n')
print(commit.message)

Title and message
-----------------

SOLR-12316: Do not allow to use absolute URIs for including other files in solrconfig.xml and schema parsing

# Conflicts:
#	solr/CHANGES.txt



In [81]:
print('Number of affected files')
print('------------------------', end='\n\n')

if commit.stats.total['files'] == 1:
    print(str(commit.stats.total['files']) + ' file was affected')
else:
    print(str(commit.stats.total['files']) + ' files were affected')

Number of affected files
------------------------

3 files were affected


In [82]:
print('Number of affected directories')
print('------------------------------', end='\n\n')

uniqueDir = set()

for file in commit.stats.files:
    # Split and reconstruct to find the path of each file
    directories = file.split('/')
    uniqueDir.add('/'.join(directories[:-1]))
    print('/'.join(directories[:-1]))
    
if len(uniqueDir) == 1:
    print('\n' + str(len(uniqueDir)) + ' directory was affected')
else:
    print('\n' + str(len(uniqueDir)) + ' directories were affected')

Number of affected directories
------------------------------

solr
solr/core/src/java/org/apache/solr/util
solr/core/src/test/org/apache/solr/util

3 directories were affected


In [83]:
print('Total lines removed (including comments and blank lines)')
print('--------------------------------------------------------', end='\n\n')
print(str(commit.stats.total['deletions']) + ' lines were removed')

Total lines removed (including comments and blank lines)
--------------------------------------------------------

12 lines were removed


In [84]:
print('Total lines added (including comments and blank lines)')
print('------------------------------------------------------', end='\n\n')
print(str(commit.stats.total['insertions']) + ' lines were inserted')

Total lines added (including comments and blank lines)
------------------------------------------------------

24 lines were inserted


In [143]:
print('Total lines removed (excluding comments and blank lines)')
print('--------------------------------------------------------', end='\n\n')

# Exclude whitespace and comments

diff = repo.git.diff(commit, commit.parents[0])
total_lines = commit.stats.total['deletions'] - len(re.findall(re.compile(r'^-((\s*)\/\/|\n)', re.MULTILINE), diff))
print(str(total_lines) + ' lines were removed')

Total lines removed (excluding comments and blank lines)
--------------------------------------------------------

30 lines were removed


In [144]:
print('Total lines inserted (excluding comments and blank lines)')
print('---------------------------------------------------------', end='\n\n')

# Exclude whitespace and comments

total_lines = commit.stats.total['insertions'] - len(re.findall(re.compile(r'^\+((\s*)\/\/|\n)', re.MULTILINE), diff))
print(str(total_lines) + ' lines were inserted')

Total lines inserted (excluding comments and blank lines)
---------------------------------------------------------

204 lines were inserted


In [87]:
print('Days since previous commit for each file')
print('----------------------------------------', end='\n\n')

average = 0

for file in commit.stats.files:
    commits = repo.iter_commits('--all', paths=file)

    finished = False
    previous_commit = None

    # Find the commit which is next in the list after the current commit
    
    for com in commits:
        if finished:
            previous_commit = com
            break

        if str(com) == id:
            finished = True

    if previous_commit is None:
        print('File created:', end='\t')
    else:
        # Find the number of days between the commits
        time_diff = commit.committed_date - previous_commit.committed_date
        print(math.floor(time_diff / 86400), end=' days:\t')
        average += time_diff / 86400

    print(file)

print('\nAverage: ' + str(average / len(commit.stats.files)))

Days since previous commit for each file
----------------------------------------

1 days:	solr/CHANGES.txt
821 days:	solr/core/src/java/org/apache/solr/util/SystemIdResolver.java
821 days:	solr/core/src/test/org/apache/solr/util/TestSystemIdResolver.java

Average: 548.582449845679


In [88]:
print('Number of modifications to each file before the current commit')
print('--------------------------------------------------------------', end='\n\n')

average = 0

for file in commit.stats.files:
    count = -1

    commits = repo.iter_commits('--all', paths=file)
    
    # Count the number of modifications to each file before the current commit
    
    for com in commits:
        if count >= 0:
            count += 1

        if str(com) == id:
            count = 0

    average += count
    print(count, end='\t')
    print(file)
    
print('\nAverage: ' + str(average / len(commit.stats.files)))

Number of modifications to each file before the current commit
--------------------------------------------------------------

4462	solr/CHANGES.txt
7	solr/core/src/java/org/apache/solr/util/SystemIdResolver.java
6	solr/core/src/test/org/apache/solr/util/TestSystemIdResolver.java

Average: 1491.6666666666667


In [114]:
print('Names of developers who worked on each file')
print('-------------------------------------------', end='\n\n')

average = 0
total = set()

for file in commit.stats.files:
    print(file)
    
    # Add the names to a set to remove duplicates

    names = set()
    commits = repo.iter_commits('--all', paths=file)

    for com in commits:
        total.add(com.author.name)
        names.add(com.author.name)

    for name in names:
        print(name)
 
    average += len(names)
    print('Total: ' + str(len(names)), end='\n\n')
    
print('Total:   ' + str(len(total)))
print('Average: ' + str(average / len(commit.stats.files)))

Names of developers who worked on each file
-------------------------------------------

solr/CHANGES.txt
Jason Gerlowski
Gregory Chanan
tballison
Sami Siren
Tommaso Teofili
Ishan Chattopadhyaya
Shai Erera
nknize
Tomas Eduardo Fernandez Lobbe
Varun Thacker
David Smiley
yonik
Mike McCandless
Joel Bernstein
Grant Ingersoll
Greg Bowyer
Hrishikesh Gadre
Dennis Gove
Areek Zillur
Christopher John Male
jbernste
koji
thelabdude
Jeff
James Dyer
Erick
Jan Høydahl
noble
Alan Woodward
Steven Rowe
Uwe Schindler
markrmiller
Anshum Gupta
Ramkumar Aiyengar
Shawn Heisey
Mark Robert Miller
Mark Miller
Tomás Fernández Löbbe
Christine Poerschke
Mike Drob
David Wayne Smiley
Ryan McKinley
Houston Putman
Cao Manh Dat
epugh
Chris Hostetter
Andrzej Bialecki
Shalin Shekhar Mangar
Martijn van Groningen
Simon Willnauer
Ryan Ernst
Yonik Seeley
Doron Cohen
Erik Hatcher
Adrien Grand
Upayavira
elyograg
Benson Margulies
jdyer1
anshum
Michael McCandless
Stefan Matheis
Toke Eskildsen
Chris M. Hostetter
Scott Blum
Tomas 

In [116]:
print('Commits per developer')
print('---------------------', end='\n\n')

max = 0
number_of_commits = {}

for name in total:
    if len(name) > max:
        max = len(name)
        
    # Get the number of commits by each author

    loginfo = repo.git.log('--author=' + name)
    number_of_commits[name] = len(re.findall(re.compile(r'^commit.*', re.MULTILINE), loginfo))

max_commits = 0
min_commits = 0
    
for name in number_of_commits:
    print(name, end='')
    
    for i in range(len(name), max):
        print(' ', end='')
        
    print(': ', end='')
    print(number_of_commits[name])
    
    # Get max and min commits
    
    if number_of_commits[name] > max_commits:
        max_commits = number_of_commits[name]
        
    if number_of_commits[name] < min_commits or min_commits == 0:
        min_commits = number_of_commits[name]

print('\nMax: ' + str(max_commits))
print('Min: ' + str(min_commits))

Commits per developer
---------------------

Jason Gerlowski              : 13
Gregory Chanan               : 48
tballison                    : 5
Sami Siren                   : 59
Tommaso Teofili              : 150
Ishan Chattopadhyaya         : 96
Shai Erera                   : 417
nknize                       : 53
Tomas Eduardo Fernandez Lobbe: 35
Varun Thacker                : 124
David Smiley                 : 194
yonik                        : 1974
Mike McCandless              : 511
Joel Bernstein               : 415
Grant Ingersoll              : 586
Greg Bowyer                  : 7
Hrishikesh Gadre             : 2
Dennis Gove                  : 59
Areek Zillur                 : 13
Christopher John Male        : 106
jbernste                     : 495
koji                         : 253
thelabdude                   : 192
Jeff                         : 1
James Dyer                   : 117
Erick                        : 340
Jan Høydahl                  : 228
noble                    