In [64]:
import re
import git

# Set the path of the git repository and the commit id

repo_name = 'spring-framework'
id = '2ae6a6a3415eebc57babcb9d3e5505887eda6d8a'

repo = git.Repo(repo_name)
commit = repo.commit(id)

In [65]:
filenames = []

for filename in commit.stats.files:
    filenames.append(filename)

# Will need a set of commits to count all of their occurrences

gitShow = repo.git.show(commit)
files = re.split(r'diff --.*\n', gitShow)
   
removedLines = []
addedLines = []
    
for i in range(1, len(files)):
    snippets = re.split(r'@@.*\n', files[i])
    snippetIndex = 1
    
    removedTemp = []
    addedTemp = []

    for match in re.findall(re.compile(r'@@.*', re.MULTILINE), files[i]):
        split = match.split(' ')

        before = split[1].replace('-', '').split(',')
        before = [int(x) for x in before]

        after = split[2].replace('+', '').split(',')
        after = [int(x) for x in after]

        beforeCounter = before[0]
        afterCounter = after[0]

        lines = snippets[snippetIndex].split('\n')

        # Record removed/added lines
        for line in lines:
            if len(line) > 0 and line[0] is '-':
                
                # Ignore empty lines and comments (single line and multiline)
                if len(line) > 1 and len(re.findall(re.compile(r'^\-((\s*)((\/\/)|\*|(\/\*)))', re.MULTILINE), line)) == 0:
                    removedTemp.append(beforeCounter)

                beforeCounter += 1
            elif len(line) > 0 and line[0] is '+':
                
                # Ignore empty lines and comments (single line and multiline)
                if len(line) > 1 and len(re.findall(re.compile(r'^\+((\s*)((\/\/)|\*|(\/\*)))', re.MULTILINE), line)) == 0:
                    addedTemp.append(afterCounter)

                afterCounter += 1
            else:
                beforeCounter += 1
                afterCounter += 1

        snippetIndex += 1

    removedLines.append(removedTemp)
    addedLines.append(addedTemp)

In [81]:
# Find all commits of deleted lines (a)

VCCs = {}

for i in range(0, len(filenames)):
    lineCount = 0
    lineIndex = 0
    
    previousCommit = None
    commits = repo.iter_commits(str(commit) + '^@', paths=filenames[i])
    
    for com in commits:
        previousCommit = com
        break

    if previousCommit is not None:

        # Find all commits of deleted lines
        for changeCommit, lines in repo.blame(commit.parents[0], filenames[i]):
            lineCount += len(lines)

            while lineIndex < len(removedLines[i]) and removedLines[i][lineIndex] <= lineCount:
                if str(changeCommit) in VCCs:
                    VCCs[str(changeCommit)] += 1
                else:
                    VCCs[str(changeCommit)] = 1

                lineIndex += 1

for com in VCCs:
    print(com, VCCs[com])

ca01cb4df61c1b41afe0ae5ad178181c4cabdf20 19
dc0613f487d6927d1b98d413955229ee1b47e1e4 7
4ae1709313bece1c34646ec29a0189c58bf88584 1
d14cc0d7a20326fb5643dd966af71d2691a376da 17
6626a38730050c83a0dd6cdc1bfc510024e9ca95 2
85b8befbd1e4b7b4a975c92c592577fe58ffdd8f 1


In [67]:
for i in range(0, len(filenames)):
    lines = []
    commitLines = [None]

    # Add all added lines from the file
    for line in addedLines[i]:
        lines.append(line)

    # Add all lines and commits from the file (excluding whitespace and comments)
    for c, l in repo.blame(commit, filenames[i]):
        for line in l:
            if len(commitLines) not in lines and len(line) > 1 and len(re.findall(re.compile(r'^((\s*)((\/\/)|\*|(\/\*)))', re.MULTILINE), line)) == 0:
                commitLines.append((line, str(c)))
            else:
                commitLines.append((line, None))

    for i in range(0, len(lines)):
        if lines[i] is None:
            continue

        currentLine = lines[i]
        lines[i] = None

        depth = 0
        current = currentLine + 1

        # For each line, look below for any lines in the same scope which have different commits
        while current < len(commitLines) and depth >= 0:
            if depth == 0:
                if current in lines:
                    lines = [None if x == current else x for x in lines]
                elif commitLines[current][1] is not None and commitLines[current][1] != str(commit):
                    if commitLines[current][1] in VCCs:
                        VCCs[commitLines[current][1]] += 1
                    else:
                        VCCs[commitLines[current][1]] = 1

            # Update scope depth
            depth += commitLines[current][0].count('{')
            depth -= commitLines[current][0].count('}')
            current += 1

        depth = 0
        current = currentLine - 1

        # For each line, look above for any lines in the same scope which have different commits
        while current > len(commitLines) and depth >= 0:
            if depth == 0:
                if current in lines:
                    lines = [None if x == current else x for x in lines]
                elif commitLines[current][1] is not None and commitLines[current][1] != str(commit):
                    if commitLines[current][1] in VCCs:
                        VCCs[commitLines[current][1]] += 1
                    else:
                        VCCs[commitLines[current][1]] = 1

            # Update scope depth
            depth += commitLines[current][0].count('}')
            depth -= commitLines[current][0].count('{')
            current -= 1

In [68]:
for com in VCCs:
    print(com, VCCs[com])
    
VCCid = None
maxOccur = 0

for com in VCCs:
    if VCCs[com] > maxOccur:
        VCCid = com
        maxOccur = VCCs[com]
        
print()
print(VCCid, maxOccur)

VCC = repo.commit(VCCid)

print(VCC)

ca01cb4df61c1b41afe0ae5ad178181c4cabdf20 70
dc0613f487d6927d1b98d413955229ee1b47e1e4 8
4ae1709313bece1c34646ec29a0189c58bf88584 1
d14cc0d7a20326fb5643dd966af71d2691a376da 23
6626a38730050c83a0dd6cdc1bfc510024e9ca95 5
85b8befbd1e4b7b4a975c92c592577fe58ffdd8f 12
33674933eae53d17034e9f3cfb2d29ff31930d81 17
f2fdf9fa6b3ea96790051518032fee802296130a 1
ee0a59d464630f91ae310cafa05908422b808a58 1

ca01cb4df61c1b41afe0ae5ad178181c4cabdf20 70
ca01cb4df61c1b41afe0ae5ad178181c4cabdf20


In [69]:
print('Title and message')
print('-----------------', end='\n\n')
print(VCC.message)

Title and message
-----------------

SPR-5539: Add XML HttpMessageConverters



In [70]:
print('Number of affected files')
print('------------------------', end='\n\n')

if VCC.stats.total['files'] == 1:
    print(str(VCC.stats.total['files']) + ' file was affected')
else:
    print(str(VCC.stats.total['files']) + ' files were affected')

Number of affected files
------------------------

8 files were affected


In [71]:
print('Number of affected directories')
print('------------------------------', end='\n\n')

uniqueDir = set()

for file in VCC.stats.files:
    # Split and reconstruct to find the path of each file
    directories = file.split('/')
    uniqueDir.add('/'.join(directories[:-1]))
#     print('/'.join(directories[:-1]))
    
if len(uniqueDir) == 1:
    print('\n' + str(len(uniqueDir)) + ' directory was affected')
else:
    print('\n' + str(len(uniqueDir)) + ' directories were affected')

Number of affected directories
------------------------------


3 directories were affected


In [72]:
print('Total lines removed (including comments and blank lines)')
print('--------------------------------------------------------', end='\n\n')
print(str(VCC.stats.total['deletions']) + ' lines were removed')

Total lines removed (including comments and blank lines)
--------------------------------------------------------

0 lines were removed


In [73]:
print('Total lines added (including comments and blank lines)')
print('------------------------------------------------------', end='\n\n')
print(str(VCC.stats.total['insertions']) + ' lines were inserted')

Total lines added (including comments and blank lines)
------------------------------------------------------

445 lines were inserted


In [74]:
print('Total lines removed (excluding comments and blank lines)')
print('--------------------------------------------------------', end='\n\n')

# Exclude whitespace and comments
diff = repo.git.show(VCC)

total_lines = VCC.stats.total['deletions'] - len(re.findall(re.compile(r'^\-((\s*)((\/\/)|\*|(\/\*)))', re.MULTILINE), diff))
print(str(total_lines) + ' lines were removed')

Total lines removed (excluding comments and blank lines)
--------------------------------------------------------

0 lines were removed


In [75]:
print('Total lines inserted (excluding comments and blank lines)')
print('---------------------------------------------------------', end='\n\n')

# Exclude whitespace and comments
total_lines = VCC.stats.total['insertions'] - len(re.findall(re.compile(r'^\+((\s*)((\/\/)|\*|(\/\*)))', re.MULTILINE), diff))
print(str(total_lines) + ' lines were inserted')

Total lines inserted (excluding comments and blank lines)
---------------------------------------------------------

348 lines were inserted


In [76]:
print('Days since previous commit for each file')
print('----------------------------------------', end='\n\n')

average = 0
count = 0

for file in VCC.stats.files:
    commits = repo.iter_commits('--all', paths=file)

    finished = False
    previous_commit = None

    # Find the commit which is next in the list after the current commit
    
    for com in commits:
        if finished:
            previous_commit = com
            break

        if str(com) == str(VCC):
            finished = True

    if previous_commit is None:
        print('File created:', end='\t')
    else:
        # Find the number of days between the commits
        time_diff = VCC.committed_date - previous_commit.committed_date
        print(time_diff / 86400, end=' days:\t')
        average += time_diff / 86400
        
    print(file)
        
    count += 1
    
    if count == 5:
        break

print('\nAverage: ' + str(average / len(VCC.stats.files)))

Days since previous commit for each file
----------------------------------------

9.987534722222222 days:	org.springframework.web/ivy.xml
File created:	org.springframework.web/src/main/java/org/springframework/http/converter/xml/AbstractXmlHttpMessageConverter.java
File created:	org.springframework.web/src/main/java/org/springframework/http/converter/xml/MarshallingHttpMessageConverter.java
File created:	org.springframework.web/src/main/java/org/springframework/http/converter/xml/SourceHttpMessageConverter.java
File created:	org.springframework.web/src/main/java/org/springframework/http/converter/xml/package.html

Average: 1.2484418402777777


In [77]:
print('Number of modifications to each file before the current commit')
print('--------------------------------------------------------------', end='\n\n')

average = 0

for file in VCC.stats.files:
    count = -1

    commits = repo.iter_commits('--all', paths=file)
    
    # Count the number of modifications to each file before the current commit
    
    for com in commits:
        if count >= 0:
            count += 1

        if str(com) == str(VCC):
            count = 0

    average += count
    print(count, end='\t')
    print(file)
    
print('\nAverage: ' + str(average / len(VCC.stats.files)))

Number of modifications to each file before the current commit
--------------------------------------------------------------

9	org.springframework.web/ivy.xml
0	org.springframework.web/src/main/java/org/springframework/http/converter/xml/AbstractXmlHttpMessageConverter.java
0	org.springframework.web/src/main/java/org/springframework/http/converter/xml/MarshallingHttpMessageConverter.java
0	org.springframework.web/src/main/java/org/springframework/http/converter/xml/SourceHttpMessageConverter.java
0	org.springframework.web/src/main/java/org/springframework/http/converter/xml/package.html
0	org.springframework.web/src/test/java/org/springframework/http/converter/xml/MarshallingHttpMessageConverterTest.java
0	org.springframework.web/src/test/java/org/springframework/http/converter/xml/SourceHttpMessageConverterTest.java
6	org.springframework.web/web.iml

Average: 1.875


In [78]:
print('Names of developers who worked on each file')
print('-------------------------------------------', end='\n\n')

average = 0
total = set()

for file in VCC.stats.files:
    print(file)
    
    # Add the names to a set to remove duplicates

    names = set()
    addNames = False
    commits = repo.iter_commits('--all', paths=file)

    for com in commits:
        if str(com) == str(VCC):
            addNames = True

        if addNames is True:
            total.add(com.author.name)
            names.add(com.author.name)

    for name in names:
        print(name)
 
    average += len(names)
    print('Total: ' + str(len(names)), end='\n\n')
    
print('Total:   ' + str(len(total)))
print('Average: ' + str(average / len(VCC.stats.files)))

Names of developers who worked on each file
-------------------------------------------

org.springframework.web/ivy.xml
Chris Beams
Juergen Hoeller
Arjen Poutsma
Total: 3

org.springframework.web/src/main/java/org/springframework/http/converter/xml/AbstractXmlHttpMessageConverter.java
Arjen Poutsma
Total: 1

org.springframework.web/src/main/java/org/springframework/http/converter/xml/MarshallingHttpMessageConverter.java
Arjen Poutsma
Total: 1

org.springframework.web/src/main/java/org/springframework/http/converter/xml/SourceHttpMessageConverter.java
Arjen Poutsma
Total: 1

org.springframework.web/src/main/java/org/springframework/http/converter/xml/package.html
Arjen Poutsma
Total: 1

org.springframework.web/src/test/java/org/springframework/http/converter/xml/MarshallingHttpMessageConverterTest.java
Arjen Poutsma
Total: 1

org.springframework.web/src/test/java/org/springframework/http/converter/xml/SourceHttpMessageConverterTest.java
Arjen Poutsma
Total: 1

org.springframework.web/w

In [79]:
print('Commits per developer')
print('---------------------', end='\n\n')

max = 0
number_of_commits = {}

for name in total:
    if len(name) > max:
        max = len(name)
        
    # Get the number of commits by each author

    loginfo = repo.git.log('--author=' + name)
    number_of_commits[name] = len(re.findall(re.compile(r'^commit.*', re.MULTILINE), loginfo))

max_commits = 0
min_commits = 0
    
for name in number_of_commits:
    print(name, end='')
    
    for i in range(len(name), max):
        print(' ', end='')
        
    print(': ', end='')
    print(number_of_commits[name])
    
    # Get max and min commits
    
    if number_of_commits[name] > max_commits:
        max_commits = number_of_commits[name]
        
    if number_of_commits[name] < min_commits or min_commits == 0:
        min_commits = number_of_commits[name]

print('\nMax: ' + str(max_commits))
print('Min: ' + str(min_commits))

Commits per developer
---------------------

Chris Beams    : 1049
Juergen Hoeller: 5994
Arjen Poutsma  : 1212

Max: 5994
Min: 1049
