# Secure Software Engineering Exercise 2

In [1]:
import numpy as np
import pandas as pd
import os

# Specify git executable file for GitPython in Jupyter Notebook (In IDE, it can still work without this line.)
# os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = "C:\Program Files\Git\cmd\git.exe"

import git
import re
# from git import RemoteProgress

from git import Repo
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## 1. apache/spark
CVE-ID - CVE-2017-12612<br/>
Project Link:https://github.com/apache/spark<br/>
Commit:4cba3b5a350f4d477466fc73b32cbd653eee840

### a) What was the message and title of the fixing commit? 

In [23]:
import pprint as pprint
remote_link = "https://github.com/apache/spark"
local_link = "spark"
fixing_commit = "4cba3b5a350f4d477466fc73b32cbd653eee840"

repo = Repo(local_link)
show_data = repo.git.show("--shortstat", fixing_commit).splitlines()
commit = repo.commit(fixing_commit)
print("Commit Title:"+'\x1b[0m'+show_data[4])
print("Commit Message:")
for index in range(len(show_data)):
    if index>5 and index<19:
        print(show_data[index])

Commit Title:[0m    [SPARK-20922][CORE] Add whitelist of classes that can be deserialized by the launcher.
Commit Message:
    Blindly deserializing classes using Java serialization opens the code up to
    issues in other libraries, since just deserializing data from a stream may
    end up execution code (think readObject()).
    
    Since the launcher protocol is pretty self-contained, there's just a handful
    of classes it legitimately needs to deserialize, and they're in just two
    packages, so add a filter that throws errors if classes from any other
    package show up in the stream.
    
    This also maintains backwards compatibility (the updated launcher code can
    still communicate with the backend code in older Spark releases).
    
    Tested with new and existing unit tests.


### b) How many total files were affected in the fixing commit?

In [21]:
stats = commit.stats

print("Affected Files:"+ str(stats.total["files"]))

Affected Files:3


### c) How many total directories were affected in the fixing commit? 

In [22]:
totalDirAffected = repo.git.show("--dirstat", fixing_commit).splitlines()
affectedDir = []
for info in totalDirAffected:
    if "% " in info:
        affectedDir.append(info[info.find("% ")+2:])
print(len(affectedDir),"Directories affected: ")
for pline in affectedDir:
    print(pline)

2 Directories affected: 
launcher/src/main/java/org/apache/spark/launcher/
launcher/src/test/java/org/apache/spark/launcher/


### d) How many total lines of code(including comments and blank lines)  were deleted? 

In [23]:
diffData = repo.git.show("--stat", fixing_commit).splitlines()
#for line in diffData:
#    print(line)
print("Deleted lines(including comment and blank lines):"+diffData[30][36:40])


Deleted lines(including comment and blank lines): 27 


### e) How many total lines of code(including comments and blank lines)  were added? 

In [24]:
diffData = repo.git.show("--stat", fixing_commit).splitlines()
#for line in diffData:
#    print(line)
print("Inserted lines(including comment and blank lines):"+diffData[30][17:21])

Inserted lines(including comment and blank lines): 121


### f) How many total lines of code(excluding comments and blank lines)  were deleted? 

In [25]:
diffData = repo.git.diff("-w",fixing_commit + "^", fixing_commit).splitlines()
comment = False
count = 0
for line in diffData:
    if(not line[0:3] == '---'):
        if(line.find('/*') != -1):
            comment = True
            continue
        elif(line.find('*/') != -1):
            comment = False
            continue
        if(comment==False):
            if(line[0:1] == '-' and not re.search(r'\s', line) is None):
                if(line.find('//') == -1):
                    count=count+1
print("Deleted lines(excluding comments and blank lines):"+ str(count))

Deleted lines(excluding comments and blank lines):23


### g) How many total lines of code(excluding comments and blank lines)  were added?

In [26]:
diffData = repo.git.diff("-w",fixing_commit + "^", fixing_commit).splitlines()
comment = False
count = 0
for line in diffData:
    if(not line[0:3] == '+++'):
        if(line.find('/*') != -1):
            comment = True
            continue
        elif(line.find('*/') != -1):
            comment = False
            continue
        if(comment==False):
            if(line[0:1] == '+' and not re.search(r'\s', line) is None):
                if(line.find('//') == -1):
                    count=count+1
print("Inserted lines(excluding comments and blank lines):"+ str(count))

Inserted lines(excluding comments and blank lines):77


### h)  How many days were between the current fixing commit and the previous commit of each affected file? 

In [27]:
from datetime import datetime
from datetime import date

for file in s.files:   
    print("File: "+file)
    log_data = repo.git.log(fixing_commit,file).splitlines()
    commitID = 0
    date1 = datetime.now()
    date2 = datetime.now()
    for line in log_data:
        if(line[0:8] == "Date:   "):
            commitID=commitID+1
            if(commitID ==1):
                date1 = datetime.strptime(line[8:31], '%a %b %d %H:%M:%S %Y')
                print("Current Commit Time: "+str(date1))
                continue
            if(commitID ==2):
                date2 = datetime.strptime(line[8:31], '%a %b %d %H:%M:%S %Y')
                print("Previous Commit Time: "+str(date2))
                break
    if(commitID == 1):
        print("This file only has one commit.")
        print("----------------------")
    else:
        print("Time Difference: "+str(date1-date2))
        print("----------------------")

File: launcher/src/main/java/org/apache/spark/launcher/FilteredObjectInputStream.java
Current Commit Time: 2017-06-01 14:44:34
This file only has one commit.
----------------------
File: launcher/src/main/java/org/apache/spark/launcher/LauncherConnection.java
Current Commit Time: 2017-06-01 14:44:34
Previous Commit Time: 2015-10-09 15:28:09
Time Difference: 600 days, 23:16:25
----------------------
File: launcher/src/test/java/org/apache/spark/launcher/LauncherServerSuite.java
Current Commit Time: 2017-06-01 14:44:34
Previous Commit Time: 2016-06-06 16:05:40
Time Difference: 359 days, 22:38:54
----------------------


### i)  How many time has each affected file of the current fixing commit been modified in the past since their creation?  

In [28]:
for file in stats.files:   
    print("File: "+file)
    log_data = repo.git.log(fixing_commit,"--pretty=\"format:%H%M%S\"", "--",file).splitlines()
    print("Times of modification(including renaming): "+str(len(log_data)))
    print("-------------------------------")

File: launcher/src/main/java/org/apache/spark/launcher/FilteredObjectInputStream.java
Times of modification(including renaming): 1
-------------------------------
File: launcher/src/main/java/org/apache/spark/launcher/LauncherConnection.java
Times of modification(including renaming): 2
-------------------------------
File: launcher/src/test/java/org/apache/spark/launcher/LauncherServerSuite.java
Times of modification(including renaming): 10
-------------------------------


### j)  Which developers have modified each affected file since its creation? 

In [29]:
for file in stats.files:   
    print("File: "+file) 
    log_data = repo.git.log(fixing_commit,"--pretty=format:%an", "--follow", "--",file).splitlines()
    x = np.array(log_data) 
    uniqueNames = np.unique(x)
    print("Contributors: ")
    for name in uniqueNames:
        print(name) 
    print()

File: launcher/src/main/java/org/apache/spark/launcher/FilteredObjectInputStream.java
Contributors: 
Marcelo Vanzin

File: launcher/src/main/java/org/apache/spark/launcher/LauncherConnection.java
Contributors: 
Marcelo Vanzin

File: launcher/src/test/java/org/apache/spark/launcher/LauncherServerSuite.java
Contributors: 
Dongjoon Hyun
Marcelo Vanzin
Reynold Xin
Sean Owen
Subroto Sanyal



### k) For each developer identified, how many commits have each of them submitted? From your observation, are the involving developers experienced or new ones or both?

In [30]:
all_developers = repo.git.shortlog("-sne", "--all").splitlines()

author_commits = []
identifiedDev = []
for file in stats.files:   
    #print("File: "+file) 
    log_data = repo.git.log(fixing_commit,"--pretty=format:%an", "--follow", "--",file).splitlines()
    x = np.array(log_data) 
    uniqueNames = np.unique(x)
    #print("Contributors: ")
    for name in uniqueNames:
        identifiedDev.append(name) 
dev = list(set(identifiedDev))
print("commitTimes"+"\t\t"+"author")
for developer in all_developers:
    #print(developer)
    commitTimes, author = developer.split("\t")
    for tem in dev:
        if tem in author:
            print(commitTimes+"\t\t"+author)


commitTimes		author
  1079		Reynold Xin <rxin@databricks.com>
   623		Sean Owen <sowen@cloudera.com>
   581		Marcelo Vanzin <vanzin@cloudera.com>
   576		Reynold Xin <rxin@apache.org>
   571		Dongjoon Hyun <dongjoon@apache.org>
   203		Reynold Xin <rxin@cs.berkeley.edu>
   147		Sean Owen <sean.owen@databricks.com>
   124		Dongjoon Hyun <dhyun@apple.com>
    84		Reynold Xin <reynoldx@gmail.com>
    43		Sean Owen <srowen@gmail.com>
    14		Marcelo Vanzin <vanzin@apache.org>
     5		Subroto Sanyal <ssanyal@datameer.com>


## 2. FasterXML/jackson-databind
CVE-ID - CVE-2012-1621<br/>
Project Link:  https://github.com/FasterXML/jackson-databind<br/>
Commit:  ddfddfba6414adbecaff99684ef66eebd3a92e92

### a) What was the message and title of the fixing commit? 

In [31]:
import pprint as pprint
remote_link = "https://github.com/FasterXML/jackson-databind"
local_link = "jackson-databind"
fixing_commit = "ddfddfba6414adbecaff99684ef66eebd3a92e92"

repo = Repo(local_link)
show_data = repo.git.show("--shortstat", fixing_commit).splitlines()
commit = repo.commit(fixing_commit)
print("Commit Title:"+show_data[4])
print("Commit Message:"+show_data[5])

Commit Title:    Fix #1737
Commit Message:


### b) How many total files were affected in the fixing commit?

In [32]:
commit = repo.commit(fixing_commit)
stats = commit.stats

print("Affected Files:"+ str(stats.total["files"]))

Affected Files:3


### c) How many total directories were affected in the fixing commit? 

In [33]:
totalDirAffected = repo.git.show("--dirstat", fixing_commit).splitlines()
affectedDir = []
for info in totalDirAffected:
    if "% " in info:
        affectedDir.append(info[info.find("% ")+2:])
print(len(affectedDir),"Directories affected: ")
for pline in affectedDir:
    print(pline)

2 Directories affected: 
src/main/java/com/fasterxml/jackson/databind/deser/
src/test/java/com/fasterxml/jackson/databind/interop/


### d) How many total lines of code(including comments and blank lines)  were deleted? 

In [34]:
diffData = repo.git.show("--stat", fixing_commit).splitlines()
#for line in diffData:
#    print(line)
print("Deleted lines(including comment and blank lines):"+diffData[9][36:39])


Deleted lines(including comment and blank lines): 6 


### e) How many total lines of code(including comments and blank lines)  were added? 

In [35]:
diffData = repo.git.show("--stat", fixing_commit).splitlines()
#for line in diffData:
#    print(line)
print("Inserted lines(including comment and blank lines):"+diffData[9][17:21])

Inserted lines(including comment and blank lines): 100


### f) How many total lines of code(excluding comments and blank lines)  were deleted? 

In [36]:
diffData = repo.git.diff("-w",fixing_commit + "^", fixing_commit).splitlines()
comment = False
count = 0
for line in diffData:
    if(not line[0:3] == '---'):
        if(line.find('/*') != -1):
            comment = True
            continue
        elif(line.find('*/') != -1):
            comment = False
            continue
        if(comment==False):
            if(line[0:1] == '-' and not re.search(r'\s', line) is None):
                if(line.find('//') == -1):
                    count=count+1
print("Deleted lines(excluding comments and blank lines):"+ str(count))

Deleted lines(excluding comments and blank lines):4


### g) How many total lines of code(excluding comments and blank lines)  were added?

In [37]:
diffData = repo.git.diff("-w",fixing_commit + "^", fixing_commit).splitlines()
comment = False
count = 0
for line in diffData:
    if(not line[0:3] == '+++'):
        if(line.find('/*') != -1):
            comment = True
            continue
        elif(line.find('*/') != -1):
            comment = False
            continue
        if(comment==False):
            if(line[0:1] == '+' and not re.search(r'\s', line) is None):
                if(line.find('//') == -1):
                    count=count+1
print("Inserted lines(excluding comments and blank lines):"+ str(count))

Inserted lines(excluding comments and blank lines):54


### h)  How many days were between the current fixing commit and the previous commit of each affected file? 

In [52]:
from datetime import datetime
from datetime import date
for file in stats.files:   
    print("File: "+file)
    log_data = repo.git.log(fixing_commit,file).splitlines()
    commitID = 0
    date1 = datetime.now()
    date2 = datetime.now()
    for line in log_data:
        if(line[0:8] == "Date:   "):
            commitID=commitID+1
            if(commitID ==1):
                date1 = datetime.strptime(line[8:38], '%a %b %d %H:%M:%S %Y %z')
                print("Current Commit Time: "+str(date1))
                continue
            if(commitID ==2):
                date2 = datetime.strptime(line[8:38], '%a %b %d %H:%M:%S %Y %z')
                print("Previous Commit Time: "+str(date2))
                break
    if(commitID == 1):
        print("This file only has one commit.")
        print("----------------------")
    else:
        print("Time Difference: "+str(date1-date2))
        print("----------------------")

File: release-notes/VERSION
Current Commit Time: 2017-08-17 15:12:47-07:00
Previous Commit Time: 2017-08-17 13:44:18-07:00
Time Difference: 1:28:29
----------------------
File: src/main/java/com/fasterxml/jackson/databind/deser/BeanDeserializerFactory.java
Current Commit Time: 2017-08-17 15:12:47-07:00
Previous Commit Time: 2017-06-30 09:31:02-07:00
Time Difference: 48 days, 5:41:45
----------------------
File: src/test/java/com/fasterxml/jackson/databind/interop/IllegalTypesCheckTest.java
Current Commit Time: 2017-08-17 15:12:47-07:00
Previous Commit Time: 2017-04-13 15:08:10-07:00
Time Difference: 126 days, 0:04:37
----------------------


### i)  How many time has each affected file of the current fixing commit been modified in the past since their creation?  

In [54]:
for file in stats.files:   
    print("File: "+file)
    log_data = repo.git.log(fixing_commit,"--pretty=\"format:%H%M%S\"", "--",file).splitlines()
    print("Times of modification(including renaming): "+str(len(log_data)))
    print("-------------------------------")

File: release-notes/VERSION
Times of modification(including renaming): 865
-------------------------------
File: src/main/java/com/fasterxml/jackson/databind/deser/BeanDeserializerFactory.java
Times of modification(including renaming): 126
-------------------------------
File: src/test/java/com/fasterxml/jackson/databind/interop/IllegalTypesCheckTest.java
Times of modification(including renaming): 2
-------------------------------


### j)  Which developers have modified each affected file since its creation? 

In [56]:
for file in stats.files:   
    print("File: "+file) 
    log_data = repo.git.log(fixing_commit,"--pretty=format:%an", "--follow", "--",file).splitlines()
    x = np.array(log_data) 
    uniqueNames = np.unique(x)
    print("Contributors: ")
    for name in uniqueNames:
        print(name) 
    print()

File: release-notes/VERSION
Contributors: 
Cowtowncoder
Tatu
Tatu Saloranta
jackphel

File: src/main/java/com/fasterxml/jackson/databind/deser/BeanDeserializerFactory.java
Contributors: 
Cowtowncoder
LokeshN
Pascal Gélinas
Tatu
Tatu Saloranta

File: src/test/java/com/fasterxml/jackson/databind/interop/IllegalTypesCheckTest.java
Contributors: 
Tatu Saloranta



### k) For each developer identified, how many commits have each of them submitted? From your observation, are the involving developers experienced or new ones or both?

In [58]:
all_developers = repo.git.shortlog("-sne", "--all").splitlines()

author_commits = []
identifiedDev = []
for file in stats.files:   
    #print("File: "+file) 
    log_data = repo.git.log(fixing_commit,"--pretty=format:%an", "--follow", "--",file).splitlines()
    x = np.array(log_data) 
    uniqueNames = np.unique(x)
    #print("Contributors: ")
    for name in uniqueNames:
        identifiedDev.append(name) 
dev = list(set(identifiedDev))
print("commitTimes"+"\t\t"+"author")
for developer in all_developers:
    #print(developer)
    commitTimes, author = developer.split("\t")
    for tem in dev:
        if tem in author:
            print(commitTimes+"\t\t"+author)


commitTimes		author
  3600		Tatu Saloranta <tatu.saloranta@iki.fi>
  3600		Tatu Saloranta <tatu.saloranta@iki.fi>
  1028		Tatu Saloranta <tsaloranta@gmail.com>
  1028		Tatu Saloranta <tsaloranta@gmail.com>
   454		Cowtowncoder <tatu.saloranta@iki.fi>
    78		Tatu <tatu@ning.com>
    71		Tatu <tsaloranta@gmail.com>
    52		Tatu Saloranta <tsaloranta@salesforce.com>
    52		Tatu Saloranta <tsaloranta@salesforce.com>
    28		jackphel <jackphel@gmail.com>
    21		Pascal Gélinas <pascal.gelinas@nuecho.com>
     2		LokeshN <nlokesh.17@gmail.com>
     1		Tatu Saloranta <tatu@Tatus-Mac-mini.local>
     1		Tatu Saloranta <tatu@Tatus-Mac-mini.local>
     1		LokeshN <ln@vmware.com>


## 2. apache/cxf
CVE-ID - CVE-2012-1621<br/>
Project Link:https://github.com/apache/cxf<br/>
Commit:9deb2d17

### a) What was the message and title of the fixing commit? 

In [62]:
import pprint as pprint
remote_link = "https://github.com/apache/cxf"
local_link = "cxf"
fixing_commit = "9deb2d17"

repo = Repo(local_link)
show_data = repo.git.show("--shortstat", fixing_commit).splitlines()
print("Commit Title:"+'\x1b[0m'+show_data[4])
print("Commit Message:"+'\x1b[0m'+show_data[5])

Commit Title:[0m    Letting CXF StaxUtils prepare XMLStreamReader for Atom reads
Commit Message:[0m


### b) How many total files were affected in the fixing commit?

In [64]:
commit = repo.commit(fixing_commit)
stats = commit.stats

print("Affected Files:"+ str(stats.total["files"]))

Affected Files:2


### c) How many total directories were affected in the fixing commit? 

In [65]:
totalDirAffected = repo.git.show("--dirstat", fixing_commit).splitlines()
affectedDir = []
for info in totalDirAffected:
    if "% " in info:
        affectedDir.append(info[info.find("% ")+2:])
print(len(affectedDir),"Directories affected: ")
for pline in affectedDir:
    print(pline)

2 Directories affected: 
rt/rs/extensions/providers/src/main/java/org/apache/cxf/jaxrs/provider/atom/
rt/rs/extensions/providers/src/test/java/org/apache/cxf/jaxrs/provider/atom/


### d) How many total lines of code(including comments and blank lines)  were deleted? 

In [78]:
diffData = repo.git.show("--stat", fixing_commit).splitlines()
#for line in diffData:
#    print(line)
print("Deleted lines(including comment and blank lines):"+diffData[8][35:37])


Deleted lines(including comment and blank lines): 2


### e) How many total lines of code(including comments and blank lines)  were added? 

In [86]:
diffData = repo.git.show("--stat", fixing_commit).splitlines()
#for line in diffData:
#    print(line)
print("Inserted lines(including comment and blank lines):"+diffData[8][17:20])


Inserted lines(including comment and blank lines): 41


### f) How many total lines of code(excluding comments and blank lines)  were deleted? 

In [87]:
diffData = repo.git.diff("-w",fixing_commit + "^", fixing_commit).splitlines()
comment = False
count = 0
for line in diffData:
    if(not line[0:3] == '---'):
        if(line.find('/*') != -1):
            comment = True
            continue
        elif(line.find('*/') != -1):
            comment = False
            continue
        if(comment==False):
            if(line[0:1] == '-' and not re.search(r'\s', line) is None):
                if(line.find('//') == -1):
                    count=count+1
print("Deleted lines(excluding comments and blank lines):"+ str(count))

Deleted lines(excluding comments and blank lines):2


### g) How many total lines of code(excluding comments and blank lines)  were added?

In [88]:
diffData = repo.git.diff("-w",fixing_commit + "^", fixing_commit).splitlines()
comment = False
count = 0
for line in diffData:
    if(not line[0:3] == '+++'):
        if(line.find('/*') != -1):
            comment = True
            continue
        elif(line.find('*/') != -1):
            comment = False
            continue
        if(comment==False):
            if(line[0:1] == '+' and not re.search(r'\s', line) is None):
                if(line.find('//') == -1):
                    count=count+1
print("Inserted lines(excluding comments and blank lines):"+ str(count))

Inserted lines(excluding comments and blank lines):39


### h)  How many days were between the current fixing commit and the previous commit of each affected file? 

In [93]:
from datetime import datetime
from datetime import date
for file in stats.files:   
    print("File: "+file)
    log_data = repo.git.log(fixing_commit,file).splitlines()
    commitID = 0
    date1 = datetime.now()
    date2 = datetime.now()
    for line in log_data:
        if(line[0:8] == "Date:   "):
            commitID=commitID+1
            if(commitID ==1):
                date1 = datetime.strptime(line[8:38], '%a %b %d %H:%M:%S %Y %z')
                print("Current Commit Time: "+str(date1))
                continue
            if(commitID ==2):
                date2 = datetime.strptime(line[8:38], '%a %b %d %H:%M:%S %Y %z')
                print("Previous Commit Time: "+str(date2))
                break
    if(commitID == 1):
        print("This file only has one commit.")
        print("----------------------")
    else:
        print("Time Difference: "+str(date1-date2))
        print("----------------------")

File: rt/rs/extensions/providers/src/main/java/org/apache/cxf/jaxrs/provider/atom/AbstractAtomProvider.java
Current Commit Time: 2016-11-23 14:27:36+00:00
Previous Commit Time: 2016-11-22 17:22:36+00:00
Time Difference: 21:05:00
----------------------
File: rt/rs/extensions/providers/src/test/java/org/apache/cxf/jaxrs/provider/atom/AtomPojoProviderTest.java
Current Commit Time: 2016-11-23 14:27:36+00:00
Previous Commit Time: 2015-11-10 17:19:19+00:00
Time Difference: 378 days, 21:08:17
----------------------


### i)  How many time has each affected file of the current fixing commit been modified in the past since their creation?  

In [95]:
for file in stats.files:   
    print("File: "+file)
    log_data = repo.git.log(fixing_commit,"--pretty=\"format:%H%M%S\"", "--",file).splitlines()
    print("Times of modification(including renaming): "+str(len(log_data)))
    print("-------------------------------")

File: rt/rs/extensions/providers/src/main/java/org/apache/cxf/jaxrs/provider/atom/AbstractAtomProvider.java
Times of modification(including renaming): 8
-------------------------------
File: rt/rs/extensions/providers/src/test/java/org/apache/cxf/jaxrs/provider/atom/AtomPojoProviderTest.java
Times of modification(including renaming): 6
-------------------------------


### j)  Which developers have modified each affected file since its creation? 

In [97]:
for file in stats.files:   
    print("File: "+file) 
    log_data = repo.git.log(fixing_commit,"--pretty=format:%an", "--follow", "--",file).splitlines()
    x = np.array(log_data) 
    uniqueNames = np.unique(x)
    print("Contributors: ")
    for name in uniqueNames:
        print(name) 
    print()

File: rt/rs/extensions/providers/src/main/java/org/apache/cxf/jaxrs/provider/atom/AbstractAtomProvider.java
Contributors: 
J. Daniel Kulp
Sergey Beryozkin

File: rt/rs/extensions/providers/src/test/java/org/apache/cxf/jaxrs/provider/atom/AtomPojoProviderTest.java
Contributors: 
Colm O hEigeartaigh
J. Daniel Kulp
Sergey Beryozkin



### k) For each developer identified, how many commits have each of them submitted? From your observation, are the involving developers experienced or new ones or both?

In [99]:
all_developers = repo.git.shortlog("-sne", "--all").splitlines()

author_commits = []
identifiedDev = []
for file in stats.files:   
    #print("File: "+file) 
    log_data = repo.git.log(fixing_commit,"--pretty=format:%an", "--follow", "--",file).splitlines()
    x = np.array(log_data) 
    uniqueNames = np.unique(x)
    #print("Contributors: ")
    for name in uniqueNames:
        identifiedDev.append(name) 
dev = list(set(identifiedDev))
print("commitTimes"+"\t\t"+"author")
for developer in all_developers:
    #print(developer)
    commitTimes, author = developer.split("\t")
    for tem in dev:
        if tem in author:
            print(commitTimes+"\t\t"+author)


commitTimes		author
  5173		J. Daniel Kulp <dkulp@apache.org>
  4524		Colm O hEigeartaigh <coheigea@apache.org>
  3361		Sergey Beryozkin <sergeyb@apache.org>
  2544		Sergey Beryozkin <sberyozkin@gmail.com>
  1708		Sergey Beryozkin <sberyozkin@talend.com>
    41		Colm O hEigeartaigh <coheigea@users.noreply.github.com>
