'''
Created on Apr 25, 2019
@author: peipei
'''
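# Analyzes pull requests collected per GitHub organization (apache, mozilla,
# google, facebook): deduplicates PRs and their repositories, tags each PR
# with its repo's primary language, flags same-titled (candidate duplicate)
# PRs and dependency-update PRs, and records PR-to-issue links.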
import csv
import json
import os
import pickle
import re
from itertools import chain

from github import Github

import api
def getUniquePRsInOrg(dir,org="apache"):
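    '''Load the unique PRs crawled under `dir` (via api.getUniqueIssues),
    pickle the file->URL and URL->info maps there, and return both.'''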
    dict_file2url, dict_url2info = api.getUniqueIssues(dir, True)
    print("unique PRs in org {}: {}".format(org, len(dict_url2info)))
    with open(dir + "/dict_file2url", "wb") as f:
        pickle.dump(dict_file2url, f)
    with open(dir + "/dict_url2info", "wb") as f:
        pickle.dump(dict_url2info, f)
    return dict_file2url, dict_url2info
def getUniqueReposInOrg(dict_url2info,dir,org="apache"):
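    '''Group the crawled PR JSON files by their repository URL, pickle the
    repo->files map, then build, pickle, and return a repo->info map via
    api.buildProjInfo.'''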
    dict_repo2files = dict()
    for issue_info in dict_url2info.values():
        file = issue_info['file']
        with open(file) as json_file:
            data = json.load(json_file)
        repo_url = data['repository']['url']
        dict_repo2files.setdefault(repo_url, []).append(file)
    print("unique PRs in org {} are from {} repos".format(org, len(dict_repo2files)))
    with open(dir + "/dict_repo2files", "wb") as f:
        pickle.dump(dict_repo2files, f)
    dict_repo2info = {repo_url: api.buildProjInfo(files) for repo_url, files in dict_repo2files.items()}
    with open(dir + "/dict_repo2info", "wb") as f:
        pickle.dump(dict_repo2info, f)
    return dict_repo2info
def getUniquePRsInOrgs(ws="/home/peipei/GitHubIssues/Orgs/"):
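    '''Run the per-org PR and repo deduplication for every organization
    directory under `ws` and report the per-org counts.'''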
    for org in ["apache", "mozilla", "google", "facebook"]:
        dict_file2url, dict_url2info = getUniquePRsInOrg(ws + org, org)
        dict_repo2info = getUniqueReposInOrg(dict_url2info, ws + org, org)
        prs_urls, repo_urls = dict_url2info.keys(), dict_repo2info.keys()
        print("org {}: {} unique PRs, {} unique repos".format(org, len(set(prs_urls)), len(set(repo_urls))))
def getPRInfoPerLangPerRepo(url2info_file,repo2info_file,file_csv,dir="/home/peipei/GitHubIssues/Orgs/apache/"):
    '''Tag every PR with its repository's primary language, write a CSV of
    (repo_url, primeLang, pr_url, merged) rows, and re-pickle the enriched
    dict_url2info. url2info_file and repo2info_file name pickles under dir.'''
    res = []
    with open(dir + url2info_file, "rb") as pickle_file:
        dict_url2info = pickle.load(pickle_file)
    with open(dir + repo2info_file, "rb") as pickle_file:
        dict_repo2info = pickle.load(pickle_file)
    for pr_url, pr_info in dict_url2info.items():
        repo_url = pr_info['repo_url']
        primaryLang = dict_repo2info[repo_url]['primaryLanguage']
        res.append({"repo_url": repo_url, "primeLang": primaryLang, "pr_url": pr_url, "merged": pr_info['merged']})
        pr_info['lang'] = primaryLang
    with open(dir + file_csv, 'w', encoding='utf8', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=res[0].keys())
        dict_writer.writeheader()
        dict_writer.writerows(res)
    with open(dir + "dict_url2info", "wb") as f:
        pickle.dump(dict_url2info, f)
def getDuplicatedPRs(url2info_file,dir="/home/peipei/GitHubIssues/Orgs/apache/"):
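    '''Group each repository's PRs by title (fetched via api.getPRInfo) and
    print the groups that share a title, which are candidate duplicate PRs.'''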
    res = dict()
    # the access token was hard-coded in the original; read it from the
    # environment instead of committing credentials
    github = Github(os.environ["GITHUB_TOKEN"])
    with open(dir + url2info_file, "rb") as pickle_file:
        dict_url2info = pickle.load(pickle_file)
    for pr_url, pr_info in dict_url2info.items():
        repo_url = pr_info['repo_url']
        if repo_url not in res:
            res[repo_url] = dict()
        id, user, title, merged = api.getPRInfo(github, pr_url)
        if title not in res[repo_url]:
            res[repo_url][title] = [(id, pr_url, merged)]
        else:
            res[repo_url][title].append((id, pr_url, merged))
    for repo_url, titleInfos in res.items():
        count1, count2 = 0, 0
        for title, prInfos in titleInfos.items():
            if len(prInfos) > 1:
                count1 += 1
                count2 += (len(prInfos) - 1)
                print("in repo {} {} pull requests share the title '{}'; they are:".format(repo_url, len(prInfos), title))
                print(sorted(prInfos, key=lambda prInfo: prInfo[0]))
                print("--------------------------------")
        if count1 > 0:
            print("in repo {}: {} same-titled groups, {} redundant PRs".format(repo_url, count1, count2))
# matches version-bump PR titles such as "to 1.2.3" or "to version v0.4.2"
re_version = re.compile(r"to\s+(version)?\s*v?\d+\.\d+\.\d+")
def getDependencyPRs(url2info_file,dir="/home/peipei/GitHubIssues/Orgs/apache/"):
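    '''Partition PRs into dependency-update candidates: "definite" PRs opened
    by known dependency-bot accounts, and "indefinite" PRs whose titles
    contain a dependency keyword or match the version-bump regex above.'''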
    definite_userCnt = {"pyup-bot": 0, "greenkeeperio-bot": 0, "renovate[bot]": 0, "dependabot-preview[bot]": 0}
    definite_res = list()
    indefinite_res = list()
    indefinite_res2 = list()
    # the access token was hard-coded in the original; read it from the
    # environment instead of committing credentials
    github = Github(os.environ["GITHUB_TOKEN"])
    # test the user login names of known dependency bots:
    # print(api.getPRInfo(github, "https://github.com/mozilla/experiments-viewer/pull/381"))
    # print(api.getPRInfo(github, "https://github.com/mozilla/payments-client/pull/89"))
    # print(api.getPRInfo(github, "https://github.com/mozilla/foundation.mozilla.org/pull/1935"))
    # print(api.getPRInfo(github, "https://github.com/mozilla/delivery-console/pull/124"))
    with open(dir + url2info_file, "rb") as pickle_file:
        dict_url2info = pickle.load(pickle_file)
    print("total PRs: {}".format(len(dict_url2info)))
    for pr_url, pr_info in dict_url2info.items():
        id, user, title, merged = api.getPRInfo(github, pr_url)
        if user in definite_userCnt:
            definite_userCnt[user] += 1
            definite_res.append((pr_url, user, title, merged))
        elif any(keyword in title for keyword in ['dependency', 'package']):
            indefinite_res.append((pr_url, user, title, merged))
        elif re_version.search(title) is not None:
            indefinite_res2.append((pr_url, user, title, merged))
    if definite_res:
        print("Definite dependency PRs")
        print(definite_userCnt)
        for pr_url, user, title, merged in definite_res:
            print("{},'{}','{}',{}".format(pr_url, user, title, merged))
    if indefinite_res:
        print("Indefinite dependency PRs by keywords")
        for pr_url, user, title, merged in indefinite_res:
            print("{},'{}','{}',{}".format(pr_url, user, title, merged))
    if indefinite_res2:
        print("Indefinite dependency PRs by version pattern")
        for pr_url, user, title, merged in indefinite_res2:
            print("{},'{}','{}',{}".format(pr_url, user, title, merged))
def getPRLinkedIssueInOrg(url2info_file,file_csv,dir="/home/peipei/GitHubIssues/Orgs/apache/"):
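    '''Mark each PR with whether it is linked to issues (api.isPRLinked2Issue2),
    store the linked issue URLs in dict_url2info, re-pickle it, and print a
    summary of how many PRs link to how many issues.'''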
    with open(dir + url2info_file, "rb") as pickle_file:
        dict_url2info = pickle.load(pickle_file)
    for pr_url, pr_info in dict_url2info.items():
        linkedIssues = api.isPRLinked2Issue2(pr_url)
        pr_info['fix'] = (linkedIssues != [])
        pr_info['issue_urls'] = linkedIssues
    with open(dir + "dict_url2info", "wb") as f:
        pickle.dump(dict_url2info, f)
    dict_pr2issues = {pr_url: pr_info['issue_urls'] for pr_url, pr_info in dict_url2info.items() if pr_info['fix']}
    issue_urls = set(chain.from_iterable(dict_pr2issues.values()))
    print("{} pull requests are linked with {} issues".format(len(dict_pr2issues), len(issue_urls)))
    # CSV export of the linked-issue info (disabled; it depends on per-language
    # `lang` and `infos` values that are not defined in this file):
    # with open("/home/peipei/GitHubIssues/" + lang + file_csv, 'w', encoding='utf8', newline='') as output_file:
    #     dict_writer = csv.DictWriter(output_file, fieldnames=infos[0].keys())
    #     dict_writer.writeheader()
    #     dict_writer.writerows(infos)
if __name__ == '__main__':
    # getUniquePRsInOrgs()
    # getPRInfoPerLangPerRepo("dict_url2info", "dict_repo2info", "repo_lang_file.csv", "/home/peipei/GitHubIssues/Orgs/facebook/")
    # getDuplicatedPRs("dict_url2info", "/home/peipei/GitHubIssues/Orgs/google/")
    # getDuplicatedPRs("dict_url2info", "/home/peipei/GitHubIssues/Orgs/facebook/")
    # getDuplicatedPRs("dict_url2info", "/home/peipei/GitHubIssues/Orgs/mozilla/")
    # getDuplicatedPRs("dict_url2info", "/home/peipei/GitHubIssues/Orgs/apache/")
    # getDependencyPRs("dict_url2info", "/home/peipei/GitHubIssues/Orgs/google/")
    getDependencyPRs("dict_url2info", "/home/peipei/GitHubIssues/Orgs/facebook/")
    # getDependencyPRs("dict_url2info", "/home/peipei/GitHubIssues/Orgs/mozilla/")
    # getDependencyPRs("dict_url2info", "/home/peipei/GitHubIssues/Orgs/apache/")