# Task2 Crawl Pull Requests

## Tools used
For this task, I will use `requests` to fetch the HTML content of each pull request page, and extract the information using BeautifulSoup and regular expressions.

In [30]:
import requests
import re

In [11]:
from bs4 import BeautifulSoup as bs

## Feature extraction functions

In [4]:
def get_title(soup):
    """Return the pull request title with newlines removed.

    Looks up the first ``<span>`` whose class contains ``js-issue-title``
    and returns its text with every newline character stripped.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The title as a ``str``, or ``None`` when no matching span exists.
    """
    title_span = soup.find('span', {'class': lambda x: x and 'js-issue-title' in x})
    if title_span is None:
        # A missing element is the only expected failure. Checking for it
        # explicitly (instead of a bare ``except``) lets real errors and
        # Ctrl-C during a long crawl propagate.
        return None
    return str(title_span.text).replace('\n', '')

In [2]:
def get_labels(soup):
    """Collect the text of every label link on the page.

    Every ``<div>`` whose class contains ``js-issue-labels`` is scanned and
    the text of each ``<a>`` inside it is gathered, in the order the
    elements are returned by ``find_all``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find_all`` method.

    Returns:
        A list of label texts; empty when no label link is found.
    """
    def is_label_container(css_class):
        return css_class and "js-issue-labels" in css_class

    containers = soup.find_all('div', {'class': is_label_container})
    return [link.text for box in containers for link in box.find_all('a')]

In [3]:
def get_reviewers_assignees(soup):
    """Return the reviewer and assignee names found in the selection forms.

    Each ``<form>`` carrying an ``aria-label`` of ``'Select reviewers'`` or
    ``'Select assignees'`` is scanned for ``<span>`` elements that have a
    ``data-assignee-name`` attribute; the attribute values are collected.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find_all`` method.

    Returns:
        A ``(reviewers, assignees)`` tuple of two lists of names.
    """
    # One bucket per form label; forms with any other label are ignored.
    collected = {'Select reviewers': [], 'Select assignees': []}
    for form in soup.find_all('form'):
        if not form.has_attr('aria-label'):
            continue
        bucket = collected.get(form['aria-label'])
        if bucket is None:
            continue
        bucket.extend(
            span['data-assignee-name']
            for span in form.find_all('span')
            if span.has_attr('data-assignee-name')
        )
    return collected['Select reviewers'], collected['Select assignees']

Because minimized comments rely on JavaScript, only the unminimized comments are extracted here.

Extracting minimized comments would be possible with a WebDriver-controlled browser, but that would take too much time and computing resources.

In [58]:
def get_unminimized_comments(soup):
    """Return the unminimized comments as ``"author: text "`` strings.

    For every element whose class contains ``unminimized-comment`` the first
    descendant whose class contains ``author`` supplies the author name, and
    the text of each ``<p>`` inside the element is concatenated, with one
    space after every paragraph (so the text ends with a trailing space).

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find_all`` method.

    Returns:
        A list with one string per comment that has an author element;
        empty when there are none.
    """
    comments = []
    for block in soup.find_all(None, {'class': lambda x: x and "unminimized-comment" in x}):
        author_node = block.find(None, {'class': lambda x: x and 'author' in x})
        if author_node is None:
            # Skip just this entry: one comment without an author element
            # must not throw away every other comment on the page.
            continue
        body = ''.join(paragraph.text + ' ' for paragraph in block.find_all('p'))
        comments.append(author_node.text + ": " + body)
    return comments

In [8]:
def get_opened_time(soup):
    """Return the ``datetime`` attribute of the first discussion timestamp.

    Only the first element matched by the ``js-discussion `` class filter is
    consulted; inside it, the first ``<relative-time>`` element provides the
    value.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find_all`` method.

    Returns:
        The ``datetime`` attribute value, or ``None`` when there is no
        matching container, no ``<relative-time>`` inside it, or the
        element lacks a ``datetime`` attribute.
    """
    # NOTE(review): the trailing space in 'js-discussion ' is kept exactly as
    # written; presumably it is meant to match the full class string rather
    # than a single class token -- confirm before changing.
    matches = soup.find_all(None, {'class': lambda x: x and 'js-discussion ' in x})
    discussion = next(iter(matches), None)
    if discussion is None:
        return None
    stamp = discussion.find('relative-time')
    if stamp is None:
        return None
    try:
        return stamp["datetime"]
    except KeyError:
        # The element exists but carries no ``datetime`` attribute.
        return None

In [9]:
def get_end_time(soup):
    """Return the timestamp that follows the first "closed this" marker.

    The page is converted with ``str()`` and searched for the text
    ``closed this`` followed (across any characters, newlines included) by a
    ``datetime="..."`` attribute and a closing ``</relative-time>`` tag.

    Args:
        soup: Parsed page, or any object whose ``str()`` is the page HTML.

    Returns:
        The captured ``datetime`` value, or ``None`` when the pattern does
        not occur in the page.
    """
    match = re.search(
        'closed this.*?datetime="(.*?)".*?</relative-time>',
        str(soup),
        re.DOTALL,
    )
    if match is None:
        # No "closed this" event on the page.
        return None
    return match.group(1)

In [46]:
def get_n_commits(soup):
    """Return the number shown in the element with id ``commits_tab_counter``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The counter text converted with ``int()``; ``0`` when the element is
        missing or its text is not a plain integer.
    """
    try:
        return int(soup.find(None, {'id': 'commits_tab_counter'}).text)
    except (AttributeError, ValueError):
        # AttributeError: counter element not found (find returned None).
        # ValueError: counter text could not be parsed by int().
        return 0

In [47]:
def get_n_changed_files(soup):
    """Return the number shown in the element with id ``files_tab_counter``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The counter text converted with ``int()``; ``0`` when the element is
        missing or its text is not a plain integer.
    """
    try:
        return int(soup.find(None, {'id': 'files_tab_counter'}).text)
    except (AttributeError, ValueError):
        # AttributeError: counter element not found (find returned None).
        # ValueError: counter text could not be parsed by int().
        return 0

In [48]:
def get_n_participants(soup):
    """Return the participant count shown in the ``participation`` element.

    Takes the text of the first ``<div>`` inside the element whose class is
    ``participation`` and converts the first run of digits in it to ``int``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The parsed count; ``0`` when the element, its ``<div>``, or a digit
        run is missing.
    """
    try:
        summary = soup.find(None, {'class': 'participation'}).div.text
        # Raw string: '\d' in a normal string literal is an invalid escape.
        return int(re.search(r'\d+', summary).group())
    except AttributeError:
        # Raised when find(), .div or re.search() yields None.
        return 0

## Crawling

In [59]:
import pandas as pd
import numpy as np
import multiprocessing
# Lock that crawl_pull holds while it adds a row to the global `data` frame.
thelock = multiprocessing.Lock()



def _fetch_pull_page(x, max_retries=11, timeout=30):
    """Download the GitHub page for pull request ``x`` of google/guava.

    Args:
        x: Pull request number.
        max_retries: Number of extra attempts allowed after the first one.
        timeout: Seconds to wait for each HTTP request before giving up on it.

    Returns:
        The ``requests`` response with status code 200, or ``None`` when the
        page does not exist (404) or every attempt failed.
    """
    import time  # local import: keeps this cell independent of the import cells

    url = "https://github.com/google/guava/pull/" + str(x)
    for attempt in range(max_retries + 1):
        if attempt:
            print("retry {}\n".format(x), end='')
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException:
            # Connection errors and timeouts count as one failed attempt
            # instead of aborting the whole crawl.
            response = None
        # Exponential backoff, capped so a single page cannot stall for long.
        delay = min(2 ** attempt, 30)
        if response is not None:
            if response.status_code == 200:
                return response
            if response.status_code == 404:
                # The page does not exist; retrying cannot change that.
                return None
            retry_after = response.headers.get("Retry-After", "")
            if retry_after.isdigit():
                # The server said how long to wait; honour it.
                delay = int(retry_after)
        if attempt < max_retries:
            # Pause so a rate-limited or briefly unavailable server can
            # recover before the next attempt.
            time.sleep(delay)
    return None


def crawl_pull(x):
    """Fetch pull request ``x`` and add its extracted features to ``data``.

    Prints ``"<x> started"`` before fetching and ``"<x> done"`` after the row
    has been stored. When the page cannot be fetched, nothing is stored and
    the function returns silently.

    Args:
        x: Pull request number in the google/guava repository.

    Returns:
        None. The global ``data`` frame is rebound to a frame with one
        additional row on success.
    """
    global data
    print(str(x) + ' started\n', end='')
    response = _fetch_pull_page(x)
    if response is None:
        return
    soup = bs(response.text, 'html.parser')
    reviewers, assignees = get_reviewers_assignees(soup)
    row = {
        'id': x,
        'title': get_title(soup),
        'reviewers': reviewers,
        'assignees': assignees,
        'labels': get_labels(soup),
        'comments': get_unminimized_comments(soup),
        'opened_time': get_opened_time(soup),
        'closed_time': get_end_time(soup),
        'n_commits': get_n_commits(soup),
        'n_changed_files': get_n_changed_files(soup),
        'n_participants': get_n_participants(soup),
    }
    # ``with`` releases the lock even if building or concatenating fails.
    with thelock:
        # DataFrame.append was removed in pandas 2.0; pd.concat with default
        # arguments keeps the same row index behaviour (index is not reset).
        data = pd.concat([data, pd.DataFrame([row])])
    print(str(x) + " done\n", end='')

In [49]:
# Empty result table; crawl_pull adds one row per successfully fetched page.
data = pd.DataFrame(
    columns=[
        'id',
        'title',
        'labels',
        'reviewers',
        'assignees',
        'comments',
        'opened_time',
        'closed_time',
        'n_commits',
        'n_changed_files',
        'n_participants',
    ],
)

I wanted to use multiprocessing to crawl the pull requests in parallel, but my VPN does not allow it.

Instead, I will use a plain for loop to crawl the pull requests one by one.

In [51]:
# Crawl pull request pages 1 through 5431 sequentially (the range end is
# exclusive); each call adds at most one row to `data`.
for i in range(1,5432):
    crawl_pull(i)

1 started
1 done
2 started
2 done
3 started
3 done
4 started
4 done
5 started
5 done
6 started
6 done
7 started
7 done
8 started
8 done
9 started
9 done
10 started
10 done
11 started
11 done
12 started
12 done
13 started
13 done
14 started
14 done
15 started
15 done
16 started
16 done
17 started
17 done
18 started
18 done
19 started
19 done
20 started
20 done
21 started
21 done
22 started
22 done
23 started
23 done
24 started
24 done
25 started
25 done
26 started
26 done
27 started
27 done
28 started
28 done
29 started
29 done
30 started
30 done
31 started
31 done
32 started
32 done
33 started
33 done
34 started
34 done
35 started
35 done
36 started
36 done
37 started
37 done
38 started
38 done
39 started
39 done
40 started
40 done
41 started
41 done
42 started
42 done
43 started
43 done
44 started
44 done
45 started
45 done
46 started
46 done
47 started
47 done
48 started
48 done
49 started
49 done
50 started
50 done
51 started
51 done
52 started
52 done
53 started
53 done
54 started


401 done
402 started
402 done
403 started
403 done
404 started
404 done
405 started
405 done
406 started
406 done
407 started
407 done
408 started
408 done
409 started
409 done
410 started
410 done
411 started
411 done
412 started
412 done
413 started
413 done
414 started
414 done
415 started
415 done
416 started
416 done
417 started
417 done
418 started
418 done
419 started
419 done
420 started
420 done
421 started
421 done
422 started
422 done
423 started
423 done
424 started
424 done
425 started
425 done
426 started
426 done
427 started
427 done
428 started
428 done
429 started
429 done
430 started
430 done
431 started
431 done
432 started
432 done
433 started
433 done
434 started
434 done
435 started
435 done
436 started
436 done
437 started
437 done
438 started
438 done
439 started
439 done
440 started
440 done
441 started
441 done
442 started
442 done
443 started
443 done
444 started
444 done
445 started
445 done
446 started
446 done
447 started
447 done
448 started
448 done
449 

792 done
793 started
793 done
794 started
794 done
795 started
795 done
796 started
796 done
797 started
797 done
798 started
798 done
799 started
799 done
800 started
800 done
801 started
801 done
802 started
802 done
803 started
803 done
804 started
804 done
805 started
805 done
806 started
806 done
807 started
807 done
808 started
808 done
809 started
809 done
810 started
810 done
811 started
811 done
812 started
812 done
813 started
813 done
814 started
814 done
815 started
815 done
816 started
816 done
817 started
817 done
818 started
818 done
819 started
819 done
820 started
820 done
821 started
821 done
822 started
822 done
823 started
823 done
824 started
824 done
825 started
825 done
826 started
826 done
827 started
827 done
828 started
828 done
829 started
829 done
830 started
830 done
831 started
831 done
832 started
832 done
833 started
833 done
834 started
834 done
835 started
835 done
836 started
836 done
837 started
837 done
838 started
838 done
839 started
839 done
840 

1167 done
1168 started
1168 done
1169 started
1169 done
1170 started
1170 done
1171 started
1171 done
1172 started
1172 done
1173 started
1173 done
1174 started
1174 done
1175 started
1175 done
1176 started
1176 done
1177 started
1177 done
1178 started
1178 done
1179 started
1179 done
1180 started
1180 done
1181 started
1181 done
1182 started
1182 done
1183 started
1183 done
1184 started
1184 done
1185 started
1185 done
1186 started
1186 done
1187 started
1187 done
1188 started
1188 done
1189 started
1189 done
1190 started
1190 done
1191 started
1191 done
1192 started
1192 done
1193 started
1193 done
1194 started
1194 done
1195 started
1195 done
1196 started
1196 done
1197 started
1197 done
1198 started
1198 done
1199 started
1199 done
1200 started
1200 done
1201 started
1201 done
1202 started
1202 done
1203 started
1203 done
1204 started
1204 done
1205 started
1205 done
1206 started
1206 done
1207 started
1207 done
1208 started
1208 done
1209 started
1209 done
1210 started
1210 done
1

1524 done
1525 started
1525 done
1526 started
1526 done
1527 started
1527 done
1528 started
1528 done
1529 started
1529 done
1530 started
1530 done
1531 started
1531 done
1532 started
1532 done
1533 started
1533 done
1534 started
1534 done
1535 started
1535 done
1536 started
1536 done
1537 started
1537 done
1538 started
1538 done
1539 started
1539 done
1540 started
1540 done
1541 started
1541 done
1542 started
1542 done
1543 started
1543 done
1544 started
1544 done
1545 started
1545 done
1546 started
1546 done
1547 started
1547 done
1548 started
1548 done
1549 started
1549 done
1550 started
1550 done
1551 started
1551 done
1552 started
1552 done
1553 started
1553 done
1554 started
1554 done
1555 started
1555 done
1556 started
1556 done
1557 started
1557 done
1558 started
1558 done
1559 started
1559 done
1560 started
1560 done
1561 started
1561 done
1562 started
1562 done
1563 started
1563 done
1564 started
1564 done
1565 started
1565 done
1566 started
1566 done
1567 started
1567 done
1

1881 done
1882 started
1882 done
1883 started
1883 done
1884 started
1884 done
1885 started
1885 done
1886 started
1886 done
1887 started
1887 done
1888 started
1888 done
1889 started
1889 done
1890 started
1890 done
1891 started
1891 done
1892 started
1892 done
1893 started
1893 done
1894 started
1894 done
1895 started
1895 done
1896 started
1896 done
1897 started
1897 done
1898 started
1898 done
1899 started
1899 done
1900 started
1900 done
1901 started
1901 done
1902 started
1902 done
1903 started
1903 done
1904 started
1904 done
1905 started
1905 done
1906 started
1906 done
1907 started
1907 done
1908 started
1908 done
1909 started
1909 done
1910 started
1910 done
1911 started
1911 done
1912 started
1912 done
1913 started
1913 done
1914 started
1914 done
1915 started
1915 done
1916 started
1916 done
1917 started
1917 done
1918 started
1918 done
1919 started
1919 done
1920 started
1920 done
1921 started
1921 done
1922 started
1922 done
1923 started
1923 done
1924 started
1924 done
1

2238 done
2239 started
2239 done
2240 started
2240 done
2241 started
2241 done
2242 started
2242 done
2243 started
2243 done
2244 started
2244 done
2245 started
2245 done
2246 started
2246 done
2247 started
2247 done
2248 started
2248 done
2249 started
retry 2249
retry 2249
retry 2249
retry 2249
retry 2249
retry 2249
retry 2249
retry 2249
retry 2249
retry 2249
retry 2249
2250 started
retry 2250
retry 2250
retry 2250
retry 2250
retry 2250
retry 2250
retry 2250
retry 2250
retry 2250
retry 2250
retry 2250
2251 started
2251 done
2252 started
2252 done
2253 started
2253 done
2254 started
2254 done
2255 started
2255 done
2256 started
2256 done
2257 started
2257 done
2258 started
2258 done
2259 started
2259 done
2260 started
retry 2260
retry 2260
retry 2260
retry 2260
retry 2260
retry 2260
retry 2260
retry 2260
retry 2260
retry 2260
retry 2260
2261 started
retry 2261
retry 2261
retry 2261
retry 2261
retry 2261
retry 2261
retry 2261
retry 2261
retry 2261
retry 2261
retry 2261
2262 started
retr

retry 2317
retry 2317
retry 2317
retry 2317
retry 2317
retry 2317
2318 started
retry 2318
retry 2318
retry 2318
retry 2318
retry 2318
retry 2318
retry 2318
retry 2318
retry 2318
retry 2318
retry 2318
2319 started
retry 2319
retry 2319
retry 2319
retry 2319
retry 2319
retry 2319
retry 2319
retry 2319
retry 2319
retry 2319
retry 2319
2320 started
retry 2320
retry 2320
retry 2320
retry 2320
retry 2320
retry 2320
retry 2320
retry 2320
retry 2320
retry 2320
retry 2320
2321 started
retry 2321
retry 2321
retry 2321
retry 2321
retry 2321
retry 2321
retry 2321
retry 2321
retry 2321
retry 2321
retry 2321
2322 started
retry 2322
retry 2322
retry 2322
retry 2322
retry 2322
retry 2322
retry 2322
retry 2322
retry 2322
retry 2322
retry 2322
2323 started
retry 2323
retry 2323
retry 2323
retry 2323
retry 2323
retry 2323
retry 2323
retry 2323
retry 2323
retry 2323
retry 2323
2324 started
retry 2324
retry 2324
retry 2324
retry 2324
retry 2324
retry 2324
retry 2324
retry 2324
retry 2324
retry 2324
retry 2

2551 done
2552 started
2552 done
2553 started
2553 done
2554 started
2554 done
2555 started
2555 done
2556 started
2556 done
2557 started
2557 done
2558 started
2558 done
2559 started
2559 done
2560 started
retry 2560
retry 2560
retry 2560
retry 2560
retry 2560
retry 2560
retry 2560
retry 2560
retry 2560
retry 2560
retry 2560
2561 started
retry 2561
retry 2561
retry 2561
retry 2561
retry 2561
retry 2561
retry 2561
retry 2561
retry 2561
retry 2561
retry 2561
2562 started
2562 done
2563 started
2563 done
2564 started
2564 done
2565 started
2565 done
2566 started
2566 done
2567 started
2567 done
2568 started
2568 done
2569 started
2569 done
2570 started
2570 done
2571 started
2571 done
2572 started
2572 done
2573 started
2573 done
2574 started
2574 done
2575 started
2575 done
2576 started
2576 done
2577 started
2577 done
2578 started
2578 done
2579 started
2579 done
2580 started
2580 done
2581 started
2581 done
2582 started
2582 done
2583 started
2583 done
2584 started
2584 done
2585 star

2898 done
2899 started
2899 done
2900 started
2900 done
2901 started
2901 done
2902 started
2902 done
2903 started
2903 done
2904 started
2904 done
2905 started
2905 done
2906 started
2906 done
2907 started
2907 done
2908 started
2908 done
2909 started
2909 done
2910 started
2910 done
2911 started
2911 done
2912 started
2912 done
2913 started
2913 done
2914 started
2914 done
2915 started
2915 done
2916 started
2916 done
2917 started
2917 done
2918 started
2918 done
2919 started
2919 done
2920 started
2920 done
2921 started
2921 done
2922 started
2922 done
2923 started
2923 done
2924 started
2924 done
2925 started
2925 done
2926 started
2926 done
2927 started
2927 done
2928 started
2928 done
2929 started
2929 done
2930 started
2930 done
2931 started
2931 done
2932 started
2932 done
2933 started
2933 done
2934 started
2934 done
2935 started
retry 2935
retry 2935
retry 2935
retry 2935
retry 2935
retry 2935
retry 2935
retry 2935
retry 2935
retry 2935
retry 2935
2936 started
2936 done
2937 

3245 done
3246 started
3246 done
3247 started
3247 done
3248 started
3248 done
3249 started
3249 done
3250 started
3250 done
3251 started
3251 done
3252 started
3252 done
3253 started
3253 done
3254 started
3254 done
3255 started
3255 done
3256 started
3256 done
3257 started
3257 done
3258 started
3258 done
3259 started
3259 done
3260 started
3260 done
3261 started
3261 done
3262 started
3262 done
3263 started
3263 done
3264 started
3264 done
3265 started
3265 done
3266 started
3266 done
3267 started
3267 done
3268 started
3268 done
3269 started
3269 done
3270 started
3270 done
3271 started
3271 done
3272 started
3272 done
3273 started
3273 done
3274 started
3274 done
3275 started
3275 done
3276 started
3276 done
3277 started
3277 done
3278 started
3278 done
3279 started
3279 done
3280 started
3280 done
3281 started
3281 done
3282 started
3282 done
3283 started
3283 done
3284 started
3284 done
3285 started
3285 done
3286 started
3286 done
3287 started
3287 done
3288 started
3288 done
3

3597 done
3598 started
3598 done
3599 started
3599 done
3600 started
3600 done
3601 started
3601 done
3602 started
3602 done
3603 started
3603 done
3604 started
3604 done
3605 started
3605 done
3606 started
3606 done
3607 started
3607 done
3608 started
3608 done
3609 started
3609 done
3610 started
3610 done
3611 started
3611 done
3612 started
3612 done
3613 started
3613 done
3614 started
3614 done
3615 started
3615 done
3616 started
3616 done
3617 started
3617 done
3618 started
3618 done
3619 started
3619 done
3620 started
3620 done
3621 started
3621 done
3622 started
3622 done
3623 started
3623 done
3624 started
3624 done
3625 started
3625 done
3626 started
3626 done
3627 started
3627 done
3628 started
3628 done
3629 started
3629 done
3630 started
3630 done
3631 started
3631 done
3632 started
3632 done
3633 started
3633 done
3634 started
3634 done
3635 started
3635 done
3636 started
3636 done
3637 started
3637 done
3638 started
3638 done
3639 started
3639 done
3640 started
3640 done
3

3925 done
3926 started
3926 done
3927 started
3927 done
3928 started
3928 done
3929 started
3929 done
3930 started
3930 done
3931 started
3931 done
3932 started
3932 done
3933 started
3933 done
3934 started
3934 done
3935 started
3935 done
3936 started
3936 done
3937 started
3937 done
3938 started
3938 done
3939 started
3939 done
3940 started
3940 done
3941 started
3941 done
3942 started
3942 done
3943 started
3943 done
3944 started
retry 3944
retry 3944
retry 3944
retry 3944
retry 3944
retry 3944
retry 3944
retry 3944
retry 3944
retry 3944
retry 3944
3945 started
3945 done
3946 started
3946 done
3947 started
3947 done
3948 started
3948 done
3949 started
3949 done
3950 started
3950 done
3951 started
3951 done
3952 started
3952 done
3953 started
3953 done
3954 started
3954 done
3955 started
3955 done
3956 started
3956 done
3957 started
3957 done
3958 started
3958 done
3959 started
3959 done
3960 started
3960 done
3961 started
3961 done
3962 started
3962 done
3963 started
3963 done
3964 

retry 4078
retry 4078
retry 4078
retry 4078
retry 4078
retry 4078
retry 4078
retry 4078
retry 4078
4079 started
retry 4079
retry 4079
retry 4079
retry 4079
retry 4079
retry 4079
retry 4079
retry 4079
retry 4079
retry 4079
retry 4079
4080 started
retry 4080
retry 4080
retry 4080
retry 4080
retry 4080
retry 4080
retry 4080
retry 4080
retry 4080
retry 4080
retry 4080
4081 started
retry 4081
retry 4081
retry 4081
retry 4081
retry 4081
retry 4081
retry 4081
retry 4081
retry 4081
retry 4081
retry 4081
4082 started
retry 4082
retry 4082
retry 4082
retry 4082
retry 4082
retry 4082
retry 4082
retry 4082
retry 4082
retry 4082
retry 4082
4083 started
retry 4083
retry 4083
retry 4083
retry 4083
retry 4083
retry 4083
retry 4083
retry 4083
retry 4083
retry 4083
retry 4083
4084 started
retry 4084
retry 4084
retry 4084
retry 4084
retry 4084
retry 4084
retry 4084
retry 4084
retry 4084
retry 4084
retry 4084
4085 started
retry 4085
retry 4085
retry 4085
retry 4085
retry 4085
retry 4085
retry 4085
retry 4

retry 4139
retry 4139
retry 4139
retry 4139
retry 4139
retry 4139
retry 4139
4140 started
retry 4140
retry 4140
retry 4140
retry 4140
retry 4140
retry 4140
retry 4140
retry 4140
retry 4140
retry 4140
retry 4140
4141 started
retry 4141
retry 4141
retry 4141
retry 4141
retry 4141
retry 4141
retry 4141
retry 4141
retry 4141
retry 4141
retry 4141
4142 started
retry 4142
retry 4142
retry 4142
retry 4142
retry 4142
retry 4142
retry 4142
retry 4142
retry 4142
retry 4142
retry 4142
4143 started
retry 4143
retry 4143
retry 4143
retry 4143
retry 4143
retry 4143
retry 4143
retry 4143
retry 4143
retry 4143
retry 4143
4144 started
retry 4144
retry 4144
retry 4144
retry 4144
retry 4144
retry 4144
retry 4144
retry 4144
retry 4144
retry 4144
retry 4144
4145 started
retry 4145
retry 4145
retry 4145
retry 4145
retry 4145
retry 4145
retry 4145
retry 4145
retry 4145
retry 4145
retry 4145
4146 started
retry 4146
retry 4146
retry 4146
retry 4146
retry 4146
retry 4146
retry 4146
retry 4146
retry 4146
retry 4

retry 4200
retry 4200
retry 4200
retry 4200
retry 4200
4201 started
retry 4201
retry 4201
retry 4201
retry 4201
retry 4201
retry 4201
retry 4201
retry 4201
retry 4201
retry 4201
retry 4201
4202 started
retry 4202
retry 4202
retry 4202
retry 4202
retry 4202
retry 4202
retry 4202
retry 4202
retry 4202
retry 4202
retry 4202
4203 started
retry 4203
retry 4203
retry 4203
retry 4203
retry 4203
retry 4203
retry 4203
retry 4203
retry 4203
retry 4203
retry 4203
4204 started
retry 4204
retry 4204
retry 4204
retry 4204
retry 4204
retry 4204
retry 4204
retry 4204
retry 4204
retry 4204
retry 4204
4205 started
retry 4205
retry 4205
retry 4205
retry 4205
retry 4205
retry 4205
retry 4205
retry 4205
retry 4205
retry 4205
retry 4205
4206 started
retry 4206
retry 4206
retry 4206
retry 4206
retry 4206
retry 4206
retry 4206
retry 4206
retry 4206
retry 4206
retry 4206
4207 started
retry 4207
retry 4207
retry 4207
retry 4207
retry 4207
retry 4207
retry 4207
retry 4207
retry 4207
retry 4207
retry 4207
4208 st

retry 4261
retry 4261
retry 4261
4262 started
retry 4262
retry 4262
retry 4262
retry 4262
retry 4262
retry 4262
retry 4262
retry 4262
retry 4262
retry 4262
retry 4262
4263 started
retry 4263
retry 4263
retry 4263
retry 4263
retry 4263
retry 4263
retry 4263
retry 4263
retry 4263
retry 4263
retry 4263
4264 started
retry 4264
retry 4264
retry 4264
retry 4264
retry 4264
retry 4264
retry 4264
retry 4264
retry 4264
retry 4264
retry 4264
4265 started
retry 4265
retry 4265
retry 4265
retry 4265
retry 4265
retry 4265
retry 4265
retry 4265
retry 4265
retry 4265
retry 4265
4266 started
retry 4266
retry 4266
retry 4266
retry 4266
retry 4266
retry 4266
retry 4266
retry 4266
retry 4266
retry 4266
retry 4266
4267 started
retry 4267
retry 4267
retry 4267
retry 4267
retry 4267
retry 4267
retry 4267
retry 4267
retry 4267
retry 4267
retry 4267
4268 started
retry 4268
retry 4268
retry 4268
retry 4268
retry 4268
retry 4268
retry 4268
retry 4268
retry 4268
retry 4268
retry 4268
4269 started
retry 4269
retry

retry 4322
4323 started
retry 4323
retry 4323
retry 4323
retry 4323
retry 4323
retry 4323
retry 4323
retry 4323
retry 4323
retry 4323
retry 4323
4324 started
retry 4324
retry 4324
retry 4324
retry 4324
retry 4324
retry 4324
retry 4324
retry 4324
retry 4324
retry 4324
retry 4324
4325 started
retry 4325
retry 4325
retry 4325
retry 4325
retry 4325
retry 4325
retry 4325
retry 4325
retry 4325
retry 4325
retry 4325
4326 started
retry 4326
retry 4326
retry 4326
retry 4326
retry 4326
retry 4326
retry 4326
retry 4326
retry 4326
retry 4326
retry 4326
4327 started
retry 4327
retry 4327
retry 4327
retry 4327
retry 4327
retry 4327
retry 4327
retry 4327
retry 4327
retry 4327
retry 4327
4328 started
retry 4328
retry 4328
retry 4328
retry 4328
retry 4328
retry 4328
retry 4328
retry 4328
retry 4328
retry 4328
retry 4328
4329 started
retry 4329
retry 4329
retry 4329
retry 4329
retry 4329
retry 4329
retry 4329
retry 4329
retry 4329
retry 4329
retry 4329
4330 started
retry 4330
retry 4330
retry 4330
retry

retry 4384
retry 4384
retry 4384
retry 4384
retry 4384
retry 4384
retry 4384
retry 4384
retry 4384
retry 4384
retry 4384
4385 started
retry 4385
retry 4385
retry 4385
retry 4385
retry 4385
retry 4385
retry 4385
retry 4385
retry 4385
retry 4385
retry 4385
4386 started
retry 4386
retry 4386
retry 4386
retry 4386
retry 4386
retry 4386
retry 4386
retry 4386
retry 4386
retry 4386
retry 4386
4387 started
retry 4387
retry 4387
retry 4387
retry 4387
retry 4387
retry 4387
retry 4387
retry 4387
retry 4387
retry 4387
retry 4387
4388 started
retry 4388
retry 4388
retry 4388
retry 4388
retry 4388
retry 4388
retry 4388
retry 4388
retry 4388
retry 4388
retry 4388
4389 started
retry 4389
retry 4389
retry 4389
retry 4389
retry 4389
retry 4389
retry 4389
retry 4389
retry 4389
retry 4389
retry 4389
4390 started
retry 4390
retry 4390
retry 4390
retry 4390
retry 4390
retry 4390
retry 4390
retry 4390
retry 4390
retry 4390
retry 4390
4391 started
retry 4391
retry 4391
retry 4391
retry 4391
retry 4391
retry 4

retry 4445
retry 4445
retry 4445
retry 4445
retry 4445
retry 4445
retry 4445
retry 4445
retry 4445
4446 started
retry 4446
retry 4446
retry 4446
retry 4446
retry 4446
retry 4446
retry 4446
retry 4446
retry 4446
retry 4446
retry 4446
4447 started
retry 4447
retry 4447
retry 4447
retry 4447
retry 4447
retry 4447
retry 4447
retry 4447
retry 4447
retry 4447
retry 4447
4448 started
retry 4448
retry 4448
retry 4448
retry 4448
retry 4448
retry 4448
retry 4448
retry 4448
retry 4448
retry 4448
retry 4448
4449 started
retry 4449
retry 4449
retry 4449
retry 4449
retry 4449
retry 4449
retry 4449
retry 4449
retry 4449
retry 4449
retry 4449
4450 started
retry 4450
retry 4450
retry 4450
retry 4450
retry 4450
retry 4450
retry 4450
retry 4450
retry 4450
retry 4450
retry 4450
4451 started
retry 4451
retry 4451
retry 4451
retry 4451
retry 4451
retry 4451
retry 4451
retry 4451
retry 4451
retry 4451
retry 4451
4452 started
retry 4452
retry 4452
retry 4452
retry 4452
retry 4452
retry 4452
retry 4452
retry 4

retry 4506
retry 4506
retry 4506
retry 4506
retry 4506
retry 4506
retry 4506
4507 started
retry 4507
retry 4507
retry 4507
retry 4507
retry 4507
retry 4507
retry 4507
retry 4507
retry 4507
retry 4507
retry 4507
4508 started
retry 4508
retry 4508
retry 4508
retry 4508
retry 4508
retry 4508
retry 4508
retry 4508
retry 4508
retry 4508
retry 4508
4509 started
retry 4509
retry 4509
retry 4509
retry 4509
retry 4509
retry 4509
retry 4509
retry 4509
retry 4509
retry 4509
retry 4509
4510 started
retry 4510
retry 4510
retry 4510
retry 4510
retry 4510
retry 4510
retry 4510
retry 4510
retry 4510
retry 4510
retry 4510
4511 started
retry 4511
retry 4511
retry 4511
retry 4511
retry 4511
retry 4511
retry 4511
retry 4511
retry 4511
retry 4511
retry 4511
4512 started
retry 4512
retry 4512
retry 4512
retry 4512
retry 4512
retry 4512
retry 4512
retry 4512
retry 4512
retry 4512
retry 4512
4513 started
retry 4513
retry 4513
retry 4513
retry 4513
retry 4513
retry 4513
retry 4513
retry 4513
retry 4513
retry 4

retry 4567
retry 4567
retry 4567
retry 4567
retry 4567
4568 started
retry 4568
retry 4568
retry 4568
retry 4568
retry 4568
retry 4568
retry 4568
retry 4568
retry 4568
retry 4568
retry 4568
4569 started
retry 4569
retry 4569
retry 4569
retry 4569
retry 4569
retry 4569
retry 4569
retry 4569
retry 4569
retry 4569
retry 4569
4570 started
retry 4570
retry 4570
retry 4570
retry 4570
retry 4570
retry 4570
retry 4570
retry 4570
retry 4570
retry 4570
retry 4570
4571 started
retry 4571
retry 4571
retry 4571
retry 4571
retry 4571
retry 4571
retry 4571
retry 4571
retry 4571
retry 4571
retry 4571
4572 started
retry 4572
retry 4572
retry 4572
retry 4572
retry 4572
retry 4572
retry 4572
retry 4572
retry 4572
retry 4572
retry 4572
4573 started
retry 4573
retry 4573
retry 4573
retry 4573
retry 4573
retry 4573
retry 4573
retry 4573
retry 4573
retry 4573
retry 4573
4574 started
retry 4574
retry 4574
retry 4574
retry 4574
retry 4574
retry 4574
retry 4574
retry 4574
retry 4574
retry 4574
retry 4574
4575 st

retry 4628
retry 4628
retry 4628
4629 started
retry 4629
retry 4629
retry 4629
retry 4629
retry 4629
retry 4629
retry 4629
retry 4629
retry 4629
retry 4629
retry 4629
4630 started
retry 4630
retry 4630
retry 4630
retry 4630
retry 4630
retry 4630
retry 4630
retry 4630
retry 4630
retry 4630
retry 4630
4631 started
retry 4631
retry 4631
retry 4631
retry 4631
retry 4631
retry 4631
retry 4631
retry 4631
retry 4631
retry 4631
retry 4631
4632 started
retry 4632
retry 4632
retry 4632
retry 4632
retry 4632
retry 4632
retry 4632
retry 4632
retry 4632
retry 4632
retry 4632
4633 started
retry 4633
retry 4633
retry 4633
retry 4633
retry 4633
retry 4633
retry 4633
retry 4633
retry 4633
retry 4633
retry 4633
4634 started
retry 4634
retry 4634
retry 4634
retry 4634
retry 4634
retry 4634
retry 4634
retry 4634
retry 4634
retry 4634
retry 4634
4635 started
retry 4635
retry 4635
retry 4635
retry 4635
retry 4635
retry 4635
retry 4635
retry 4635
retry 4635
retry 4635
retry 4635
4636 started
retry 4636
retry

retry 4689
4690 started
retry 4690
retry 4690
retry 4690
retry 4690
retry 4690
retry 4690
retry 4690
retry 4690
retry 4690
retry 4690
retry 4690
4691 started
retry 4691
retry 4691
retry 4691
retry 4691
retry 4691
retry 4691
retry 4691
retry 4691
retry 4691
retry 4691
retry 4691
4692 started
retry 4692
retry 4692
retry 4692
retry 4692
retry 4692
retry 4692
retry 4692
retry 4692
retry 4692
retry 4692
retry 4692
4693 started
retry 4693
retry 4693
retry 4693
retry 4693
retry 4693
retry 4693
retry 4693
retry 4693
retry 4693
retry 4693
retry 4693
4694 started
retry 4694
retry 4694
retry 4694
retry 4694
retry 4694
retry 4694
retry 4694
retry 4694
retry 4694
retry 4694
retry 4694
4695 started
retry 4695
retry 4695
retry 4695
retry 4695
retry 4695
retry 4695
retry 4695
retry 4695
retry 4695
retry 4695
retry 4695
4696 started
retry 4696
retry 4696
retry 4696
retry 4696
retry 4696
retry 4696
retry 4696
retry 4696
retry 4696
retry 4696
retry 4696
4697 started
retry 4697
retry 4697
retry 4697
retry

retry 4751
retry 4751
retry 4751
retry 4751
retry 4751
retry 4751
retry 4751
retry 4751
retry 4751
retry 4751
retry 4751
4752 started
retry 4752
retry 4752
retry 4752
retry 4752
retry 4752
retry 4752
retry 4752
retry 4752
retry 4752
retry 4752
retry 4752
4753 started
retry 4753
retry 4753
retry 4753
retry 4753
retry 4753
retry 4753
retry 4753
retry 4753
retry 4753
retry 4753
retry 4753
4754 started
retry 4754
retry 4754
retry 4754
retry 4754
retry 4754
retry 4754
retry 4754
retry 4754
retry 4754
retry 4754
retry 4754
4755 started
retry 4755
retry 4755
retry 4755
retry 4755
retry 4755
retry 4755
retry 4755
retry 4755
retry 4755
retry 4755
retry 4755
4756 started
retry 4756
retry 4756
retry 4756
retry 4756
retry 4756
retry 4756
retry 4756
retry 4756
retry 4756
retry 4756
retry 4756
4757 started
retry 4757
retry 4757
retry 4757
retry 4757
retry 4757
retry 4757
retry 4757
retry 4757
retry 4757
retry 4757
retry 4757
4758 started
retry 4758
retry 4758
retry 4758
retry 4758
retry 4758
retry 4

retry 4812
retry 4812
retry 4812
retry 4812
retry 4812
retry 4812
retry 4812
retry 4812
retry 4812
4813 started
retry 4813
retry 4813
retry 4813
retry 4813
retry 4813
retry 4813
retry 4813
retry 4813
retry 4813
retry 4813
retry 4813
4814 started
retry 4814
retry 4814
retry 4814
retry 4814
retry 4814
retry 4814
retry 4814
retry 4814
retry 4814
retry 4814
retry 4814
4815 started
retry 4815
retry 4815
retry 4815
retry 4815
retry 4815
retry 4815
retry 4815
retry 4815
retry 4815
retry 4815
retry 4815
4816 started
retry 4816
retry 4816
retry 4816
retry 4816
retry 4816
retry 4816
retry 4816
retry 4816
retry 4816
retry 4816
retry 4816
4817 started
retry 4817
retry 4817
retry 4817
retry 4817
retry 4817
retry 4817
retry 4817
retry 4817
retry 4817
retry 4817
retry 4817
4818 started
retry 4818
retry 4818
retry 4818
retry 4818
retry 4818
retry 4818
retry 4818
retry 4818
retry 4818
retry 4818
retry 4818
4819 started
retry 4819
retry 4819
retry 4819
retry 4819
retry 4819
retry 4819
retry 4819
retry 4

retry 4873
retry 4873
retry 4873
retry 4873
retry 4873
retry 4873
retry 4873
4874 started
retry 4874
retry 4874
retry 4874
retry 4874
retry 4874
retry 4874
retry 4874
retry 4874
retry 4874
retry 4874
retry 4874
4875 started
retry 4875
retry 4875
retry 4875
retry 4875
retry 4875
retry 4875
retry 4875
retry 4875
retry 4875
retry 4875
retry 4875
4876 started
retry 4876
retry 4876
retry 4876
retry 4876
retry 4876
retry 4876
retry 4876
retry 4876
retry 4876
retry 4876
retry 4876
4877 started
retry 4877
retry 4877
retry 4877
retry 4877
retry 4877
retry 4877
retry 4877
retry 4877
retry 4877
retry 4877
retry 4877
4878 started
retry 4878
retry 4878
retry 4878
retry 4878
retry 4878
retry 4878
retry 4878
retry 4878
retry 4878
retry 4878
retry 4878
4879 started
retry 4879
retry 4879
retry 4879
retry 4879
retry 4879
retry 4879
retry 4879
retry 4879
retry 4879
retry 4879
retry 4879
4880 started
retry 4880
retry 4880
retry 4880
retry 4880
retry 4880
retry 4880
retry 4880
retry 4880
retry 4880
retry 4

retry 4934
retry 4934
retry 4934
retry 4934
retry 4934
4935 started
retry 4935
retry 4935
retry 4935
retry 4935
retry 4935
retry 4935
retry 4935
retry 4935
retry 4935
retry 4935
retry 4935
4936 started
retry 4936
retry 4936
retry 4936
retry 4936
retry 4936
retry 4936
retry 4936
retry 4936
retry 4936
retry 4936
retry 4936
4937 started
retry 4937
retry 4937
retry 4937
retry 4937
retry 4937
retry 4937
retry 4937
retry 4937
retry 4937
retry 4937
retry 4937
4938 started
retry 4938
retry 4938
retry 4938
retry 4938
retry 4938
retry 4938
retry 4938
retry 4938
retry 4938
retry 4938
retry 4938
4939 started
retry 4939
retry 4939
retry 4939
retry 4939
retry 4939
retry 4939
retry 4939
retry 4939
retry 4939
retry 4939
retry 4939
4940 started
retry 4940
retry 4940
retry 4940
retry 4940
retry 4940
retry 4940
retry 4940
retry 4940
retry 4940
retry 4940
retry 4940
4941 started
retry 4941
retry 4941
retry 4941
retry 4941
retry 4941
retry 4941
retry 4941
retry 4941
retry 4941
retry 4941
retry 4941
4942 st

retry 4995
retry 4995
retry 4995
4996 started
retry 4996
retry 4996
retry 4996
retry 4996
retry 4996
retry 4996
retry 4996
retry 4996
retry 4996
retry 4996
retry 4996
4997 started
retry 4997
retry 4997
retry 4997
retry 4997
retry 4997
retry 4997
retry 4997
retry 4997
retry 4997
retry 4997
retry 4997
4998 started
retry 4998
retry 4998
retry 4998
retry 4998
retry 4998
retry 4998
retry 4998
retry 4998
retry 4998
retry 4998
retry 4998
4999 started
retry 4999
retry 4999
retry 4999
retry 4999
retry 4999
retry 4999
retry 4999
retry 4999
retry 4999
retry 4999
retry 4999
5000 started
retry 5000
retry 5000
retry 5000
retry 5000
retry 5000
retry 5000
retry 5000
retry 5000
retry 5000
retry 5000
retry 5000
5001 started
retry 5001
retry 5001
retry 5001
retry 5001
retry 5001
retry 5001
retry 5001
retry 5001
retry 5001
retry 5001
retry 5001
5002 started
retry 5002
retry 5002
retry 5002
retry 5002
retry 5002
retry 5002
retry 5002
retry 5002
retry 5002
retry 5002
retry 5002
5003 started
retry 5003
retry

retry 5056
5057 started
retry 5057
retry 5057
retry 5057
retry 5057
retry 5057
retry 5057
retry 5057
retry 5057
retry 5057
retry 5057
retry 5057
5058 started
retry 5058
retry 5058
retry 5058
retry 5058
retry 5058
retry 5058
retry 5058
retry 5058
retry 5058
retry 5058
retry 5058
5059 started
retry 5059
retry 5059
retry 5059
retry 5059
retry 5059
retry 5059
retry 5059
retry 5059
retry 5059
retry 5059
retry 5059
5060 started
retry 5060
retry 5060
retry 5060
retry 5060
retry 5060
retry 5060
retry 5060
retry 5060
retry 5060
retry 5060
retry 5060
5061 started
retry 5061
retry 5061
retry 5061
retry 5061
retry 5061
retry 5061
retry 5061
retry 5061
retry 5061
retry 5061
retry 5061
5062 started
retry 5062
retry 5062
retry 5062
retry 5062
retry 5062
retry 5062
retry 5062
retry 5062
retry 5062
retry 5062
retry 5062
5063 started
retry 5063
retry 5063
retry 5063
retry 5063
retry 5063
retry 5063
retry 5063
retry 5063
retry 5063
retry 5063
retry 5063
5064 started
retry 5064
retry 5064
retry 5064
retry

retry 5118
retry 5118
retry 5118
retry 5118
retry 5118
retry 5118
retry 5118
retry 5118
retry 5118
retry 5118
retry 5118
5119 started
retry 5119
retry 5119
retry 5119
retry 5119
retry 5119
retry 5119
retry 5119
retry 5119
retry 5119
retry 5119
retry 5119
5120 started
retry 5120
retry 5120
retry 5120
retry 5120
retry 5120
retry 5120
retry 5120
retry 5120
retry 5120
retry 5120
retry 5120
5121 started
retry 5121
retry 5121
retry 5121
retry 5121
retry 5121
retry 5121
retry 5121
retry 5121
retry 5121
retry 5121
retry 5121
5122 started
retry 5122
retry 5122
retry 5122
retry 5122
retry 5122
retry 5122
retry 5122
retry 5122
retry 5122
retry 5122
retry 5122
5123 started
retry 5123
retry 5123
retry 5123
retry 5123
retry 5123
retry 5123
retry 5123
retry 5123
retry 5123
retry 5123
retry 5123
5124 started
retry 5124
retry 5124
retry 5124
retry 5124
retry 5124
retry 5124
retry 5124
retry 5124
retry 5124
retry 5124
retry 5124
5125 started
retry 5125
retry 5125
retry 5125
retry 5125
retry 5125
retry 5

retry 5179
retry 5179
retry 5179
retry 5179
retry 5179
retry 5179
retry 5179
retry 5179
retry 5179
5180 started
retry 5180
retry 5180
retry 5180
retry 5180
retry 5180
retry 5180
retry 5180
retry 5180
retry 5180
retry 5180
retry 5180
5181 started
retry 5181
retry 5181
retry 5181
retry 5181
retry 5181
retry 5181
retry 5181
retry 5181
retry 5181
retry 5181
retry 5181
5182 started
retry 5182
retry 5182
retry 5182
retry 5182
retry 5182
retry 5182
retry 5182
retry 5182
retry 5182
retry 5182
retry 5182
5183 started
retry 5183
retry 5183
retry 5183
retry 5183
retry 5183
retry 5183
retry 5183
retry 5183
retry 5183
retry 5183
retry 5183
5184 started
retry 5184
retry 5184
retry 5184
retry 5184
retry 5184
retry 5184
retry 5184
retry 5184
retry 5184
retry 5184
retry 5184
5185 started
retry 5185
retry 5185
retry 5185
retry 5185
retry 5185
retry 5185
retry 5185
retry 5185
retry 5185
retry 5185
retry 5185
5186 started
retry 5186
retry 5186
retry 5186
retry 5186
retry 5186
retry 5186
retry 5186
retry 5

retry 5240
retry 5240
retry 5240
retry 5240
retry 5240
retry 5240
retry 5240
5241 started
retry 5241
retry 5241
retry 5241
retry 5241
retry 5241
retry 5241
retry 5241
retry 5241
retry 5241
retry 5241
retry 5241
5242 started
retry 5242
retry 5242
retry 5242
retry 5242
retry 5242
retry 5242
retry 5242
retry 5242
retry 5242
retry 5242
retry 5242
5243 started
retry 5243
retry 5243
retry 5243
retry 5243
retry 5243
retry 5243
retry 5243
retry 5243
retry 5243
retry 5243
retry 5243
5244 started
retry 5244
retry 5244
retry 5244
retry 5244
retry 5244
retry 5244
retry 5244
retry 5244
retry 5244
retry 5244
retry 5244
5245 started
retry 5245
retry 5245
retry 5245
retry 5245
retry 5245
retry 5245
retry 5245
retry 5245
retry 5245
retry 5245
retry 5245
5246 started
retry 5246
retry 5246
retry 5246
retry 5246
retry 5246
retry 5246
retry 5246
retry 5246
retry 5246
retry 5246
retry 5246
5247 started
retry 5247
retry 5247
retry 5247
retry 5247
retry 5247
retry 5247
retry 5247
retry 5247
retry 5247
retry 5

The crawled data:

In [53]:
# Notebook display of `data`: evaluating the bare name renders the table of
# crawled records (one row per issue / pull request id) shown below.
data

Unnamed: 0,id,title,labels,reviewers,assignees,comments,opened_time,closed_time,n_commits,n_changed_files,n_participants
0,1,Maven repo,"[status=fixed, type=defect]",[],[],[gissuebot: Original issue created by b.k.oxle...,2014-10-31T17:03:26Z,2014-10-31T18:26:27Z,0,0,1
0,2,`Predicate` missing,"[status=will-not-fix, type=defect]",[],[],[gissuebot: Original issue created by j...@nws...,2014-10-31T17:03:28Z,2014-10-31T18:15:58Z,0,0,1
0,3,Javadoc bug (typo) in com.google.commo...,"[status=fixed, type=defect]",[],[],[gissuebot: Original issue created by ivo.weve...,2014-10-31T17:03:30Z,2014-10-31T18:23:49Z,0,0,1
0,4,Joiner seems to be missing,"[status=duplicate, type=defect]",[],[],[gissuebot: Original issue created by ted.dunn...,2014-10-31T17:03:32Z,2014-10-31T18:16:15Z,0,0,1
0,5,Merge google collections into this pro...,[status=fixed],[],[kevinb9n],[gissuebot: Original issue created by kevinb9n...,2014-10-31T17:03:34Z,2014-10-31T18:19:17Z,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...
0,5427,Not a tracked issue- Refactor TriePars...,[cla: no],[],[],[olofeldre: This branch / commit contains a mo...,2021-03-04T09:46:59Z,2021-03-04T09:50:55Z,1,0,1
0,5428,Move Java-7-VM warning from `MoreObjec...,[cla: yes],[],[],[copybara-service: Move Java-7-VM warning from...,2021-03-09T01:39:16Z,2021-03-09T02:01:38Z,1,0,1
0,5429,Issue #2394 - Replace LinkedList with ...,[cla: yes],[falconetpt],[],"[antonlovstrom: #2394 Refactor TrieParser, Lin...",2021-03-10T12:54:01Z,2021-03-11T21:31:51Z,1,0,7
0,5430,Replace LinkedList with ArrayDeque.,[cla: yes],[],[],[copybara-service: Replace LinkedList with Arr...,2021-03-11T21:09:38Z,2021-03-11T21:31:51Z,0,0,0


Saving the data to a .csv file:

In [54]:
# Persist the crawled table to data.csv in the working directory.
# index=False leaves the row index out of the file: in the preview of `data`
# the leading index column appears to be 0 on every row, so writing it would
# only add a meaningless unnamed first column (which pandas then surfaces as
# "Unnamed: 0" when the file is read back).
data.to_csv('data.csv', index=False)