-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_collector.py
34 lines (25 loc) · 930 Bytes
/
data_collector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os
import subprocess
import time
from datetime import datetime
from datetime import timezone

from github import Github
# Collect recently created Python repositories from GitHub and clone the
# small ones into ./repos/<owner>/<name>.
#
# The script walks backwards over three one-day windows, runs one repository
# search per window, and clones every hit whose reported size is at most 5000.

# Read the access token. The context manager closes the file promptly, and
# strip() removes the trailing newline most editors append, which would
# otherwise become part of the credential.
with open("token.txt", "r") as token_file:
    ACCESS_TOKEN = token_file.read().strip()
github = Github(ACCESS_TOKEN)

SECONDS_IN_A_DAY = 86400

# Begin with the most recent 24-hour window; each pass shifts it back a day.
end_time = time.time()
start_time = end_time - SECONDS_IN_A_DAY

for _ in range(3):
    # Format both window edges as UTC calendar dates for the search qualifier.
    start_time_str = datetime.fromtimestamp(start_time, tz=timezone.utc).strftime("%Y-%m-%d")
    end_time_str = datetime.fromtimestamp(end_time, tz=timezone.utc).strftime("%Y-%m-%d")
    query = f"language:python created:{start_time_str}..{end_time_str}"
    print(query)

    # Shift the window now so the next pass covers the previous day.
    start_time -= SECONDS_IN_A_DAY
    end_time -= SECONDS_IN_A_DAY

    result = github.search_repositories(query)
    for position, repository in enumerate(result, start=1):
        print(f"{position}/{result.totalCount}")

        # Skip large repositories (size as reported by the API; presumably
        # kilobytes - confirm against the GitHub REST documentation).
        if repository.size > 5000:
            continue

        print(repository.clone_url)
        destination = f"repos/{repository.owner.login}/{repository.name}"

        # Consecutive windows share a boundary date, so a repository can be
        # returned twice; git would refuse to clone into the existing
        # directory anyway, so skip it without spawning a process.
        if os.path.exists(destination):
            continue

        # Pass an argument list (no shell) so API-supplied names and URLs can
        # never be interpreted as shell syntax; "--" ends option parsing.
        completed = subprocess.run(
            ["git", "clone", "--", repository.clone_url, destination],
            check=False,
        )
        if completed.returncode != 0:
            # Best effort: report the failure and carry on with the next repo.
            print(f"git clone failed with exit code {completed.returncode}")