In [1]:
import pandas as pd

full_repo_model_df = pd.read_csv('UMLFiles_List_V2.0.csv')
full_repo_model_df

Unnamed: 0,Project,Model Link - Github
0,0-complexity/ovcdoc_public/,https://www.github.com/0-complexity/ovcdoc_pub...
1,0003088/libelektra-qt-gui-test/,https://www.github.com/0003088/libelektra-qt-g...
2,00s/deadman/,https://www.github.com/00s/deadman/tree/master...
3,01db0y/ImageUploader/,https://www.github.com/01db0y/ImageUploader/tr...
4,01db0y/ShareIt/,https://www.github.com/01db0y/ShareIt/tree/mas...
...,...,...
93602,yotomyoto/301_assignment1/,https://www.github.com/yotomyoto/301_assignmen...
93603,zeronero13/af6/,https://www.github.com/zeronero13/af6/tree/mas...
93604,zeronero13/happehardver/,https://www.github.com/zeronero13/happehardver...
93605,Akshit-/ClientServerCommunication/,https://www.github.com/Akshit-/ClientServerCom...


In [2]:
full_repo_df = full_repo_model_df.drop(columns=['Model Link - Github']).drop_duplicates(ignore_index=True)
full_repo_df

Unnamed: 0,Project
0,0-complexity/ovcdoc_public/
1,0003088/libelektra-qt-gui-test/
2,00s/deadman/
3,01db0y/ImageUploader/
4,01db0y/ShareIt/
...,...
24725,vectorxiang/vectorxiang.github.io/
24726,victorsndvg/FPL/
24727,wmde/FundraisingFrontend/
24728,yotomyoto/301_assignment1/


In [4]:
import requests
from dotenv import load_dotenv
from tqdm import tqdm
import os

_ = load_dotenv()

GH_USER = os.environ["GH_USERNAME"]
GH_TOKEN = os.environ["GH_TOKEN"]

In [5]:
def get_star_count(repo_name):
    try:
        r = requests.get('https://api.github.com/repos/{}'.format(repo_name), auth=(GH_USER, GH_TOKEN), timeout=60)
    except requests.exceptions.RequestException as e:
        if r.status_code != 404:
            print('Error: {}'.format(e))
        return None

    return r.json().get('stargazers_count', 0)

In [6]:
print(f"No. of stars: {get_star_count('zakipauzi/concept-domain-coverage/'[:-1])}")

No. of stars: 2


In [8]:
from concurrent.futures import ThreadPoolExecutor

tqdm.pandas()

def add_stargazers_count(df):
    def fetch_star_count(repo):
        return get_star_count(repo[:-1])

    with ThreadPoolExecutor(max_workers=50) as executor:
        df['Stargazers'] = list(tqdm(executor.map(fetch_star_count, df['Project']), total=len(df['Project'])))
    return df

In [9]:
full_repo_df_with_stars = add_stargazers_count(full_repo_df)
full_repo_df_with_stars.head()

100%|██████████| 24730/24730 [1:02:23<00:00,  6.61it/s]


Unnamed: 0,Project,Stargazers
0,0-complexity/ovcdoc_public/,0
1,0003088/libelektra-qt-gui-test/,0
2,00s/deadman/,0
3,01db0y/ImageUploader/,0
4,01db0y/ShareIt/,0


In [10]:
full_repo_df_with_stars.tail()

Unnamed: 0,Project,Stargazers
24725,vectorxiang/vectorxiang.github.io/,2
24726,victorsndvg/FPL/,30
24727,wmde/FundraisingFrontend/,145
24728,yotomyoto/301_assignment1/,0
24729,zeronero13/happehardver/,0


In [11]:
full_repo_df_with_stars.to_csv('UMLFiles_List_V2.0_with_stars.csv', index=False)

### Get GH projects with UML files

In [31]:
import requests
import pandas as pd

def get_repos_with_uml_files():

    headers = {
    "Accept": "application/vnd.github+json",
    "Authorization": f"Bearer {GH_TOKEN}",
    "X-GitHub-Api-Version": "2022-11-28"}
 
    ctr = 1

    repo_list = []

    url = "https://api.github.com/search/repositories?q=language:Java&stars:>=1000&per_page:100"
    
    response = requests.get(url, headers=headers)

    print(f"Total results: {response.json()['total_count']}")

    return response.json()

    if response.json()['total_count'] == 0:
        return repo_list

    print("1000 results max. Retrieving...")

    while response.status_code == 200:

        if not response.json()['items']:
            print(response.json())
            print("No more results. Exiting...")
            break

        for repo in response.json()['items']:
            repo_list.append(repo)

        print(f"Total repos retrieved: {len(repo_list)}")

        ctr += 1

        if 'next' not in response.links:
            print("No more results. Exiting...")
            break

        next_url = response.links['next']['url']

        response = requests.get(next_url, headers=headers)
    
    return repo_list

In [32]:
uml_repos = get_repos_with_uml_files()

Total results: 17014719


In [35]:
import pandas as pd

repo_df = pd.DataFrame(uml_repos)
repo_df.head()

Unnamed: 0,id,node_id,name,full_name,private,owner,html_url,description,fork,url,...,is_template,web_commit_signoff_required,topics,visibility,forks,open_issues,watchers,default_branch,permissions,score
0,132464395,MDEwOlJlcG9zaXRvcnkxMzI0NjQzOTU=,JavaGuide,Snailclimb/JavaGuide,False,"{'login': 'Snailclimb', 'id': 29880145, 'node_...",https://github.com/Snailclimb/JavaGuide,「Java学习+面试指南」一份涵盖大部分 Java 程序员所需要掌握的核心知识。准备 Jav...,False,https://api.github.com/repos/Snailclimb/JavaGuide,...,False,False,"[algorithms, interview, java, jvm, mysql, redi...",public,45536,69,146314,main,"{'admin': False, 'maintain': False, 'push': Fa...",1.0
1,206462776,MDEwOlJlcG9zaXRvcnkyMDY0NjI3NzY=,GitHub-Chinese-Top-Charts,GrowingGit/GitHub-Chinese-Top-Charts,False,"{'login': 'GrowingGit', 'id': 21018904, 'node_...",https://github.com/GrowingGit/GitHub-Chinese-T...,:cn: GitHub中文排行榜，各语言分设「软件 | 资料」榜单，精准定位中文好项目。各取...,False,https://api.github.com/repos/GrowingGit/GitHub...,...,False,False,[],public,13144,189,99618,master,"{'admin': False, 'maintain': False, 'push': Fa...",1.0
2,561730219,R_kgDOIXtSqw,hello-algo,krahets/hello-algo,False,"{'login': 'krahets', 'id': 26993056, 'node_id'...",https://github.com/krahets/hello-algo,"《Hello 算法》：动画图解、一键运行的数据结构与算法教程。支持 Python, Java...",False,https://api.github.com/repos/krahets/hello-algo,...,False,False,"[algo, algorithm, algorithms, book, data-struc...",public,12216,14,96351,main,"{'admin': False, 'maintain': False, 'push': Fa...",1.0
3,22790488,MDEwOlJlcG9zaXRvcnkyMjc5MDQ4OA==,java-design-patterns,iluwatar/java-design-patterns,False,"{'login': 'iluwatar', 'id': 582346, 'node_id':...",https://github.com/iluwatar/java-design-patterns,Design patterns implemented in Java,False,https://api.github.com/repos/iluwatar/java-des...,...,False,False,"[awesome-list, design-patterns, hacktoberfest,...",public,26507,157,89628,master,"{'admin': False, 'maintain': False, 'push': Fa...",1.0
4,127988011,MDEwOlJlcG9zaXRvcnkxMjc5ODgwMTE=,mall,macrozheng/mall,False,"{'login': 'macrozheng', 'id': 15903809, 'node_...",https://github.com/macrozheng/mall,mall项目是一套电商系统，包括前台商城系统及后台管理系统，基于SpringBoot+MyB...,False,https://api.github.com/repos/macrozheng/mall,...,False,False,"[docker, elasticsearch, elk, java, mongodb, my...",public,28743,19,77583,master,"{'admin': False, 'maintain': False, 'push': Fa...",1.0


In [36]:
repo_df.tail()

Unnamed: 0,id,node_id,name,full_name,private,owner,html_url,description,fork,url,...,is_template,web_commit_signoff_required,topics,visibility,forks,open_issues,watchers,default_branch,permissions,score
995,15771504,MDEwOlJlcG9zaXRvcnkxNTc3MTUwNA==,librec,guoguibing/librec,False,"{'login': 'guoguibing', 'id': 2831759, 'node_i...",https://github.com/guoguibing/librec,LibRec: A Leading Java Library for Recommender...,False,https://api.github.com/repos/guoguibing/librec,...,False,False,"[collaborative, collaborative-filtering, facto...",public,1028,81,3237,3.0.0,"{'admin': False, 'maintain': False, 'push': Fa...",1.0
996,42233871,MDEwOlJlcG9zaXRvcnk0MjIzMzg3MQ==,Android-Tips,tangqi92/Android-Tips,False,"{'login': 'tangqi92', 'id': 10348598, 'node_id...",https://github.com/tangqi92/Android-Tips,An awesome list of tips for Android.,False,https://api.github.com/repos/tangqi92/Android-...,...,False,False,"[android-activity, android-aidl, android-apk, ...",public,832,2,3236,master,"{'admin': False, 'maintain': False, 'push': Fa...",1.0
997,4134560,MDEwOlJlcG9zaXRvcnk0MTM0NTYw,jfinal,jfinal/jfinal,False,"{'login': 'jfinal', 'id': 1677603, 'node_id': ...",https://github.com/jfinal/jfinal,JAVA WEB + ORM Framework,False,https://api.github.com/repos/jfinal/jfinal,...,False,False,[],public,1320,12,3234,master,"{'admin': False, 'maintain': False, 'push': Fa...",1.0
998,42849407,MDEwOlJlcG9zaXRvcnk0Mjg0OTQwNw==,GalleryFinal,pengjianbo/GalleryFinal,False,"{'login': 'pengjianbo', 'id': 3264712, 'node_i...",https://github.com/pengjianbo/GalleryFinal,Android自定义相册，实现了拍照、图片选择（单选/多选）、 裁剪（单/多裁剪）、旋转、I...,False,https://api.github.com/repos/pengjianbo/Galler...,...,False,False,[],public,786,119,3226,master,"{'admin': False, 'maintain': False, 'push': Fa...",1.0
999,5728045,MDEwOlJlcG9zaXRvcnk1NzI4MDQ1,android-saripaar,ragunathjawahar/android-saripaar,False,"{'login': 'ragunathjawahar', 'id': 1141970, 'n...",https://github.com/ragunathjawahar/android-sar...,UI form validation library for Android,False,https://api.github.com/repos/ragunathjawahar/a...,...,False,False,"[android, java]",public,460,62,3222,master,"{'admin': False, 'maintain': False, 'push': Fa...",1.0


In [37]:
repo_df.shape

(1000, 81)

In [38]:
repo_df.to_csv('uml_repos.csv', index=False, encoding='utf-8')

### Remove UML tools