In [1]:
import requests
from bs4 import BeautifulSoup
import sqlite3
import time

In [2]:
ORG_URL = "https://github.com/orgs/google/repositories"
BASE_URL = "https://github.com"

In [3]:
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    )
}

In [4]:
# Google のリポジトリ一覧ページ取得
time.sleep(1)
res = requests.get(ORG_URL, headers=headers, timeout=10)
print("ステータスコード:", res.status_code)
res.raise_for_status()

soup = BeautifulSoup(res.text, "html.parser")

repo_links = []
seen = set()
name_language_stars = {}

# /google/xxx 形式のリンクだけを収集
for a in soup.find_all("a", href=True):
    href = a["href"]

    if href.startswith("/google/") and href.count("/") == 2:
        if href not in seen:
            seen.add(href)
            repo_links.append(a)

# 処理開始
for a in repo_links:
    name = a.get_text(strip=True)
    repo_path = a["href"]
    repo_url = BASE_URL + repo_path

    print(f"\n--- {name} を取得中: {repo_url}")
    time.sleep(1)

    r = requests.get(repo_url, headers=headers, timeout=10)
    if r.status_code != 200:
        print("  リポジトリページ取得失敗:", r.status_code)
        continue

    rsoup = BeautifulSoup(r.text, "html.parser")

    # 言語を抽出（例: "Python 53%" → Python）
    language = "Unknown"
    for tag in rsoup.find_all("a"):
        text = tag.get_text(strip=True)
        if text.endswith("%") and " " in text:
            language = text.split()[0]
            break
    lang_tag = rsoup.find("span", class_="color-fg-default text-bold mr-1")
    language = lang_tag.get_text(strip=True) if lang_tag else "Unknown"        
    # スター数を抽出（例: "1.2k stars" → 1.2k）
    stars = "0"
    for tag in rsoup.find_all("a"):
        text = tag.get_text(strip=True)
        if "stars" in text:
            stars = text.split()[0]
            break

    print("SCRAPED:", name, language, stars)
    name_language_stars[name] = (language, stars)

print("見つかったリポジトリ数:", len(repo_links))
print("件数:", len(name_language_stars))
print("例:", list(name_language_stars.items())[:3])

ステータスコード: 200

--- perfetto を取得中: https://github.com/google/perfetto
SCRAPED: perfetto C++ 5kstars

--- XNNPACK を取得中: https://github.com/google/XNNPACK
SCRAPED: XNNPACK C 2.2kstars

--- go-github を取得中: https://github.com/google/go-github
SCRAPED: go-github Go 11kstars

--- conscrypt を取得中: https://github.com/google/conscrypt
SCRAPED: conscrypt Java 1.4kstars

--- zerocopy を取得中: https://github.com/google/zerocopy
SCRAPED: zerocopy Rust 2.1kstars

--- xls を取得中: https://github.com/google/xls
SCRAPED: xls C++ 1.4kstars

--- ksp を取得中: https://github.com/google/ksp
SCRAPED: ksp Kotlin 3.3kstars

--- adk-go を取得中: https://github.com/google/adk-go
SCRAPED: adk-go Go 4.9kstars

--- site-kit-wp を取得中: https://github.com/google/site-kit-wp
SCRAPED: site-kit-wp JavaScript 1.3kstars

--- wasefire を取得中: https://github.com/google/wasefire
SCRAPED: wasefire Rust 129stars

--- orbax を取得中: https://github.com/google/orbax
SCRAPED: orbax Python 455stars

--- adk-python を取得中: https://github.com/google/adk-pyt

In [5]:
import sqlite3

path = ''
db_name = 'test.db'
db_path = path + db_name

In [6]:
try:
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()

    sql = '''
        CREATE TABLE google (
            name TEXT,
            language TEXT,
            stars INT
        );
    '''

    cur.execute(sql)
    conn.commit()
    print("テーブル作成OK")

except sqlite3.Error as e:
    print(f"エラーが発生しました: {e}")

finally:
    conn.close()

テーブル作成OK


In [7]:
try:
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()

    sql = "INSERT INTO google (name, language, stars) VALUES (?, ?, ?)"

    for name, (language, stars) in name_language_stars.items():
        cur.execute(sql, (name, language, stars))

    conn.commit()
    print("INSERT 完了")

except sqlite3.Error as e:
    print(f"エラーが発生しました: {e}")

finally:
    conn.close()

INSERT 完了


In [8]:
try:
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()

    sql = "SELECT * FROM google"
    cur.execute(sql)

except sqlite3.Error as e:
    print(f"エラーが発生しました: {e}")

else:
    for idx, (name, language, stars) in enumerate(cur, start=1):
        print(idx, name, language, stars)

finally:
    conn.close()

1 perfetto C++ 5kstars
2 XNNPACK C 2.2kstars
3 go-github Go 11kstars
4 conscrypt Java 1.4kstars
5 zerocopy Rust 2.1kstars
6 xls C++ 1.4kstars
7 ksp Kotlin 3.3kstars
8 adk-go Go 4.9kstars
9 site-kit-wp JavaScript 1.3kstars
10 wasefire Rust 129stars
11 orbax Python 455stars
12 adk-python Python 15.6kstars
13 osv-scalibr Go 538stars
14 or-tools C++ 12.7kstars
15 angle C++ 3.8kstars
16 koladata C++ 27stars
17 nomulus Java 1.8kstars
18 nearby C++ 888stars
19 skia-buildbot Go 158stars
20 device-infra Java 58stars
21 selinux-policy-languages Unknown 14stars
22 aarch64-esr-decoder Rust 99stars
23 kotlin-fhirpath Kotlin 6stars
24 dawn C++ 781stars
25 yggdrasil-decision-forests C++ 622stars
26 tunix Python 1.9kstars
27 dwh-migration-tools Java 54stars
28 desugar_jdk_libs Java 389stars
29 tcmalloc C++ 5kstars
30 meridian Python 1.2kstars
