# GitHub Issues Query

This notebook lists issues (tickets) from a GitHub repository.

Setup:
1. Duplicate `.env.example` to `.env` in this folder
2. Fill in `GITHUB_TOKEN`, `GITHUB_OWNER`, `GITHUB_REPO` (and `GITHUB_API_URL` if you use GitHub Enterprise)
3. Run the cells



## Fetch open issues from the repository

In [3]:
import os
from pathlib import Path
from dotenv import load_dotenv
import requests
import pandas as pd

# Load environment variables from .env if present. override=False means values
# already set in the process environment take precedence over the file.
load_dotenv(dotenv_path=Path(".env"), override=False)

GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
GITHUB_OWNER = os.getenv("GITHUB_OWNER")
GITHUB_REPO = os.getenv("GITHUB_REPO")
# `or` instead of a getenv default: a blank `GITHUB_API_URL=` line in .env sets
# the variable to "", which a getenv default would not replace. The trailing
# slash is stripped so the URL built below never contains `//repos`.
GITHUB_API_URL = (os.getenv("GITHUB_API_URL") or "https://api.github.com").rstrip("/")

# Names of the required settings that are unset or empty.
missing = [name for name, val in {
    "GITHUB_TOKEN": GITHUB_TOKEN,
    "GITHUB_OWNER": GITHUB_OWNER,
    "GITHUB_REPO": GITHUB_REPO,
}.items() if not val]

# Fail early with an actionable message rather than on the first HTTP call.
if missing:
    raise RuntimeError(f"Missing required env vars: {', '.join(missing)}. Copy .env.example to .env and fill them.")

# One shared session so every request carries the same auth and API headers.
session = requests.Session()
session.headers.update({
    "Authorization": f"Bearer {GITHUB_TOKEN}",
    "Accept": "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28",
})

# Issues endpoint of the configured repository; used as the first page URL.
repo_issues_url = f"{GITHUB_API_URL}/repos/{GITHUB_OWNER}/{GITHUB_REPO}/issues"


def fetch_issues(state: str = "open", labels: str | None = None, per_page: int = 50, max_pages: int = 5):
    params = {"state": state, "per_page": per_page}
    if labels:
        params["labels"] = labels

    issues = []
    url = repo_issues_url
    for _ in range(max_pages):
        resp = session.get(url, params=params)
        resp.raise_for_status()
        page_items = resp.json()
        # GitHub returns PRs in this endpoint as well; filter to issues only
        page_issues = [it for it in page_items if "pull_request" not in it]
        issues.extend(page_issues)
        # Pagination via Link header
        next_url = None
        if "link" in resp.headers:
            for part in resp.headers["link"].split(","):
                seg, rel = part.split(";")
                if 'rel="next"' in rel:
                    next_url = seg.strip()[1:-1]
                    break
        if not next_url:
            break
        url, params = next_url, None  # next already encodes params
    return issues

def _issue_to_row(issue):
    """Flatten one raw issue dict into the columns of the overview table."""
    author = issue.get("user") or {}
    assignee_logins = [person.get("login") for person in (issue.get("assignees") or [])]
    label_names = [label.get("name") for label in (issue.get("labels") or [])]
    return {
        "number": issue.get("number"),
        "title": issue.get("title"),
        "state": issue.get("state"),
        "created_at": issue.get("created_at"),
        "updated_at": issue.get("updated_at"),
        "user": author.get("login"),
        # Multi-valued fields are collapsed to comma-separated strings.
        "assignees": ",".join(assignee_logins),
        "labels": ",".join(label_names),
        "url": issue.get("html_url"),
    }


# Pull the open issues and report how many came back.
issues = fetch_issues(state="open", labels=None, per_page=50, max_pages=5)
print(f"Fetched {len(issues)} issues from {GITHUB_OWNER}/{GITHUB_REPO}")

# One table row per issue.
df = pd.DataFrame([_issue_to_row(issue) for issue in issues])

df.head(10)


Fetched 42 issues from TheSoftwareDevGuild/TheGuildGenesis


Unnamed: 0,number,title,state,created_at,updated_at,user,assignees,labels,url
0,126,Use jupyter notebook to analyze github issues,open,2025-10-30T13:17:36Z,2025-10-30T13:17:36Z,joelamouche,joelamouche,"jupyter-notebook,python",https://github.com/TheSoftwareDevGuild/TheGuil...
1,125,Improve UX and design of theguild.dev,open,2025-10-28T11:23:19Z,2025-10-28T11:23:19Z,joelamouche,,"good first issue,ux,design,hacktoberfest",https://github.com/TheSoftwareDevGuild/TheGuil...
2,118,Badge enhancements,open,2025-10-17T08:35:11Z,2025-10-30T13:44:59Z,joelamouche,joelamouche,"enhancement,planning,hacktoberfest",https://github.com/TheSoftwareDevGuild/TheGuil...
3,110,Discord bot: relay git activity on a discord c...,open,2025-10-10T09:57:28Z,2025-10-30T09:00:26Z,joelamouche,,"good first issue,nodejs,typescript,discord-bot...",https://github.com/TheSoftwareDevGuild/TheGuil...
4,106,Implement JWT for our profile api,open,2025-10-09T13:18:18Z,2025-10-27T08:48:29Z,joelamouche,tusharshah21,"front end,rust,back-end,react,typescript,hackt...",https://github.com/TheSoftwareDevGuild/TheGuil...
5,105,Add backend endpoints to fetch attestations,open,2025-10-09T12:31:06Z,2025-10-09T12:31:06Z,joelamouche,,"rust,back-end,db",https://github.com/TheSoftwareDevGuild/TheGuil...
6,104,Blockchain Indexer - Get blockchain data from ...,open,2025-10-09T12:20:02Z,2025-10-09T12:33:22Z,joelamouche,oscarwroche,"enhancement,planning",https://github.com/TheSoftwareDevGuild/TheGuil...
7,103,Improve auth logic for API,open,2025-10-09T12:14:41Z,2025-10-14T08:05:29Z,joelamouche,oscarwroche,"enhancement,planning",https://github.com/TheSoftwareDevGuild/TheGuil...
8,102,Add twitter handle to profiles in the backend,open,2025-10-08T13:18:45Z,2025-10-27T08:43:18Z,joelamouche,ayushhh101,"good first issue,rust,back-end,db,hacktoberfest",https://github.com/TheSoftwareDevGuild/TheGuil...
9,101,Add twitter account to profiles,open,2025-10-08T13:16:50Z,2025-10-16T13:09:30Z,joelamouche,,"enhancement,planning",https://github.com/TheSoftwareDevGuild/TheGuil...


In [5]:
# Fetch all closed tickets. The fetch_issues defaults (50 per page x 5 pages)
# stop after 250 items, counted before pull requests are filtered out, so ask
# for GitHub's largest page size and a generous page budget. Pagination still
# ends as soon as a response has no next page, so small repos cost no extra calls.
closed_tickets = fetch_issues(state="closed", per_page=100, max_pages=100)

## Analyze closed tickets

In [None]:
# Analyze closed tickets.
# We want the following stats about all closed tickets:
# - how many different contributors,
# - how many closed tickets,
# - how many tags are associated with the tickets (counted as distinct tag names),
# - the total sum of contribution points associated with the tickets (from `<N>pts` tags)
import re
import json


def get_contributions_from_tags(tags):
    """Sum the contribution points encoded in a ticket's labels.

    A label counts only when its name is exactly ``<digits>pts`` (for example
    ``"10pts"``); every other label is ignored.

    Args:
        tags: Iterable of label dicts; only the ``"name"`` key is read.

    Returns:
        The integer sum of the points, or ``0`` when no label matches.
    """
    total = 0
    for tag in tags:
        name = tag.get("name")
        # A label without a string name cannot carry points; skip it instead of
        # letting the regex raise TypeError on None.
        if not isinstance(name, str):
            continue
        # One pattern both validates the name and captures the number.
        found = re.fullmatch(r"(\d+)pts", name)
        if found:
            total += int(found.group(1))
    return total

def analyze_closed_tickets(closed_tickets):
    """Aggregate summary statistics over a list of closed tickets.

    Args:
        closed_tickets: List of raw ticket dicts. The ``"user"`` and
            ``"labels"`` keys are read; either may be missing or ``None``.

    Returns:
        A dict with four integer entries:
        ``"contributors"`` (distinct non-empty ``user.login`` values),
        ``"tags"`` (distinct label names across all tickets),
        ``"total_contribution_tokens"`` (sum of the per-ticket results of
        ``get_contributions_from_tags``) and
        ``"number_of_closed_tickets"`` (``len(closed_tickets)``).
    """
    # Sets de-duplicate logins and label names across tickets.
    contributors = set()
    tags = set()
    total_contributions = 0

    for ticket in closed_tickets:
        # NOTE(review): in the GitHub issue payload `user` is the issue author,
        # not the person who closed it - confirm this is the intended notion of
        # "contributor". `or {}` covers a key that is present but null, the
        # same way the overview table reads `user`.
        user = (ticket.get("user") or {}).get("login")
        if user:
            contributors.add(user)

        # Same null-tolerance for labels; one list serves both uses below.
        labels = ticket.get("labels") or []
        for label in labels:
            tags.add(label.get("name"))

        # Points carried by this ticket's labels.
        total_contributions += get_contributions_from_tags(labels)

    return {
        "contributors": len(contributors),
        "tags": len(tags),
        "total_contribution_tokens": total_contributions,
        "number_of_closed_tickets": len(closed_tickets),
    }
# Compute the summary for the closed tickets fetched earlier.
stats = analyze_closed_tickets(closed_tickets)

# Render the summary as indented JSON so the keys are easy to scan.
stats_json = json.dumps(stats, indent=4)
print(stats_json)
    

{
    "contributors": 4,
    "tags": 26,
    "total_contributions": 940,
    "number_of_closed_tickets": 34
}
