# 获取 GItHub Issues & PRs

以 Langchain 项目 (https://github.com/langchain-ai/langchain) 为例，下面我们分析根据筛选条件获取 GitHub 的 Issues 和 Pull Requests

In [3]:
import requests
import datetime
import os

class GitHubClient:
    def __init__(self, token):
        self.token = token
        self.headers = {'Authorization': f'token {self.token}'}

    def fetch_updates(self, repo, since=None, until=None):
        updates = {
            'commits': self.fetch_commits(repo, since, until),
            'issues': self.fetch_issues(repo, since, until),
            'pull_requests': self.fetch_pull_requests(repo, since, until)
        }
        return updates

    def fetch_commits(self, repo, since=None, until=None):
        url = f'https://api.github.com/repos/{repo}/commits'
        params = {}
        if since:
            params['since'] = since
        if until:
            params['until'] = until

        response = requests.get(url, headers=self.headers, params=params)
        response.raise_for_status()
        return response.json()

    def fetch_issues(self, repo, since=None, until=None):
        url = f'https://api.github.com/repos/{repo}/issues'
        params = {
            'state': 'closed',
            'since': since,
            'until': until
        }
        response = requests.get(url, headers=self.headers, params=params)
        response.raise_for_status()
        return response.json()

    def fetch_pull_requests(self, repo, since=None, until=None):
        url = f'https://api.github.com/repos/{repo}/pulls'
        params = {
            'state': 'closed',
            'since': since,
            'until': until
        }
        response = requests.get(url, headers=self.headers, params=params)
        response.raise_for_status()
        return response.json()

    def export_daily_progress(self, repo):
        today = datetime.datetime.now().date().isoformat()
        updates = self.fetch_updates(repo, since=today)
        
        repo_dir = os.path.join('daily_progress', repo.replace("/", "_"))
        os.makedirs(repo_dir, exist_ok=True)
        
        file_path = os.path.join(repo_dir, f'{today}.md')
        with open(file_path, 'w') as file:
            file.write(f"# Daily Progress for {repo} ({today})\n\n")
            file.write("\n## Issues Closed Today\n")
            for issue in updates['issues']:
                file.write(f"- {issue['title']} #{issue['number']}\n")
            file.write("\n## Pull Requests Merged Today\n")
            for pr in updates['pull_requests']:
                file.write(f"- {pr['title']} #{pr['number']}\n")
        
        print(f"Exported daily progress to {file_path}")
        return file_path

    def export_time_range_progress(self, repo, days):
        today = datetime.datetime.now().date()
        since = (today - datetime.timedelta(days=days)).isoformat()
        until = today.isoformat()
        
        updates = self.fetch_updates(repo, since=since, until=until)
        
        repo_dir = os.path.join('daily_progress', repo.replace("/", "_"))
        os.makedirs(repo_dir, exist_ok=True)
        
        date_str = f"last_{days}_days"
        file_path = os.path.join(repo_dir, f'{date_str}.md')
        with open(file_path, 'w') as file:
            file.write(f"# Progress for {repo} (Last {days} Days)\n\n")
            file.write(f"\n## Issues Closed in the Last {days} Days\n")
            for issue in updates['issues']:
                file.write(f"- {issue['title']} #{issue['number']}\n")
            file.write(f"\n## Pull Requests Merged in the Last {days} Days\n")
            for pr in updates['pull_requests']:
                file.write(f"- {pr['title']} #{pr['number']}\n")
        
        print(f"Exported time-range progress to {file_path}")
        return file_path


In [4]:

github_client = GitHubClient(token="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")


### 查看获取的 Issues 和 PRs 原始格式

In [5]:
result = github_client.fetch_updates(repo="langchain-ai/langchain")

HTTPError: 401 Client Error: Unauthorized for url: https://api.github.com/repos/langchain-ai/langchain/commits

In [31]:
from pprint import pprint

pprint(result["pull_requests"][0])

{'_links': {'comments': {'href': 'https://api.github.com/repos/langchain-ai/langchain/issues/25021/comments'},
            'commits': {'href': 'https://api.github.com/repos/langchain-ai/langchain/pulls/25021/commits'},
            'html': {'href': 'https://github.com/langchain-ai/langchain/pull/25021'},
            'issue': {'href': 'https://api.github.com/repos/langchain-ai/langchain/issues/25021'},
            'review_comment': {'href': 'https://api.github.com/repos/langchain-ai/langchain/pulls/comments{/number}'},
            'review_comments': {'href': 'https://api.github.com/repos/langchain-ai/langchain/pulls/25021/comments'},
            'self': {'href': 'https://api.github.com/repos/langchain-ai/langchain/pulls/25021'},
            'statuses': {'href': 'https://api.github.com/repos/langchain-ai/langchain/statuses/2301e5944cd71d0f054426aadb2185b2c2c86c28'}},
 'active_lock_reason': None,
 'assignee': None,
 'assignees': [],
 'author_association': 'CONTRIBUTOR',
 'auto_merge': None

In [8]:
pprint(result["issues"][0])

{'active_lock_reason': None,
 'assignee': None,
 'assignees': [],
 'author_association': 'CONTRIBUTOR',
 'body': '**Description:**\r\n'
         '\r\n'
         'The get  time point method in the _consume() method of '
         'core.rate_limiters.InMemoryRateLimiter uses time.time(), which can '
         'be affected by system time backwards. Therefore, it is recommended '
         'to use the monotonically increasing monotonic() to obtain the '
         'time\r\n'
         '\r\n'
         '```python\r\n'
         '        with self._consume_lock:\r\n'
         '            now = time.time()  # time.time() -> time.monotonic()\r\n'
         '\r\n'
         '            # initialize on first call to avoid a burst\r\n'
         '            if self.last is None:\r\n'
         '                self.last = now\r\n'
         '\r\n'
         '            elapsed = now - self.last  # when use time.time(), '
         'elapsed may be negative when system time backwards\r\n'
         '\r\n'
    

In [26]:
# Get today's date in ISO 8601 format, starting from midnight
today = datetime.datetime.now().replace(hour=0, minute=0, second=0, microsecond=0).isoformat()

# Fetch all merged PRs for today
merged_prs_today = github_client.fetch_pull_requests(repo='langchain-ai/langchain')

# Print the titles of merged PRs
for pr in merged_prs_today:
    print(f"PR #{pr['number']}: {pr['title']} (Merged at {pr['closed_at']})")

PR #24954: langchain[patch]: Release 0.2.12 (Merged at 2024-08-02T04:04:50Z)
PR #24952: core[patch]: Release 0.2.27 (Merged at 2024-08-02T01:43:24Z)
PR #24951: infra: test core on py 3.9, 10, 11 (Merged at 2024-08-02T01:23:37Z)
PR #24950: docs: fix redirect (Merged at 2024-08-02T00:45:54Z)
PR #24948: core: docstrings `BaseCallbackHandler update (Merged at 2024-08-02T00:46:53Z)
PR #24936: core[patch]: Fix tool args schema inherited field parsing (Merged at 2024-08-02T01:36:33Z)
PR #24862: core: runnable config ensure_config deep copy from var_child_runnable… (Merged at 2024-08-02T00:30:32Z)
PR #24376: [community]: adding artifact to Tavily search (Merged at 2024-08-02T04:12:11Z)


In [25]:
issues_today = github_client.fetch_issues(repo='langchain-ai/langchain')

# Print the titles of issues
for issue in issues_today:
    print(f"issue #{issue['number']}: {issue['title']} ")

issue #24925: BaseTool's `tool_call_schema` ignores inherited fields of  an `args_schema`, causing incomplete tool inputs 


In [None]:
datetime.datetime.now().replace(hour=0, minute=0, second=0, microsecond=0).isoformat()

In [32]:
github_client.export_daily_progress(repo='langchain-ai/langchain')

Exported daily progress to daily_progress/langchain-ai_langchain/2024-08-04.md


'daily_progress/langchain-ai_langchain/2024-08-04.md'