From 9cee763edcddfd1702abc6d4345db4cb70dcec41 Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Thu, 22 Feb 2024 02:26:11 +0000 Subject: [PATCH] Add retries to github repo reader (#10980) --- .../readers/github/repository/base.py | 3 +++ .../github/repository/github_client.py | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/llama-index-integrations/readers/llama-index-readers-github/llama_index/readers/github/repository/base.py b/llama-index-integrations/readers/llama-index-readers-github/llama_index/readers/github/repository/base.py index 749e24f0e680e..cd19df3b2a276 100644 --- a/llama-index-integrations/readers/llama-index-readers-github/llama_index/readers/github/repository/base.py +++ b/llama-index-integrations/readers/llama-index-readers-github/llama_index/readers/github/repository/base.py @@ -76,6 +76,7 @@ def __init__( verbose: bool = False, concurrent_requests: int = 5, timeout: Optional[int] = 5, + retries: int = 0, filter_directories: Optional[Tuple[List[str], FilterType]] = None, filter_file_extensions: Optional[Tuple[List[str], FilterType]] = None, ): @@ -92,6 +93,7 @@ def __init__( - concurrent_requests (int): Number of concurrent requests to make to the Github API. - timeout (int or None): Timeout for the requests to the Github API. Default is 5. + - retries (int): Number of retries for requests made to the Github API. Default is 0. - filter_directories (Optional[Tuple[List[str], FilterType]]): Tuple containing a list of directories and a FilterType. 
If the FilterType is INCLUDE, only the files in the directories in the list will be @@ -115,6 +117,7 @@ def __init__( self._verbose = verbose self._concurrent_requests = concurrent_requests self._timeout = timeout + self._retries = retries self._filter_directories = filter_directories self._filter_file_extensions = filter_file_extensions diff --git a/llama-index-integrations/readers/llama-index-readers-github/llama_index/readers/github/repository/github_client.py b/llama-index-integrations/readers/llama-index-readers-github/llama_index/readers/github/repository/github_client.py index e31e970d39ee9..5e804bc0b781c 100644 --- a/llama-index-integrations/readers/llama-index-readers-github/llama_index/readers/github/repository/github_client.py +++ b/llama-index-integrations/readers/llama-index-readers-github/llama_index/readers/github/repository/github_client.py @@ -272,6 +272,7 @@ async def request( method: str, headers: Dict[str, Any] = {}, timeout: Optional[int] = 5, + retries: int = 0, **kwargs: Any, ) -> Any: """ @@ -285,6 +286,7 @@ async def request( - `method (str)`: HTTP method to use for the request. - `headers (dict)`: HTTP headers to include in the request. - `timeout (int or None)`: Timeout for the request in seconds. Default is 5. + - `retries (int)`: Number of connection retries for the request (only failed connection attempts are retried, not HTTP error responses). Default is 0. - `**kwargs`: Keyword arguments to pass to the endpoint URL. 
Returns: @@ -297,7 +299,7 @@ async def request( Examples: >>> response = client.request("getTree", "GET", owner="owner", repo="repo", - tree_sha="tree_sha", timeout=5) + tree_sha="tree_sha", timeout=5, retries=0) """ try: import httpx @@ -314,6 +316,7 @@ async def request( headers=_headers, base_url=self._base_url, timeout=timeout, + transport=httpx.AsyncHTTPTransport(retries=retries), ) as _client: try: response = await _client.request( @@ -331,6 +334,7 @@ async def get_branch( branch: Optional[str] = None, branch_name: Optional[str] = None, timeout: Optional[int] = 5, + retries: int = 0, ) -> GitBranchResponseModel: """ Get information about a branch. (Github API endpoint: getBranch). @@ -339,6 +343,9 @@ async def get_branch( - `owner (str)`: Owner of the repository. - `repo (str)`: Name of the repository. - `branch (str)`: Name of the branch. + - `branch_name (str)`: Name of the branch (alternative to `branch`). + - `timeout (int or None)`: Timeout for the request in seconds. Default is 5. + - `retries (int)`: Number of connection retries for the request (only failed connection attempts are retried, not HTTP error responses). Default is 0. Returns: - `branch_info (GitBranchResponseModel)`: Information about the branch. @@ -360,6 +367,7 @@ async def get_branch( repo=repo, branch=branch, timeout=timeout, + retries=retries, ) ).text ) @@ -370,6 +378,7 @@ async def get_tree( repo: str, tree_sha: str, timeout: Optional[int] = 5, + retries: int = 0, ) -> GitTreeResponseModel: """ Get information about a tree. (Github API endpoint: getTree). @@ -379,6 +388,7 @@ async def get_tree( - `repo (str)`: Name of the repository. - `tree_sha (str)`: SHA of the tree. - `timeout (int or None)`: Timeout for the request in seconds. Default is 5. + - `retries (int)`: Number of connection retries for the request (only failed connection attempts are retried, not HTTP error responses). Default is 0. Returns: - `tree_info (GitTreeResponseModel)`: Information about the tree. 
@@ -395,6 +405,7 @@ async def get_tree( repo=repo, tree_sha=tree_sha, timeout=timeout, + retries=retries, ) ).text ) @@ -405,6 +416,7 @@ async def get_blob( repo: str, file_sha: str, timeout: Optional[int] = 5, + retries: int = 0, ) -> Optional[GitBlobResponseModel]: """ Get information about a blob. (Github API endpoint: getBlob). @@ -414,6 +426,7 @@ async def get_blob( - `repo (str)`: Name of the repository. - `file_sha (str)`: SHA of the file. - `timeout (int or None)`: Timeout for the request in seconds. Default is 5. + - `retries (int)`: Number of connection retries for the request (only failed connection attempts are retried, not HTTP error responses). Default is 0. Returns: - `blob_info (GitBlobResponseModel)`: Information about the blob. @@ -431,6 +444,7 @@ async def get_blob( repo=repo, file_sha=file_sha, timeout=timeout, + retries=retries, ) ).text ) @@ -444,6 +458,7 @@ async def get_commit( repo: str, commit_sha: str, timeout: Optional[int] = 5, + retries: int = 0, ) -> GitCommitResponseModel: """ Get information about a commit. (Github API endpoint: getCommit). @@ -453,6 +468,7 @@ async def get_commit( - `repo (str)`: Name of the repository. - `commit_sha (str)`: SHA of the commit. - `timeout (int or None)`: Timeout for the request in seconds. Default is 5. + - `retries (int)`: Number of connection retries for the request (only failed connection attempts are retried, not HTTP error responses). Default is 0. Returns: - `commit_info (GitCommitResponseModel)`: Information about the commit. @@ -469,6 +485,7 @@ async def get_commit( repo=repo, commit_sha=commit_sha, timeout=timeout, + retries=retries, ) ).text )