In [150]:
import asyncio
import dataclasses
from datetime import datetime
import os
import pprint
from typing import List, Optional


from dotenv import load_dotenv

# load the environment variables from .env
load_dotenv();

In [25]:
pp = pprint.PrettyPrinter(indent=2)

In [2]:
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport

In [17]:
endpoint = "https://api.github.com/graphql"
github_token = os.getenv("github_token")

In [5]:
%%bash
curl -H "Authorization: bearer $github_token" -X POST -d " \
 { \
   \"query\": \"query { viewer { login }}\" \
 } \
" https://api.github.com/graphql

{"data":{"viewer":{"login":"xmnlab"}}}


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100    86  100    39  100    47     61     74 --:--:-- --:--:-- --:--:--   136


In [84]:
transport = AIOHTTPTransport(
    headers={
        "Authorization": f"bearer {github_token}"
    },
    url=endpoint
)

async def query(endpoint: str, github_token: str, transport: AIOHTTPTransport):
    # Using `async with` on the client will start a connection on the transport
    # and provide a `session` variable to execute queries on this connection
    async with Client(
        transport=transport, 
        fetch_schema_from_transport=True,
    ) as session:

        # Execute single query
        query = gql(
            """
            query {
              viewer {
                login
              }
            }
        """
        )

        result = await session.execute(query)
        pp.pprint(result)


await query(endpoint, github_token, transport)

{'viewer': {'login': 'xmnlab'}}


In [217]:
@dataclasses.dataclass
class GitHubGraphQLSearchFilter:
    authors: List[str] = dataclasses.field(default_factory=list)
    assignee: List[str] = dataclasses.field(default_factory=list)
    search_type: str="pr" # pr or issue
    start_datetime: Optional[datetime] = None
    end_datetime: Optional[datetime] = None
    specific_datetime: Optional[datetime] = None
    status: List[str] = dataclasses.field(default_factory=list)  # open, closed, merged
        

In [220]:
class GitHubGraphQLSearch:
    token: str=""
    trasnport: Client = None
    org_name: str = ""
    repo_name: str = ""
        
    def __init__(self, token: str, endpoint: str, org_name: str, repo_name: str):
        self.token = token
        self.org_name = org_name
        self.repo_name = repo_name
        self.transport = AIOHTTPTransport(
            headers={
                "Authorization": f"bearer {self.token}"
            },
            url=endpoint
        )
        
    def render_tmpl(self, tmpl: str, input_data: dict):
        result = tmpl
        new_result = []
        
        for line in result.split("\n"):
            if "#" in line:
                continue
            new_result.append(line)
        
        result = "\n".join(new_result)
            

        for k, v in input_data.items():
            content = None
            if isinstance(v, str):
                content = v
            elif isinstance(v, list):
                content = " ".join([
                    f"{k}:{vi}" for vi in v
                ])
            else:
                content = str(v)

            result = result.replace("{{" + k + "}}", content)
        return result


    async def search_pagination(self, session: Client, gql_tmpl: str, variables: dict):
        has_next_page = True
        pagination_after = ""
        limit = 100
        results = []
        has_result = False

        while has_next_page:
            _variables = dict(variables)
            _variables.update(
                after="" if not pagination_after else f', after: "{pagination_after}"'
            )

            gql_stmt = self.render_tmpl(gql_tmpl, _variables)
            print(gql_stmt)

            query = gql(gql_stmt)
            params = {"first": limit}

            result = await session.execute(query, variable_values=params)

            try:
                has_next_page = result["search"]["pageInfo"]["hasNextPage"]
                pagination_after = result["search"]["pageInfo"]["endCursor"]
                has_result = True
            except:
                has_next_page = False
                has_result = False

            if not has_result:
                break

            results += result["search"]["edges"]
        return results
    
    async def search(self, search_filters: GitHubGraphQLSearchFilter):
        # Using `async with` on the client will start a connection on the transport
        # and provide a `session` variable to execute queries on this connection
        async with Client(
            transport=self.transport, 
            fetch_schema_from_transport=True,
        ) as session:
            if search_filters.search_type not in ["pr", "issue"]:
                raise Exception("search_type should be 'pr' or 'issue'")

            gql_node_type = "PullRequest" if search_filters.search_type == "pr" else "Issue"

            # Execute single query
            gql_tmpl = """
            query ($first: Int!)  {
              search (query: "repo:{{org_name}}/{{repo_name}} is:{{search_type}} {{status}} {{assignee}} {{author}}", type: ISSUE, first: $first {{after}}) {
                edges {
                  node {
                    ... on {{gql_node_type}} {
                      id
                      number
                      url
                      title
                      author {
                        login
                      }
                      createdAt
                      closedAt
                      mergedAt  # [search_type=="pr"]
                      lastEditedAt
                      state
                      updatedAt
                    }
                  }
                }
                issueCount
                pageInfo {
                    startCursor
                    hasNextPage
                    endCursor
                }
              }
            }
            """

            variables = {
                "org_name": self.org_name,
                "repo_name": self.repo_name,
                "gql_node_type": gql_node_type,
                "search_type": search_filters.search_type,
                "author": search_filters.authors,
                "assignee": search_filters.assignee,
                "status": " ".join([f"is:{status}" for status in search_filters.status])
            }

            return await self.search_pagination(session, gql_tmpl, variables)
        

In [222]:
search_filters = GitHubGraphQLSearchFilter(
    search_type="pr",
    authors=[
        "pearu",
    ]
)

gh_search = GitHubGraphQLSearch(token=github_token, endpoint=endpoint, org_name="pytorch", repo_name="pytorch")
results = await gh_search.search(search_filters)


            query ($first: Int!)  {
              search (query: "repo:pytorch/pytorch is:pr   author:pearu", type: ISSUE, first: $first ) {
                edges {
                  node {
                    ... on PullRequest {
                      id
                      number
                      url
                      title
                      author {
                        login
                      }
                      createdAt
                      closedAt
                      lastEditedAt
                      state
                      updatedAt
                    }
                  }
                }
                issueCount
                pageInfo {
                    startCursor
                    hasNextPage
                    endCursor
                }
              }
            }
            


In [223]:
len(results)

18

### Questions:

* What `<repo>` PRs are "Open"?
* What `<repo>` PRs have been Closed during that time?
* Only return for specific members for `<repo>` (for PRs - this is who authored the PR, for Issues - this is who is assigned)
* What `<repo>` Issues are linked to each of these PRs?

In [224]:
gh_search = GitHubGraphQLSearch(token=github_token, endpoint=endpoint, org_name="pytorch", repo_name="pytorch")

### What `<repo>` PRs are "Open"?

In [225]:
search_filters = GitHubGraphQLSearchFilter(
    search_type="pr",
    authors=[
        "xmnlab",
        "pearu",
        "IvanYashchuk",
        "ysiraichi",
        "hameerabbasi"
    ],
    status=["open"]
)
results = await gh_search.search(search_filters)


            query ($first: Int!)  {
              search (query: "repo:pytorch/pytorch is:pr is:open  author:xmnlab author:pearu author:IvanYashchuk author:ysiraichi author:hameerabbasi", type: ISSUE, first: $first ) {
                edges {
                  node {
                    ... on PullRequest {
                      id
                      number
                      url
                      title
                      author {
                        login
                      }
                      createdAt
                      closedAt
                      lastEditedAt
                      state
                      updatedAt
                    }
                  }
                }
                issueCount
                pageInfo {
                    startCursor
                    hasNextPage
                    endCursor
                }
              }
            }
            


In [226]:
len(results)

24

### What `<repo>` PRs have been Closed during that time?

In [227]:
search_filters = GitHubGraphQLSearchFilter(
    search_type="pr",
    authors=[
        "xmnlab",
        "pearu",
        "IvanYashchuk",
        "ysiraichi",
        "hameerabbasi"
    ],
    status=["closed"]
)
results = await gh_search.search(search_filters)


            query ($first: Int!)  {
              search (query: "repo:pytorch/pytorch is:pr is:closed  author:xmnlab author:pearu author:IvanYashchuk author:ysiraichi author:hameerabbasi", type: ISSUE, first: $first ) {
                edges {
                  node {
                    ... on PullRequest {
                      id
                      number
                      url
                      title
                      author {
                        login
                      }
                      createdAt
                      closedAt
                      lastEditedAt
                      state
                      updatedAt
                    }
                  }
                }
                issueCount
                pageInfo {
                    startCursor
                    hasNextPage
                    endCursor
                }
              }
            }
            

            query ($first: Int!)  {
              search (query: "r

In [228]:
print(len(results))

123


### Only return specific members from `<repo>` (for PRs - this is who authored the PR, for Issues - this is who is assigned)


### What `<repo>` Issues are linked to each of these PRs?

## Refs

* https://docs.github.com/en/graphql/guides/forming-calls-with-graphql#authenticating-with-graphql
* https://github.com/graphql-python/gql
* https://gql.readthedocs.io/en/latest/async/async_usage.html#async-usage