<a href="https://colab.research.google.com/github/xjdeng/github-ai-query/blob/main/gemini_query_small_github_repo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install path.py==12.0.1
# Import necessary modules
import os
from google.colab import userdata
import google.generativeai as genai
from path import Path as path
from vertexai.preview import tokenization
import pprint
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY') #Set up Gemini Credentials, see video: https://www.youtube.com/watch?v=S1elvCs1gyI
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel("gemini-1.5-flash-latest")
model_name = "gemini-1.5-flash"
tokenizer = tokenization.get_tokenizer_for_model(model_name)

def count_tokens(txt):
  result = tokenizer.count_tokens(txt)
  return result.total_tokens

def generate_tree(directory, prefix=""):

    tree = []
    entries = sorted(os.listdir(directory))  # Sort entries for consistent order

    for index, entry in enumerate(entries):
        path = os.path.join(directory, entry)
        is_last = (index == len(entries) - 1)

        if is_last:
            tree.append(f"{prefix}└── {entry}")
            next_prefix = f"{prefix}    "
        else:
            tree.append(f"{prefix}├── {entry}")
            next_prefix = f"{prefix}│   "

        if os.path.isdir(path):
            tree.append(generate_tree(path, next_prefix))
    return "\n".join(tree)

def query_repo(query, repo_url, refresh = False):
  if repo_url.lower().endswith(".git"):
    repo_url = repo_url[:-4]
  local_repo_dir = str(path(repo_url).name)
  if refresh:
    !rm -rf $local_repo_dir
  if not path(local_repo_dir).exists():
    !git clone $repo_url
  files_to_open = []
  for f in path(local_repo_dir).walkfiles():
    if (f.ext == ".py") or (f.ext == ".md") or (f.ext == ".txt"):
      files_to_open.append(f)
  prompt = f"""
  I'm making the following query on the Github repo below and I'll give you the file structure and the contents of all of the files.

  My Query:
  ---
  {query}
  ---

  File structure:
  ---
  {generate_tree(local_repo_dir)}
  ---

  Next, I'll give you the contents of each file in the following section -
  *************************************************************************

  """
  for f in files_to_open:
    with open(f,'r') as ff:
      contents = ff.read()
    prompt += f"""
    {str(f.name)}:
    ---
    {contents}
    ---

    """
  prompt += "*************************************************************************"
  n_tokens = count_tokens(prompt)
  print(f"{n_tokens} Tokens")
  if n_tokens > 1000000:
    print("This repo has more than 1 million tokens and the query is likely to fail as a result due to Gemini's context window of 1 million.")
  response = model.generate_content(prompt)
  return response.text



In [3]:
# @title
pprint.pprint(query_repo("Do a code review on this repo and point out its biggest problems.",
                         "https://github.com/run-llama/llama_index"))

Cloning into 'llama_index'...
remote: Enumerating objects: 110702, done.[K
remote: Counting objects: 100% (5863/5863), done.[K
remote: Compressing objects: 100% (402/402), done.[K
remote: Total 110702 (delta 5595), reused 5520 (delta 5434), pack-reused 104839 (from 1)[K
Receiving objects: 100% (110702/110702), 241.99 MiB | 22.32 MiB/s, done.
Resolving deltas: 100% (76586/76586), done.
Updating files: 100% (10765/10765), done.
4255911 Tokens
This repo has more than 1 million tokens and the query is likely to fail as a result due to Gemini's context window of 1 million.




TooManyRequests: 429 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent?%24alt=json%3Benum-encoding%3Dint: Resource has been exhausted (e.g. check quota).