In [1]:
import sys, os
from pathlib import Path
import llm, sqlite_utils
from itertools import batched

In [2]:
print("Environment check goes brrr...")

# The kernel's interpreter must live two levels below a directory named
# 'conda-env' (third-from-last path component). Guard the length so a short
# path fails the check with a message instead of raising IndexError.
interpreter_parts = Path(sys.executable).parts
env_dir_name = interpreter_parts[-3] if len(interpreter_parts) >= 3 else None
assert env_dir_name == 'conda-env', (
    f"expected the kernel interpreter under a 'conda-env' directory, got {sys.executable!r}"
)

# The ENVIRONMENT_LOADED variable must name this project.
loaded_environment = os.environ.get('ENVIRONMENT_LOADED')
assert loaded_environment == 'roombot', (
    f"expected ENVIRONMENT_LOADED == 'roombot', got {loaded_environment!r}"
)

Environment check goes brrr...


In [3]:
print("Database create (or load) goes brrr...")

# Step 1: make sure the directory that will hold the database file is present.
print("   ...brrr... creating path if not exist")
db_dir = "./db/"
os.makedirs(db_dir, exist_ok=True)

# Step 2: open the database file inside that directory via sqlite_utils.
print("   ...brrr... loading db")
db_file = f"{db_dir}embeddings.db"
db = sqlite_utils.Database(db_file)

Database create (or load) goes brrr...
   ...brrr... creating path if not exist
   ...brrr... loading db


In [4]:
print("Embeddings collection create (or load) goes brrr...")

collection_name = "code-collection"

print("   ...brrr... getting embedding model")
model_id = "3-large"
embedding_model = llm.get_embedding_model(model_id)

# Always start from a fresh collection: if one with this name is already in
# the database, delete it before building the one the later cells use.
stale_collection_present = llm.Collection.exists(db, collection_name)
if stale_collection_present:
    print("   ...brrr... deleting pre-existing embedding collection in db")
    llm.Collection(collection_name, db, model=embedding_model).delete()

print("   ...brrr... creating embedding collection in db")
collection = llm.Collection(collection_name, db, model=embedding_model)


Embeddings collection create (or load) goes brrr...
   ...brrr... getting embedding model
   ...brrr... deleting pre-existing embedding collection in db
   ...brrr... creating embedding collection in db


In [22]:
print("LLM model setup goes brrr...")

# Chat model used later to write a quality description for each code file.
llm_model_id = "gpt-4o"
llm_model = llm.get_model(llm_model_id)

LLM model setup goes brrr...


In [26]:
print("Reading code goes brrr...")

print("    ...brrr... Finding code to embed")
# Normalise the configured directory so it always ends with exactly one '/'.
code_dir = os.environ['ROOMBOT_CODE_DIR'].rstrip('/') + '/'

# Instructions sent ahead of every file. Built once because they do not depend
# on the file being audited; the code itself is appended between the
# `>>>>>>>>>` / `<<<<<<<<<` delimiters for each request.
audit_instructions = (
    "You are an expert code auditor, working as a member of a large team. "
    "Your team is working on a project involving a large amount of code. "
    "Some of the code is known to be of poor quality in terms of long-term "
    "maintainability, cleanliness, and performance. "
    "The team's goal is to prioritize which parts of the code require immediate "
    "attention and refactoring. You have been given the following code file, "
    "delimited between `>>>>>>>>>` and `<<<<<<<<<`. "
    "Your task is to write a short description of the quality of the code. "
    "These descriptions will later be used to prioritize the code. The code follows now: "
)

to_embed = [
    # (id, content_to_embed, metadata)
]

print("        ...brr... using directory:", code_dir)
# Sort the paths so ids are assigned in the same order on every run.
for path in sorted(Path(code_dir).glob("**/*")):
    if not path.is_file():
        continue
    try:
        code = path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        # A non-text file must not abort the whole (paid) run; skip it.
        print("        ...brr... skipping non-text file:", path)
        continue
    # Ask the LLM for a quality description; that description (not the raw
    # code) is what gets embedded, with the file path kept as metadata.
    response = llm_model.prompt(audit_instructions + ">>>>>>>>>\n" + code + "\n<<<<<<<<<")
    code_description = response.text()
    # Ids are consecutive, starting at 0, over the files actually described.
    to_embed.append((len(to_embed), code_description, {"path": str(path)}))
    print(str(len(to_embed)) + " done")

# Cheap stand-in data for trying the later cells without calling the LLM:
# print("        ...brr... using hardcoded")
# to_embed = [
#     (1, "This is really good code.", {"path": "good"}),
#     (2, "This is really bad code.", {"path": "bad"}),
# ]

to_embed

Reading code goes brrr...
    ...brrr... Finding code to embed
        ...brr... using directory: /home/ydhamija/Stuff/yawa/src/
1 done
2 done
3 done
4 done
5 done
6 done
7 done
8 done
9 done
10 done
11 done
12 done


[(0,
  "The provided code snippet offers a high-level overview of a Rust-based workout application, YAWA (Yet Another Workout App). Overall, the code structure appears to be organized into several logical modules, including adapters, controllers, services, lifting, and programs. This modular approach is beneficial for maintainability, as it promotes separation of concerns and encapsulation.\n\nHowever, without seeing the implementations of the modules themselves, it's difficult to assess the internal quality of the code regarding long-term maintainability, cleanliness, and performance. Key areas that may require attention include:\n\n1. **Documentation Quality:** While there are high-level comments explaining each module's purpose, the quality of in-code comments, function documentation, and usage examples needs to be assessed in the development of these modules.\n\n2. **Modularity and Complexity:** The extent to which each module adheres to single responsibility principles will impact

In [27]:
# Announce the step, then embed every (id, text, metadata) tuple gathered in
# `to_embed` into the collection in one call.
for banner_line in (
    "Embedding goes brrr...",
    "    ...brrr... Embedding into db collection",
    "        ...brrr... OpenAI money decreasing",
):
    print(banner_line)

# store=True presumably keeps the embedded text alongside the vector so the
# search cell can print it back -- confirm against the llm documentation.
collection.embed_multi_with_metadata(to_embed, store=True)

Embedding goes brrr...
    ...brrr... Embedding into db collection
        ...brrr... OpenAI money decreasing


In [28]:
print("Searching embeddings goes brrr...")
print("    ...brrr... OpenAI money decreasing")

# Query the collection for the three stored descriptions closest to this
# sentence, and print each hit as a delimited report.
query = "This code needs to be refactored."
nearest_matches = collection.similar(query, number=3)

for match in nearest_matches:
    id_and_score = " ".join(map(str, (match.id, match.score)))
    print(
        ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>",
        "ENTRY ID and SCORE:",
        id_and_score,
        "\n\nEMBEDDED CONTENT:",
        match.content,
        "\n\nMETADATA:",
        match.metadata,
        "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<",
        sep="\n",
    )

Searching embeddings goes brrr...
    ...brrr... OpenAI money decreasing
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
ENTRY ID and SCORE:
5 0.3685439548651149


EMBEDDED CONTENT:
### Code Quality Description

The provided code has several issues that negatively impact its maintainability, cleanliness, and performance:

1. **Error Handling Inconsistencies**: While the code uses the `anyhow` crate for error handling, there is an inconsistent use of error types. For instance, `get_user_confirmation` returns a `Result<bool>`, while others return `Result<bool, anyhow::Error>`. This can introduce confusion and make it harder to unify error handling throughout the module.

2. **Repetitive Code**: The functions `did_complete_lift` and `did_complete_maximum_reps` contain repetitive patterns for retrieving user input. This could be abstracted to reduce duplication and improve readability.

3. **Infinite Loop in User Confirmation**: The `get_use