In [1]:
import json

with open('data/mapping.json', 'r') as fp:
    mappings = json.load(fp)

In [2]:
from openai import OpenAI
import dotenv

dotenv.load_dotenv('.env', override=True)

client = OpenAI()

def get_embedding(text, model="text-embedding-ada-002"):
   text = text.replace("\n", " ")
   return client.embeddings.create(input = [text], model=model).data[0].embedding

In [3]:
import pinecone
import dotenv
import os

dotenv.load_dotenv('.env', override=True)

def load_index():
    pinecone.init(
        api_key=os.environ['PINECONE_API_KEY'],  # app.pinecone.io
        environment=os.environ["PINECONE_ENV"]  # find next to API key in console
    )

    index_name = 'codestyle-semantic-search'
    if not index_name in pinecone.list_indexes():
        raise KeyError(f"Index '{index_name}' does not exist.")

    return pinecone.Index(index_name)

  from tqdm.autonotebook import tqdm


In [4]:
index = load_index()


In [5]:
def create_context(question, index, max_len=3750):
    """
    Find most relevant context for a question via Pinecone search
    """
    q_embed = get_embedding(question)
    res = index.query(q_embed, top_k=5, include_metadata=True)
    

    cur_len = 0
    contexts = []

    for row in res['matches']:
        text = mappings[row['id']]
        cur_len += row['metadata']['n_tokens'] + 4
        if cur_len < max_len:
            contexts.append(text)
        else:
            cur_len -= row['metadata']['n_tokens'] + 4
            if max_len - cur_len < 200:
                break
    return "\n\n###\n\n".join(contexts)

In [6]:
create_context("how should I name a new class", index)

'Styleguide: Google C++ Style Guide - Naming; Section: General Naming Rules (link https://google.github.io/styleguide/cppguide.html#General_Naming_Rules); Guide: Optimize for readability using names that would be clear even to people on a different team. Use names that describe the purpose or intent of the object. Do not worry about saving horizontal space as it is far more important to make your code immediately understandable by a new reader. Minimize the use of abbreviations that would likely be unknown to someone outside your project (especially acronyms and initialisms). Do not abbreviate by deleting letters within a word. As a rule of thumb, an abbreviation is probably OK if it\'s listed in Wikipedia. Generally speaking, descriptiveness should be proportional to the name\'s scope of visibility. For example, n may be a fine name within a 5-line function, but within the scope of a class, it\'s likely too vague. class MyClass { public: int CountFooErrors(const std::vector<Foo>& foos

In [16]:

client = OpenAI()

def answer_question(
    index,
    question="How to name a variable?",
    max_len=3550,
    debug=False,
):
    """
    Answer a question based on the most similar context from the dataframe texts
    """
    context = create_context(
        question,
        index,
        max_len=max_len,
    )
    if debug:
        print("Context:\n" + context)
        print("\n\n")
    try:
        #print(instruction.format(context, question))
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful codestyle assistant. When answering question, use following format: 1. Describe style requirements in one or two sentences, 2. Give a few examples that follow described styleguides, 3. Provide descriptions that you were able to find that clarify why such style was chosen"},
                {"role": "user", "content": question},
            ]
        )
        # response = openai.Completion.create(
        #     prompt=instruction.format(context, question),
        #     temperature=0,
        #     max_tokens=max_tokens,
        #     top_p=1,
        #     frequency_penalty=0,
        #     presence_penalty=0,
        #     stop=stop_sequence,
        #     **model_param,
        # )
        return response.choices[0].message.content
    except Exception as e:
        print(e)
        return ""

In [18]:
answer_question(index, question="I have a function 'GenerateName' that is templated. Should I name templated parameter <template typename NameType> or <template typename name_type>?")

'1. The style requirement for naming templated parameters is to use lowercase with underscores.\n\n2. Examples that follow the style guideline:\n   - `template<typename name_type>`\n   - `template<typename value_type>`\n\n3. The convention of using lowercase with underscores for templated parameter names is consistent with the naming style used for variables and function parameters in C++. It helps improve code readability and maintain consistency throughout the codebase.'