In [776]:
from tree_sitter_language_pack import get_binding, get_language, get_parser

# Load the three handles tree_sitter_language_pack exposes for the Python grammar.
# Of these, only python_parser is used by the cells shown in this notebook.
python_binding = get_binding('python')  # this is an int pointing to the C binding
python_lang = get_language('python')  # this is an instance of tree_sitter.Language
python_parser = get_parser('python')  # this is an instance of tree_sitter.Parser

In [777]:
# Parse a small in-memory class definition so the resulting tree can be explored.
# The parser expects bytes, hence the explicit UTF-8 encoding of the snippet.
python_node = python_parser.parse("""
class Node:
    def __init__(self):
        self.x = 10
    def return_type(self):
        return self.x
""".encode("utf8"))

# Cell output: the grammar type of the tree's root node.
python_node.root_node.type

'module'

In [778]:
# Keep a handle on the root; the next cell lists its children.
root_node = python_node.root_node
# Print the root's grammar type, then the raw bytes it spans, one per line.
print(root_node.type, root_node.text, sep="\n")

module
b'class Node:\n    def __init__(self):\n        self.x = 10\n    def return_type(self):\n        return self.x\n'


In [779]:
# List the direct children of the root node. For the snippet parsed above this is
# the single class_definition; the methods are nested inside it and are not
# direct children of the root.
root_node.children

[<Node type=class_definition, start_point=(1, 0), end_point=(5, 21)>]

In [780]:
# TODO: get the function definitions nested inside the class node

In [781]:
# Load the same three handles for the JavaScript grammar.
# Of these, only javascript_parser is used by the cells shown in this notebook.
javascript_binding = get_binding('javascript')  # this is an int pointing to the C binding
javascript_lang = get_language('javascript')  # this is an instance of tree_sitter.Language
javascript_parser = get_parser('javascript')  # this is an instance of tree_sitter.Parser

In [782]:
# Parse a one-function JavaScript snippet; the parser takes UTF-8 encoded bytes.
js_node = javascript_parser.parse("""
function hello() {
    return "Hello, world!";
}
""".encode("utf8"))

In [783]:
# Cell output: the grammar type of the root node of the parsed JavaScript snippet.
js_node.root_node.type

'program'

In [784]:
from pathlib import Path

def extract_nodes(tree, source_code, source_code_path):
    """Collect every class and function definition found in a parsed Python file.

    Args:
        tree: Parsed syntax tree; only its ``root_node`` is read. Nodes must
            expose ``type``, ``children``, ``start_byte``/``end_byte``,
            ``start_point``/``end_point`` and ``child_by_field_name``.
        source_code: The ``bytes`` that were parsed. Node byte offsets are
            sliced out of it and decoded as UTF-8.
        source_code_path: Path of the parsed file. It is only used to build the
            ``context`` metadata and is never opened.

    Returns:
        A list with one dict per ``class_definition`` / ``function_definition``
        node, in document (pre-order) order. Each dict has the keys ``name``,
        ``signature`` (the full source text of the definition), ``code_type``,
        ``docstring`` ("" when there is none), ``line`` / ``line_from`` /
        ``line_to`` (1-based), ``context`` and ``node``.
    """
    allowed_node_types = ("class_definition", "function_definition")

    # The path-derived context is the same for every node, so resolve the path
    # once instead of once per definition.
    file_path = Path(source_code_path).resolve()
    file_name = file_path.name
    module_name = file_path.stem

    def get_node_text(node):
        """Return the UTF-8 decoded source text covered by ``node``."""
        return source_code[node.start_byte:node.end_byte].decode('utf-8')

    def get_name(node):
        """Return the text of the node's ``name`` field, or None if it has none."""
        name_node = node.child_by_field_name('name')
        return get_node_text(name_node) if name_node is not None else None

    def get_struct_name(node):
        """Return the class name for class definitions and None for anything else."""
        # Read the name from the syntax tree. Splitting the source text on
        # whitespace left the trailing ':' attached for classes without bases
        # (e.g. "class Circle:" produced "Circle:").
        return get_name(node) if node.type == 'class_definition' else None

    def get_docstring(node):
        """Return the definition's own docstring, or "" when it has none."""
        body = node.child_by_field_name('body')
        if body is None:
            return ""

        for statement in body.children:
            if statement.type == 'comment':
                # A comment may precede the docstring without displacing it.
                continue
            # Only a bare string literal as the first statement is a docstring.
            # Stopping here keeps docstrings of nested definitions (e.g. the
            # methods of a class) out of the enclosing definition's docstring.
            if statement.type != 'expression_statement' or not statement.children:
                return ""
            literal = statement.children[0]
            if literal.type != 'string':
                return ""
            return "".join(
                get_node_text(part)
                for part in literal.children
                if part.type == 'string_content'
            )

        return ""

    def get_context(node):
        """Return file-level metadata plus the node's own source snippet."""
        return {
            "module": module_name,
            "file_path": str(file_path),
            "file_name": file_name,
            "struct_name": get_struct_name(node),
            "snippet": get_node_text(node)
        }

    results = []

    # TODO: methods are still reported with struct_name None; resolving the
    # enclosing class for them remains open.
    #
    # Walk with an explicit stack instead of recursion so deeply nested syntax
    # trees cannot hit Python's recursion limit. Children are pushed in reverse
    # so nodes are still visited in document order.
    pending = [tree.root_node]
    while pending:
        node = pending.pop()

        if node.type in allowed_node_types:
            results.append({
                "name": get_name(node),
                "signature": get_node_text(node),
                "code_type": node.type,
                "docstring": get_docstring(node),
                # Tree rows are 0-based; reported line numbers are 1-based.
                "line": node.start_point[0] + 1,
                "line_from": node.start_point[0] + 1,
                "line_to": node.end_point[0] + 1,
                "context": get_context(node),
                "node": node
            })

        pending.extend(reversed(node.children))

    return results

In [785]:
# Relative path, resolved against the notebook's working directory.
source_code_path = "example.py"

# Read raw bytes: extract_nodes slices the source by the byte offsets of each node.
with open(source_code_path, "rb") as file:
    source_code = file.read()

# NOTE: this rebinds python_node, replacing the tree parsed from the inline
# snippet in an earlier cell.
python_node = python_parser.parse(source_code)

# Cell output: one dict per class/function definition found in the file.
extract_nodes(python_node, source_code, source_code_path)

[{'name': 'calculate_area',
  'signature': 'def calculate_area(radius):\n    """Calculate the area of a circle given its radius."""\n    if radius <= 0:\n        raise ValueError("Radius must be positive")\n    return PI * (radius ** 2)',
  'code_type': 'function_definition',
  'docstring': 'Calculate the area of a circle given its radius.',
  'line': 12,
  'line_from': 12,
  'line_to': 16,
  'context': {'module': 'example',
   'file_path': '/home/devblin/desktop/project/reviewturtl/cookbooks/example.py',
   'file_name': 'example.py',
   'struct_name': None,
   'snippet': 'def calculate_area(radius):\n    """Calculate the area of a circle given its radius."""\n    if radius <= 0:\n        raise ValueError("Radius must be positive")\n    return PI * (radius ** 2)'},
  'node': <Node type=function_definition, start_point=(11, 0), end_point=(15, 29)>},
 {'name': 'Circle',
  'signature': 'class Circle:\n    """This is circle class."""\n\n    def __init__(self, radius):\n        self.radius 

In [2]:
import json
# Serialize two sample source files (path + content) into a JSON string.
# NOTE(review): every line inside the file_content literals carries four extra
# leading spaces, so the embedded code is stored indented — confirm this is intended.
obj = json.dumps([
        {
            "file_path": "example1.py",
            "file_content": """
    class Node:
        def __init__(self):
            self.x = 10
        def return_type(self):
            return self.x
    """,
        },
        {
            "file_path": "example2.py",
            "file_content": """
    def hello():
        return "Hello, world!"
    """,
        },
    ])

# Write the JSON string to example.json in the working directory (overwrites any existing file).
with open("example.json", "w") as file:
    file.write(obj)

In [2]:
import dspy
import random

import os

# The API key is read from the OPENAI_API_KEY environment variable so the secret
# never lands in the notebook or its version history. A missing variable raises
# KeyError here instead of failing later at request time.
# NOTE(review): the key that was previously hard-coded in this cell has been
# exposed and should be revoked.
turbo = dspy.OpenAI(
    model="gpt-4o-mini",
    api_key=os.environ["OPENAI_API_KEY"],
    max_tokens=3000,
)
# Predictor built from the "question->answer" signature; used by ask_question().
qa = dspy.TypedChainOfThought("question->answer")

def ask_question(question):
    """Run ``question`` through the ``qa`` predictor with ``turbo`` as the LM and print the result.

    Three lines are printed: the question, the value returned by
    ``qa.forward``, and a ``---`` separator. Nothing is returned.
    """
    with dspy.context(lm=turbo):
        prediction = qa.forward(question=question)
        report = (
            f"Question: {question}",
            f"Answer: {prediction}",
            "---",
        )
        print(*report, sep="\n")

def main():
    """Ask every trivia question exactly once, in shuffled order.

    The order comes from ``random.shuffle``; no seed is set here, so it depends
    on the current state of the ``random`` module. Each question is passed to
    ``ask_question``.
    """
    trivia = [
        "What is the largest desert in the world?",
        "Who wrote the novel '1984'?",
        "What is the chemical symbol for gold?",
        "Which planet is known as the Red Planet?",
        "Who painted the Mona Lisa?",
        "What is the capital of Japan?",
        "What is the largest mammal on Earth?",
        "In which year did World War II end?",
        "What is the main ingredient in guacamole?",
        "Who invented the telephone?",
        "What is the tallest mountain in the world?",
        "Who was the first person to walk on the moon?",
        "What is the largest ocean on Earth?",
        "Who wrote 'Romeo and Juliet'?",
        "What is the capital of Australia?",
        "What is the chemical formula for water?",
        "Who painted 'The Starry Night'?",
        "What is the largest country by land area?",
        "What is the smallest planet in our solar system?",
        "Who discovered penicillin?",
        "What is the longest river in the world?",
        "Who is known as the father of modern physics?",
        "What is the largest organ in the human body?",
        "What is the capital of Brazil?",
        "Who wrote 'To Kill a Mockingbird'?",
        "What is the chemical symbol for silver?",
        "What is the fastest land animal?",
        "Who painted the Sistine Chapel ceiling?",
        "What is the largest continent by land area?",
        "What is the main component of the Earth's atmosphere?",
        "Who invented the light bulb?",
        "What is the deepest point in the ocean?",
        "Who was the first woman to win a Nobel Prize?",
        "What is the largest species of shark?",
        "What is the capital of Canada?",
        "Who wrote 'The Great Gatsby'?",
        "What is the chemical symbol for iron?",
        "What is the largest moon in our solar system?",
        "Who painted 'The Persistence of Memory'?",
        "What is the longest wall in the world?",
        "What is the most abundant element in the universe?",
        "Who developed the theory of evolution by natural selection?",
        "What is the largest bird in the world?",
        "What is the capital of Egypt?",
        "Who wrote 'War and Peace'?",
        "What is the chemical symbol for oxygen?",
        "What is the hottest planet in our solar system?",
        "Who sculpted 'David'?",
        "What is the largest waterfall in the world?",
        "What is the most widely spoken language in the world?"
    ]
    # Shuffle in place so each run visits the questions in a different order.
    random.shuffle(trivia)

    for prompt in trivia:
        ask_question(prompt)

# When this cell is executed in the notebook the guard is true (the output below
# shows the questions being asked), so running the cell calls main().
if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm


Question: Which planet is known as the Red Planet?
Answer: Prediction(
    reasoning='identify the planet commonly referred to as the Red Planet. We know that the planets in our solar system have distinct characteristics and names. The planet that is often associated with a reddish appearance due to iron oxide on its surface is Mars. Therefore, the answer is Mars.',
    answer='Mars'
)
---
Question: Who was the first person to walk on the moon?
Answer: Prediction(
    reasoning='determine who made this historic achievement. We know that the first moon landing occurred during the Apollo 11 mission in 1969. The astronauts on this mission were Neil Armstrong, Buzz Aldrin, and Michael Collins. However, only two of them landed on the moon. Neil Armstrong was the first to step onto the lunar surface, followed by Buzz Aldrin. Therefore, the first person to walk on the moon was Neil Armstrong.',
    answer='Neil Armstrong'
)
---
Question: What is the largest ocean on Earth?
Answer: Prediction(

In [1]:
import openai
import time
import random

import os

# Read the OpenAI API key from the OPENAI_API_KEY environment variable instead
# of embedding the secret in the notebook. A missing variable raises KeyError.
# NOTE(review): the key that was previously hard-coded in this cell has been
# exposed and should be revoked.
openai.api_key = os.environ["OPENAI_API_KEY"]

# Template chat-completion request for the duplicate-query classifier.
# It holds a single user message made of: the task description, two few-shot
# examples, the expected answer format, and the query under test. The prompt
# deliberately stops mid-sentence ("... in order to") so the model continues the
# reasoning from there. make_request() rewrites messages[0]["content"] for each call.
base_payload = {
    "model": "gpt-4o-mini",
    "messages": [
        {
            "role": "user",
            "content": (
                "A Classifier that checks whether the query that is being asked has been already asked before.\n\n"
                "I am Building this classifier to ensure that the AI-assistant does not answer essentially the same question multiple times.\n\n"
                "Remember that the YOU SHOULD ABSOLUTELY NOT GET CONFUSED BETWEEN A GENERAL QUESTION AND A SPECIFIC QUESTION. A GENERAL QUESTION AND A SPECIFIC QUESTION ARE NOT DUPLICATE.\n"
                "few_shot_example:\n\n"
                "### Example 1\n\n"
                "Previous Queries: \n\n"
                "[\n"
                "\"How to reset my password?\",\n"
                "\"What is the process to unlock my account?\",\n"
                "\"How can I install the new software update?\",\n"
                "\"Steps to connect to the VPN\",\n"
                "\"Procedure to request a new laptop\"\n"
                "]\n"
                "Latest Query:\n\n"
                "Can you guide me on how to reset my account password?\n"
                "is_duplicate:\n\n"
                "True\n\n"
                "### Example 2\n\n"
                "Previous Queries: \n\n"
                "[\n"
                "\"technical issues troubleshooting solutions\",\n"
                "\"Technical Solutions\",\n"
                "\"How to fix the issue\",\n"
                "\"IT Related Issues\"\n"
                "]\n"
                "Latest Query:\n\n"
                "My system is running slow. Can you help me with that?\n"
                "is_duplicate:\n\n"
                "False\n\n"
                "---\n\n"
                "Follow the following format.\n\n"
                "Previous Queries: These are all the queries that has been asked before\n\n"
                "Latest Query: This is the latest query that is being asked\n\n"
                "Reasoning: Let's think step by step in order to ${produce the is_duplicate}. We ...\n\n"
                "Is Duplicate: Whether the query that is being asked has been already asked before (Respond with true or false)\n\n"
                "---\n\n"
                "Previous Queries:\n"
                "[1] «request for information or resources»\n"
                "[2] «SQL query to check for long running processes»\n"
                "[3] «Query to check for long-running processes.»\n\n"
                "Latest Query: Query to check for long-running processes in SQL.\n\n"
                "Reasoning: Let's think step by step in order to"
            )
        }
    ],
    "frequency_penalty": 0,
    "max_tokens": 4096,
    "n": 1,
    "presence_penalty": 0,
    "temperature": 0.0,
    "top_p": 1
}

def modify_content(content):
    """Return ``content`` with three randomly chosen word substitutions applied.

    Three (old, new) pairs are drawn with ``random.sample`` and applied one
    after another with ``str.replace``, so a later pair also sees the text
    produced by an earlier one. Text containing none of the chosen words is
    returned unchanged.
    """
    substitutions = (
        ("password", "passcode"),
        ("account", "profile"),
        ("software update", "system upgrade"),
        ("VPN", "remote access"),
        ("laptop", "computer"),
        ("technical issues", "IT problems"),
        ("system", "computer"),
        ("SQL query", "database query"),
        ("long running processes", "time-consuming operations"),
        ("resources", "assets"),
    )
    chosen = random.sample(substitutions, 3)
    for target, replacement in chosen:
        content = content.replace(target, replacement)
    return content

# Function to make a single API request
def make_request(i):
    """Send one chat-completion request built from a freshly modified copy of ``base_payload``.

    Args:
        i: Zero-based request index; only used to label the returned text.

    Returns:
        A string containing the first 100 characters of the modified prompt and
        the API response, or an error description when the request raised.
    """
    # dict.copy() is shallow: the copy would still share the nested message
    # dicts with base_payload, so assigning the new content would also rewrite
    # the template, and every later request would start from already-modified
    # text. Copy each message dict so base_payload stays untouched.
    modified_payload = {
        **base_payload,
        "messages": [dict(message) for message in base_payload["messages"]],
    }
    modified_payload["messages"][0]["content"] = modify_content(
        base_payload["messages"][0]["content"]
    )

    try:
        response = openai.chat.completions.create(**modified_payload)
        return f"Request {i+1}:\nModified content: {modified_payload['messages'][0]['content'][:100]}...\nResponse: {response}\n"
    except Exception as e:
        # Best effort on purpose: a failed request is reported as text so the
        # surrounding loop keeps going with the remaining requests.
        return f"Error in request {i+1}: {str(e)}"

# Make 50 sequential requests.
# Each iteration performs a real API call and prints its result, so one run of
# this cell issues 50 requests and sleeps for about 10 seconds in total.
for i in range(50):
    print(make_request(i))
    time.sleep(0.2)  # Small delay to avoid overwhelming the API

Request 1:
Modified content: A Classifier that checks whether the query that is being asked has been already asked before.

I am ...
Response: ChatCompletion(id='chatcmpl-A6fMB1oW0g70u6jzrpCk2BKwsH0bV', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='determine if the latest query is a duplicate. The previous queries include a general request for information, a specific SQL query to check for time-consuming operations, and a query that is very similar to the latest one, which is about checking for long-running processes. The latest query specifies the context (SQL) but is essentially asking the same thing as the third previous query, which is also about checking for long-running processes. Therefore, the latest query does not introduce a new concept or context but rather reiterates a previously asked question.\n\nIs Duplicate: True', role='assistant', function_call=None, tool_calls=None, refusal=None))], created=1726152307, model='gpt

{'method': 'post', 'url': '/chat/completions', 'files': None, 'json_data': {'messages': [{'role': 'system', 'content': 'As an expert AI, your task requires utmost precision. Analyze the **AI Response** generated from the **User Transcript**, along with a provided list of **Top K RAG Contexts** and **Language**. Adhere strictly to these guidelines for generating follow-up questions:\n\nGenerate 3 very brief follow-up questions in the given Language solely based on all the retrieved RAG CONTEXTS that the user would likely ask next.\nMake ABSOLUTELY SURE that these questions must be answerable using information exclusively from the retrieved RAG CONTEXTS.\nEnsure that these questions strictly adhere to the retrieved RAG CONTEXTS, given Language and only inquire about details which is already answered in the RAG CONTEXTS.\nDO NOT GENERATE generate questions that prompt for information not present in the RAG CONTEXT.Ensure the questions focus on tech-related aspects from retrieved RAG CONTEXTS.\nDo not repeat questions that have already been asked.\n'}, {'role': 'user', 'content': "User Transcript:\nhmm this is good , but i want something else as well \n\nAI Response:\nIf you're still looking for more solutions regarding BitLocker asking for the recovery key, here are some additional steps you can consider:\n\n1. **Update Windows**: Ensure that your Windows operating system is fully updated. Sometimes, updates can resolve underlying issues with BitLocker.\n\n2. **Check for Hardware Changes**: If you've recently made any hardware changes (like replacing the motherboard or hard drive), this could trigger BitLocker to ask for the recovery key. Reverting any recent hardware changes may help.\n\n3. **Reset TPM**: If the Trusted Platform Module (TPM) is causing issues, you can reset it. To do this, enter the BIOS/UEFI settings during startup and look for the TPM settings. Resetting it may require you to reinitialize BitLocker.\n\n4. 
**Disable and Re-enable BitLocker**: If possible, you can try disabling BitLocker completely and then re-enabling it. This can sometimes reset any misconfigurations that are causing the recovery key prompts.\n\n5. **Check Group Policy Settings**: Ensure that the Group Policy settings related to BitLocker are correctly configured. Sometimes, incorrect settings can lead to issues with recovery key prompts.\n\n6. **Consult Microsoft Support**: If none of the above steps work, consider reaching out to Microsoft Support for more specialized assistance.\n\nIf you need further help or clarification on any of these steps, feel free to ask!\n\nLanguage:\nENGLISH\n\nTop K RAG Contexts:\n\nHow to fix a slow computer.pdf#page=3\n\nUnable to connect Outlook and Teams shows error CAA20002.pdf#page=2\n\nVeeva - Customer Maintenance DCR Provider updates.pdf#page=5\n\nVeeva Call Unlock.pdf#page=1\n\nVeeva - Customer Maintenance DCR Provider updates.pdf#page=2"}], 'model': 'gpt-3.5-turbo-0125', 'function_call': None, 'max_tokens': 2000, 'seed': 123, 'temperature': 0, 'tool_choice': {'type': 'function', 'function': {'name': 'Followup'}}, 'tools': [{'type': 'function', 'function': {'name': 'Followup', 'description': 'Followup questions structure for testing', 'parameters': {'properties': {'followup': {'items': {'type': 'string'}, 'title': 'Followup', 'type': 'array'}}, 'required': ['followup'], 'type': 'object'}}}], 'top_p': 1}}