In [4]:
import git2vectors

  from tqdm.autonotebook import tqdm


In [5]:
# Remote Git repository that is handed to git2vectors for indexing.
repo = "https://github.com/hwchase17/langchain"
# Build the vector store that later cells query.
# NOTE(review): the recorded output of this cell reports fetching the repo and
# clearing the existing "testindex" index first, so re-running it appears to
# discard previously stored vectors — confirm in git2vectors.
vectorstore = git2vectors.create_vectorstore(repo)

Fetching data from git repo...
Clearing index testindex from previous runs...
Done!

describe_index_stats:
{'dimension': 1536,
 'index_fullness': 0.1,
 'namespaces': {'': {'vector_count': 4196}},
 'total_vector_count': 4196}


In [34]:
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain

# Chat model that answers the repository questions; the API key is read from
# the git2vectors module rather than being hardcoded in the notebook.
llm = ChatOpenAI(
    openai_api_key=git2vectors.OPENAI_API_KEY,
    model_name='gpt-4',
    temperature=0.2
)

# Prompt text with two placeholders:
#   {query}             - the user's question
#   {similar_documents} - text retrieved from the vector store
# The model is told to reply "idk" when it cannot answer, which the query cell
# uses as a sentinel for a failed retrieval.
template="""A user is asking a question about a code repository. Here is their query:
{query}

Here are some documents containing similar information to the query:
{similar_documents}

If you don't know say "idk" else answer the question.
"""

prompt = PromptTemplate(
    input_variables=["query", "similar_documents"],
    template=template,
)
# Chain that fills the prompt with the inputs and sends it to the chat model.
chain = LLMChain(llm=llm, prompt=prompt)

In [37]:
query = "How can I count the tokens in a PromptTemplate?"

# Retrieve the 10 stored chunks closest to the query. Each result is a
# (document, score) pair; only the document text is used below.
similarity_resp = vectorstore.similarity_search_with_score(
    query, k=10
)

# Join the chunks into a single string. Passing the list itself makes the
# prompt template render its Python repr (brackets, quotes, escaped "\n"),
# which garbles multi-line source code inside the prompt.
chain_inputs = {
    "query" : query,
    "similar_documents": "\n\n---\n\n".join(
        doc.page_content for doc, _ in similarity_resp
    ),
}
chain_resp = chain(chain_inputs)

# The prompt asks the model to say "idk" when it cannot answer; compare
# case-insensitively so that "IDK" / "Idk" are detected as well.
answer = chain_resp['text']
if "idk" in answer.lower():
    print("RAG FAILED")
else:
    print(answer)

To count the tokens in a PromptTemplate, you can use the `len()` function in Python. First, you need to convert the PromptTemplate into a string, and then split the string into tokens (words). Here's an example:

```python
from langchain.prompts.prompt import PromptTemplate

template = (
    """Question: {question}
    Answer: Let's think step by step."""
)
prompt = PromptTemplate(template=template, input_variables=["question"])

# Convert the PromptTemplate to a string
template_str = str(prompt)

# Split the string into tokens (words)
tokens = template_str.split()

# Count the tokens
token_count = len(tokens)

print("Token count:", token_count)
```

This code will output the number of tokens in the given PromptTemplate.


In [48]:
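# Draft (not run): count the tokens in the prompt template itself.
# template_str = prompt.template
# for var in prompt.input_variables:
#     # str.replace returns a new string, so the result has to be reassigned
#     template_str = template_str.replace("{" + var + "}", "")

# # count number of tokens in template_str
# import tiktoken
# encoding = tiktoken.encoding_for_model("gpt-4")
# len(encoding.encode(template_str))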