## Example with LLama

In [3]:
from langchain.document_loaders import JSONLoader
import json

print(JSONLoader)
questions = None

with open('./data/questions.json', 'r') as file:
    # Load the JSON data
    data = file.read()
    questions = data

loader = JSONLoader(file_path="./data/questions.json", jq_schema="[]", text_content=False)
data_loaded = loader.load()

<class 'langchain.document_loaders.json_loader.JSONLoader'>


In [7]:
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain import PromptTemplate, LLMChain
from dotenv import load_dotenv
import os

load_dotenv()

model_path = os.getenv("MODEL_PATH")

template = """Question: {question}

Answer: Let's think step by step
"""

prompt = PromptTemplate(template=template, input_variables=["question"])

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path=model_path,
    callback_manager=callback_manager,
    verbose=True
)

llm_chain = LLMChain(prompt=prompt, llm=llm)

question = "how many type values accept a json file?"

llm_chain.run(question)

Python-dotenv could not parse statement starting at line 1
llama.cpp: loading model from ./models/ggml-vic13b-q5_1.bin
llama_model_load_internal: format     = ggjt v2 (pre #1508)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 9 (mostly Q5_1)
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.09 MB
llama_model_load_internal: mem required  = 11359.05 MB (+ 1608.00 MB per state)
.
llama_init_from_file: kv self size  =  400.00 MB
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD


1. Create your TypeScript schema for the data you want to validate.
2. Use `infer` keyword to automatically infer the type of the json data based on the schema.
3. Check if the JSON data conforms to the type defined in the schema using the `instanceof` operator.
4. If it does, use the validated data!

Here's an example:
```typescript
type Task = {
  id: number;
  name: string;
};

function validateTask(task: Task): task is Task {
  if (task.id === 42) {
    return task;
  } else {
    throw new Error("Invalid task");
  }
}

const myTask: Task = { id: 42, name: "Build a snowman" };

const validatedTask = validateTask(myTask);
```


llama_print_timings:        load time =  1115.87 ms
llama_print_timings:      sample time =   134.65 ms /   199 runs   (    0.68 ms per token)
llama_print_timings: prompt eval time =  2699.98 ms /    24 tokens (  112.50 ms per token)
llama_print_timings:        eval time = 21745.32 ms /   198 runs   (  109.82 ms per token)
llama_print_timings:       total time = 27637.88 ms


'\n1. Create your TypeScript schema for the data you want to validate.\n2. Use `infer` keyword to automatically infer the type of the json data based on the schema.\n3. Check if the JSON data conforms to the type defined in the schema using the `instanceof` operator.\n4. If it does, use the validated data!\n\nHere\'s an example:\n```typescript\ntype Task = {\n  id: number;\n  name: string;\n};\n\nfunction validateTask(task: Task): task is Task {\n  if (task.id === 42) {\n    return task;\n  } else {\n    throw new Error("Invalid task");\n  }\n}\n\nconst myTask: Task = { id: 42, name: "Build a snowman" };\n\nconst validatedTask = validateTask(myTask);\n```'

In [64]:
template = """Question: {question}

Answer: Let's think step by step
"""

prompt = PromptTemplate(template=template, input_variables=["question"])

callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

llm = LlamaCpp(
    model_path=model_path,
    callback_manager=callback_manager,
    verbose=True
)

llm_chain = LLMChain(prompt=prompt, llm=llm)

question = "How much knowledge do you have?"

llm_chain.run(question)

llama.cpp: loading model from ./models/ggml-vic13b-q5_1.bin
llama_model_load_internal: format     = ggjt v2 (pre #1508)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 9 (mostly Q5_1)
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.09 MB
llama_model_load_internal: mem required  = 11359.05 MB (+ 1608.00 MB per state)
.
llama_init_from_file: kv self size  =  400.00 MB
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | 



Step 1:
"How much" refers to the quantity or amount of something. It is a measure word that is used before a noun to indicate the quantity of the noun being referred to. For example, "how much" ice cream do you want?

Step 2:
In this question, "knowledge" is the noun being referred to. It refers to the understanding or information about something. Therefore, the correct answer would be "I have some knowledge."


llama_print_timings:        load time =   964.30 ms
llama_print_timings:      sample time =    71.58 ms /   104 runs   (    0.69 ms per token)
llama_print_timings: prompt eval time =  2353.67 ms /    22 tokens (  106.98 ms per token)
llama_print_timings:        eval time = 11084.48 ms /   103 runs   (  107.62 ms per token)
llama_print_timings:       total time = 15095.36 ms


'\nStep 1:\n"How much" refers to the quantity or amount of something. It is a measure word that is used before a noun to indicate the quantity of the noun being referred to. For example, "how much" ice cream do you want?\n\nStep 2:\nIn this question, "knowledge" is the noun being referred to. It refers to the understanding or information about something. Therefore, the correct answer would be "I have some knowledge."'

In [65]:
question = "Can you give me an example oa reduce function in javascript?"

llm_chain.run(question)

Llama.generate: prefix-match hit



What do we want to achieve with the "reduce" function? We want to perform some operation on each element of an array and return a single value. This is called "folding" an array into a single value.

Now, let's try to understand what the reduce function does. The "reduce" function takes two arguments: a callback function and an initial value. The callback function will be invoked on each element of the array with the current element as its argument. Then, the callback function returns the next value in the sequence based on the current element and the previous accumulator value.

Here's an example to help you understand reduce better:
```javascript
const numbers = [1, 2, 3, 4, 5];

// This is the initial value
let result = 0;

// Callback function that adds each element of the array to the previous accumulator value
function addToAccumulator(currentValue, currentElement) {
  return currentValue + currentElement;
}

// Use reduce function to sum up all the numbers in the array
const su


llama_print_timings:        load time =   964.30 ms
llama_print_timings:      sample time =   186.54 ms /   256 runs   (    0.73 ms per token)
llama_print_timings: prompt eval time =  2404.48 ms /    24 tokens (  100.19 ms per token)
llama_print_timings:        eval time = 28767.47 ms /   256 runs   (  112.37 ms per token)
llama_print_timings:       total time = 35434.71 ms


'\nWhat do we want to achieve with the "reduce" function? We want to perform some operation on each element of an array and return a single value. This is called "folding" an array into a single value.\n\nNow, let\'s try to understand what the reduce function does. The "reduce" function takes two arguments: a callback function and an initial value. The callback function will be invoked on each element of the array with the current element as its argument. Then, the callback function returns the next value in the sequence based on the current element and the previous accumulator value.\n\nHere\'s an example to help you understand reduce better:\n```javascript\nconst numbers = [1, 2, 3, 4, 5];\n\n// This is the initial value\nlet result = 0;\n\n// Callback function that adds each element of the array to the previous accumulator value\nfunction addToAccumulator(currentValue, currentElement) {\n  return currentValue + currentElement;\n}\n\n// Use reduce function to sum up all the numbers i

## Sequential chains with LLama

In [40]:
from langchain.chains import SimpleSequentialChain

llm = LlamaCpp(
    model_path=model_path,
    callback_manager=callback_manager,
    verbose=True
)

template = """
Give me a famous artist from the country the user suggests.

% USER SUGGESTION
{user_suggestion}

YOUR REPONSE:
"""

prompt_template = PromptTemplate(input_variables=["user_suggestion"], template=template)

artist_chain = LLMChain(llm=llm, prompt=prompt_template)


llama.cpp: loading model from ./models/ggml-vic13b-q5_1.bin
llama_model_load_internal: format     = ggjt v2 (pre #1508)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 9 (mostly Q5_1)
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.09 MB
llama_model_load_internal: mem required  = 11359.05 MB (+ 1608.00 MB per state)
.
llama_init_from_file: kv self size  =  400.00 MB
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | 


In [None]:
template = """
Given the artist, provide one historical fact about him or her.
% ARTIST
{user_artist}

YOUR REPONSE:
"""

prompt_template = PromptTemplate(input_variables=["user_artist"], template=template)
history_chain = LLMChain(llm=llm, prompt=prompt_template)

In [42]:
all_chains = SimpleSequentialChain(chains=[artist_chain, history_chain], verbose=True)

In [None]:
review = all_chains.run("Scotland")

In [44]:
template = """Your job is to come up with a classic dish from the area that the users suggests.
% USER LOCATION
{user_location}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(input_variables=["user_location"], template=template)

# Holds my 'location' chain
location_chain = LLMChain(llm=llm, prompt=prompt_template)

In [45]:
template = """Given a meal, give a short and simple recipe on how to make that dish at home.
% MEAL
{user_meal}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(input_variables=["user_meal"], template=template)

# Holds my 'meal' chain
meal_chain = LLMChain(llm=llm, prompt=prompt_template)

In [46]:
overall_chain = SimpleSequentialChain(chains=[location_chain, meal_chain], verbose=True)

In [47]:
review = overall_chain.run("Rome")



[1m> Entering new SimpleSequentialChain chain...[0m


Llama.generate: prefix-match hit


Well, Rome is known for its delicious and diverse cuisine. One classic dish from this region that I would recommend trying is "Carbonara." It's a traditional pasta dish made with eggs, bacon or pancetta, cheese, and spices. The ingredients are cooked together in a single pot, creating a rich and savory sauce that's perfect for a hearty meal. It's easy to make and always a crowd pleaser. Do you have any other specific ingredient you would like to include in the dish?[36;1m[1;3mWell, Rome is known for its delicious and diverse cuisine. One classic dish from this region that I would recommend trying is "Carbonara." It's a traditional pasta dish made with eggs, bacon or pancetta, cheese, and spices. The ingredients are cooked together in a single pot, creating a rich and savory sauce that's perfect for a hearty meal. It's easy to make and always a crowd pleaser. Do you have any other specific ingredient you would like to include in the dish?[0m



llama_print_timings:        load time =   967.36 ms
llama_print_timings:      sample time =    85.86 ms /   125 runs   (    0.69 ms per token)
llama_print_timings: prompt eval time =  4081.68 ms /    40 tokens (  102.04 ms per token)
llama_print_timings:        eval time = 13386.84 ms /   124 runs   (  107.96 ms per token)
llama_print_timings:       total time = 19436.32 ms
Llama.generate: prefix-match hit


I don't think so[33;1m[1;3mI don't think so[0m

[1m> Finished chain.[0m



llama_print_timings:        load time =   967.36 ms
llama_print_timings:      sample time =     4.97 ms /     7 runs   (    0.71 ms per token)
llama_print_timings: prompt eval time = 16304.39 ms /   162 tokens (  100.64 ms per token)
llama_print_timings:        eval time =   667.13 ms /     6 runs   (  111.19 ms per token)
llama_print_timings:       total time = 17091.85 ms


## Trying Open AI

In [54]:
import os
from dotenv import load_dotenv
from langchain.llms import OpenAI

load_dotenv()

open_api_key = os.getenv("OPENAI_API_KEY")

newLLm = OpenAI(temperature=1, openai_api_key=open_api_key)

template = """Your job is to come up with a classic dish from the area that the users suggests.
% USER LOCATION
{user_location}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(input_variables=["user_location"], template=template)

# Holds my 'location' chain
location_chain = LLMChain(llm=newLLm, prompt=prompt_template)

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


In [55]:
template = """Given a meal, give a short and simple recipe on how to make that dish at home.
% MEAL
{user_meal}

YOUR RESPONSE:
"""
prompt_template = PromptTemplate(input_variables=["user_meal"], template=template)

# Holds my 'meal' chain
meal_chain = LLMChain(llm=newLLm, prompt=prompt_template)
overall_chain = SimpleSequentialChain(chains=[location_chain, meal_chain], verbose=True)

In [56]:
review = overall_chain.run("Rome")



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mThe classic dish from Rome is Spaghetti alla Carbonara. It is made with eggs, cheese, and bacon, all cooked together with spaghetti. It is a rich, flavorful dish that is sure to please![0m
[33;1m[1;3m
Spaghetti alla Carbonara 
Ingredients: 
- 1 lb of spaghetti 
- Freshly ground black pepper
- 2 eggs 
- 1 cup of Parmesan cheese 
- 6-8 bacon slices 
- Chopped parsley 

Instructions: 
1. Cook the spaghetti in salted boiling water until al dente.
2. In the meantime, in a bowl, beat the eggs with a pinch of salt and the Parmesan cheese. 
3. Fry the bacon until crispy, and then remove it from the heat. 
4. Once the spaghetti is cooked, drain it and add it to the pan with the bacon. 
5. Mix together and add the egg and cheese mixture. 
6. Cook on low heat for a few minutes, stirring constantly. 
7. When everything is combined, remove the pan from the heat and add freshly ground black pepper and chopped parsley. 
8. Serve 

In [57]:
template = """
Give me a famous artist from the country the user suggests.

% USER SUGGESTION
{user_suggestion}

YOUR REPONSE:
"""

prompt_template = PromptTemplate(input_variables=["user_suggestion"], template=template)

artist_chain = LLMChain(llm=newLLm, prompt=prompt_template)

In [59]:
template = """
Given the artist, provide one historical fact about him or her.
% ARTIST
{user_artist}

YOUR REPONSE:
"""

prompt_template = PromptTemplate(input_variables=["user_artist"], template=template)
history_chain = LLMChain(llm=newLLm, prompt=prompt_template)
all_chains = SimpleSequentialChain(chains=[artist_chain, history_chain], verbose=True)

review = all_chains.run("Scotland")



[1m> Entering new SimpleSequentialChain chain...[0m
[36;1m[1;3mDavid Tennant, a Scottish actor famous for his roles in Doctor Who and Broadchurch.[0m
[33;1m[1;3mIn 2009, David Tennant was appointed an OBE (Officer of the Order of the British Empire) for his services to drama.[0m

[1m> Finished chain.[0m


### Sumarization

In [61]:
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

llm = LlamaCpp(
    model_path=model_path,
    callback_manager=callback_manager,
    verbose=True
)

loader = JSONLoader(file_path="./data/questions.json", jq_schema="[]", text_content=False)
data_loaded = loader.load()

# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)

# Split your docs into texts
texts = text_splitter.split_documents(data_loaded)

# There is a lot of complexity hidden in this one line. I encourage you to check out the video above for more detail
chain = load_summarize_chain(llm, chain_type="map_reduce", verbose=True)

chain.run(texts)

llama.cpp: loading model from ./models/ggml-vic13b-q5_1.bin
llama_model_load_internal: format     = ggjt v2 (pre #1508)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 512
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: ftype      = 9 (mostly Q5_1)
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: n_parts    = 1
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.09 MB
llama_model_load_internal: mem required  = 11359.05 MB (+ 1608.00 MB per state)
.
AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | 
llama_init_from_file: kv self size  =  400.00 MB




[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"[]"


CONCISE SUMMARY:[0m

The article describes how the COVID-19 pandemic has affected global supply chains and highlights several key challenges that businesses are facing. It notes that many companies have had to pivot their operations, source from new suppliers, and implement risk management strategies to mitigate the impact of the pandemic on their supply chain. The article also discusses how advances in technology such as blockchain and machine learning can help companies better manage supply chain risks and improve transparency. Finally, it emphasizes the importance of collaboration between businesses, governments, and other stakeholders to ensure that global supply chains remain resilient in the face of future crises.


llama_print_timings:        load time =  1167.99 ms
llama_print_timings:      sample time =    98.51 ms /   144 runs   (    0.68 ms per token)
llama_print_timings: prompt eval time =  3003.24 ms /    26 tokens (  115.51 ms per token)
llama_print_timings:        eval time = 15473.40 ms /   143 runs   (  108.21 ms per token)
llama_print_timings:       total time = 20748.22 ms
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.



[1m> Finished chain.[0m


Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"
The article describes how the COVID-19 pandemic has affected global supply chains and highlights several key challenges that businesses are facing. It notes that many companies have had to pivot their operations, source from new suppliers, and implement risk management strategies to mitigate the impact of the pandemic on their supply chain. The article also discusses how advances in technology such as blockchain and machine learning can help companies better manage supply chain risks and improve transparency. Finally, it emphasizes the importance of collaboration between businesses, governments, and other stakeholders to ensure that global supply chains remain resilient in the face of future crises."


CONCISE SUMMARY:[0m


Llama.generate: prefix-match hit



The COVID-19 pandemic has disrupted global supply chains, forcing companies to adapt by sourcing from new suppliers and implementing risk management strategies. Technology such as blockchain and machine learning can help mitigate supply chain risks and improve transparency. Collaboration between stakeholders is crucial for ensuring the resilience of global supply chains in future crises.
[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m



llama_print_timings:        load time =  1167.99 ms
llama_print_timings:      sample time =    62.62 ms /    87 runs   (    0.72 ms per token)
llama_print_timings: prompt eval time = 16353.07 ms /   152 tokens (  107.59 ms per token)
llama_print_timings:        eval time =  9769.86 ms /    87 runs   (  112.30 ms per token)
llama_print_timings:       total time = 27585.56 ms


'\nThe COVID-19 pandemic has disrupted global supply chains, forcing companies to adapt by sourcing from new suppliers and implementing risk management strategies. Technology such as blockchain and machine learning can help mitigate supply chain risks and improve transparency. Collaboration between stakeholders is crucial for ensuring the resilience of global supply chains in future crises.'

In [62]:
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

import os
from dotenv import load_dotenv
from langchain.llms import OpenAI

load_dotenv()

open_api_key = os.getenv("OPENAI_API_KEY")

newLLm = OpenAI(temperature=1, openai_api_key=open_api_key)

loader = JSONLoader(file_path="./data/questions.json", jq_schema="[]", text_content=False)
data_loaded = loader.load()

# Get your splitter ready
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)

# Split your docs into texts
texts = text_splitter.split_documents(data_loaded)

# There is a lot of complexity hidden in this one line. I encourage you to check out the video above for more detail
chain = load_summarize_chain(newLLm, chain_type="map_reduce", verbose=True)

chain.run(texts)



[1m> Entering new MapReduceDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


"[]"


CONCISE SUMMARY:[0m

[1m> Finished chain.[0m


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mWrite a concise summary of the following:


" This article is blank."


CONCISE SUMMARY:[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


'\n\nThe article has no content.'