# Mistral AI

In [26]:
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage
import os, tomli

In [27]:
with open('.streamlit/secrets.toml','rb') as f:
    secrets = tomli.load(f)
    
api_key = secrets["MISTRAL_API_KEY"]
# api_key = os.environ["MISTRAL_API_KEY"]
model = "mistral-tiny"

client = MistralClient(api_key=api_key)

In [2]:
messages = [
    ChatMessage(role="user", content="What is the best French cheese?")
]
messages

[ChatMessage(role='user', content='What is the best French cheese?')]

In [3]:
def chat2struct(chat):
    return [{'role': m.role, 'content': m.content} for m in chat]
chat2struct(messages)

[{'role': 'user', 'content': 'What is the best French cheese?'}]

In [4]:
m = [{'role': 'system', 'content': 'If I say hello, say world'}]

In [5]:
def struct2chat(struct):
    return [ChatMessage(role=m['role'], content=m['content']) for m in struct]
struct2chat(m)

[ChatMessage(role='system', content='If I say hello, say world')]

In [16]:
ChatMessage(role=m[0]['role'], content=m[0]['content'])

ChatMessage(role='system', content='If I say hello, say world')

In [18]:
# No streaming
chat_response = client.chat(
    model=model,
    messages=messages,
)

print(chat_response.choices[0].message.content)

It is subjective to determine the "best" French cheese as it depends on personal preferences. Some popular and highly regarded French cheeses are:

1. Roquefort: A blue-veined cheese from the Massif Central region, known for its strong, pungent flavor and distinctive tang.

2. Comté: A nutty, buttery, and slightly sweet cheese from the Franche-Comté region, made from unpasteurized cow's milk.

3. Camembert de Normandie: A soft, Earthy, and tangy cheese from the Normandy region, famous for its white mold rind.

4. Brie de Meaux: A creamy and mild soft-ripened cheese from the Île-de-France region, known for its edible white rind and rich, buttery flavor.

5. Munster: A pungent and smelly cheese from the Alsace region, characterized by its orange-red rind and strong, distinctive flavor.

6. Chaource: A bloomy-rind, mild, and creamy cheese from the Île-de-France region, with a slightly tangy taste and a buttery texture.

7. Époisses de Bourgogne: A soft, smelly, and runny cheese from the B

## Summary

In [None]:
!pip install webvtt-py

In [7]:
import os, webvtt

In [8]:
os.listdir('vtt')

['YannMike_2023-03-08.vtt']

In [9]:
file = 'YannMike_2023-03-08.vtt'
chat = webvtt.read('vtt/'+file)
str = []
for caption in chat:
    str.append(caption.text)
sep = '\n'
convo = sep.join(str)
with open('txt/'+file.replace('vtt','txt'),mode='w') as f:
    f.write(convo)

In [13]:
import math
words = convo.split() # split the string into words 
num_tokens = math.floor(len(words) * 3/4) # 3/4 of the words are tokens
num_tokens

68

In [17]:
context = 'summarize the following conversation'
model = 'mistral-tiny'
messages = [
    ChatMessage(role="system", content=context),
    ChatMessage(role="user", content=convo)
]
# messages = [
#         {'role': 'system','content': context},
#         {'role': 'user', 'content': convo}
#             ]
completion = client.chat(
    model=model,
    messages=messages
)
completion.choices[0].message.content

'The speaker is planning to conduct an experiment where they record conversations and generate VTT files. They have developed a Python app to process these files and intend to use the ChatGPT API for analysis. However, they are having trouble finding time to implement this due to various commitments.'

## Question Answering

https://docs.mistral.ai/guides/basic-RAG/

### RAG from scratch

In [18]:
from mistralai.client import MistralClient, ChatMessage
import numpy as np
import os

In [20]:
import requests

response = requests.get('https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt')
text = response.text

In [24]:
folder = "book"
filename = "essay.txt"

# Create the folder if it doesn't exist
if not os.path.exists(folder):
    os.makedirs(folder)
    
with open(os.path.join(folder, filename), "wb") as file:
    file.write(response.text.encode('utf-8'))

In [25]:
chunk_size = 2048
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
len(chunks)

37

In [28]:
def get_text_embedding(input):
    embeddings_batch_response = client.embeddings(
          model="mistral-embed",
          input=input
      )
    return embeddings_batch_response.data[0].embedding
text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])

In [31]:
import faiss

d = text_embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(text_embeddings)

In [44]:
question = "Are we going to save democracy?"
question_embeddings = np.array([get_text_embedding(question)])

In [45]:
question_embeddings

array([[-0.01547241,  0.00917053,  0.01520538, ..., -0.05145264,
         0.01053619, -0.01382446]])

In [46]:
D, I = index.search(question_embeddings, k=2) # distance, index
retrieved_chunk = [chunks[i] for i in I.tolist()[0]]

In [47]:
prompt = f"""
Context information is below.
---------------------
{retrieved_chunk}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""

In [48]:
def run_mistral(user_message, model="mistral-medium-latest"):
    messages = [
        ChatMessage(role="user", content=user_message)
    ]
    chat_response = client.chat(
        model=model,
        messages=messages
    )
    return (chat_response.choices[0].message.content)

run_mistral(prompt)

'The provided context does not mention or discuss democracy, politics, or saving democracy. Therefore, I cannot provide an answer to this query based on the given context information.'

### Impromptu

In [1]:
import requests
import os

url = "https://www.impromptubook.com/wp-content/uploads/2023/03/impromptu-rh.pdf"
folder = "book"
filename = "impromptu-rh.pdf"

# Create the folder if it doesn't exist
if not os.path.exists(folder):
    os.makedirs(folder)

# Download the file
response = requests.get(url)
response.raise_for_status()

# Save the file
with open(os.path.join(folder, filename), "wb") as file:
    file.write(response.content)

print("PDF file downloaded and saved successfully.")

PDF file downloaded and saved successfully.


In [2]:
# break down into 1 pdf per page
import pypdf

def pdf_to_pages(file):
	"extract text (pages) from pdf file"
	pages = []
	pdf = pypdf.PdfReader(file)
	for p in range(len(pdf.pages)):
		page = pdf.pages[p]
		text = page.extract_text()
		pages += [text]
	return pages

file = "book/impromptu-rh.pdf"
pages = pdf_to_pages(file)

In [14]:
print(pages[31])

25
EDUCATION
If Hollywood central casting ever wants to portray a 
beloved instructor from an idealized past, they could do worse 
than University of Texas at Austin Professor Steven Mintz. Over 
four decades of teaching, Professor Mintz has published books 
and articles on topics as diverse as the psychology of prominent 
Anglo-American literary families and political good vs. evil.
In collared shirts, with graying hair, Mintz can’t suppress his 
smile as he teaches. Students adore him: among hundreds who 
have anonymously rated Mintz online, his average rating is a 
perfect five out of five, with posts such as “easily the best orator 
I’ve ever witnessed,” “his lectures feel more like storytelling 
than class,” and “passionate about what he teaches.”
Professor Mintz, frankly, excelled as a professor long before the 
development of LLMs. So you might have expected him to have 
reacted with indifference or hostility to the late 2022 public 
release of GPT-4’s cousin, ChatGPT.
Instead, 

In [17]:
if not os.path.exists("book/chap1"):
    os.makedirs("book/chap1")
for p in pages[31:53]: # PDF pages 32-54 - chapter 1 on Education
    with open(f"book/chap1/impromptu-rh_ch1_p{pages.index(p)+1}.txt",mode='w',encoding='utf-8') as f:
        f.write(p)

In [59]:
text = pages[31:53]
chunk_size = 2048
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

In [60]:
def get_text_embedding(input):
    embeddings_batch_response = client.embeddings(
          model="mistral-embed",
          input=input
      )
    return embeddings_batch_response.data[0].embedding
text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])

In [61]:
import faiss
d = text_embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(text_embeddings)

In [62]:
question = "How can AI be used for Education?"
question_embeddings = np.array([get_text_embedding(question)])
D, I = index.search(question_embeddings, k=2) # distance, index
retrieved_chunk = [chunks[i] for i in I.tolist()[0]]
prompt = f"""
Context information is below.
---------------------
{retrieved_chunk}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""
def run_mistral(user_message, model="mistral-medium-latest"):
    messages = [
        ChatMessage(role="user", content=user_message)
    ]
    chat_response = client.chat(
        model=model,
        messages=messages
    )
    return (chat_response.choices[0].message.content)

run_mistral(prompt)

'AI can be used in various ways for education, such as:\n\n1. Personalized Learning: AI can help teachers create customized learning paths for students based on their prior knowledge, skills, interests, and goals. This can help students learn at their own pace and in their own style.\n2. Interactive Learning: AI can create simulations, games, and virtual reality experiences to make learning more engaging and interactive. These tools can help students develop critical thinking, problem-solving, and teamwork skills.\n3. Collaborative Learning: AI can facilitate collaborative learning experiences by generating prompts and scenarios to foster creative problem-solving, communication, and teamwork.\n4. Feedback and Assessment: AI can provide immediate feedback and assessment to students, helping them identify their strengths and areas for improvement. This can help teachers monitor student progress and adjust instruction accordingly.\n5. Content Creation: AI can help teachers create and cura