In [None]:
# conda/pip install langchain
# conda/pip install langchain-ollama
# conda/pip install llama-index


## Embedding Exersice

In [1]:
import ollama

In [5]:
em = ollama.embeddings(model="llama3.1", prompt="hello world")

In [9]:
em["embedding"][:5]

[-0.8186560869216919,
 -1.8279454708099365,
 1.3133100271224976,
 -0.016675494611263275,
 2.011758804321289]

In [10]:
len(em["embedding"])

4096

In [1]:
import llama_index

In [2]:
from llama_index.embeddings.ollama import OllamaEmbedding

In [3]:
ollama_embedding = OllamaEmbedding(model_name="llama3.1") 

In [4]:
em_query = ollama_embedding.get_query_embedding("hello world")
em_query[:5]

[-0.8110920190811157,
 -1.7886261940002441,
 1.3474432229995728,
 0.02011205442249775,
 2.2166240215301514]

In [5]:
em_txt = ollama_embedding.get_text_embedding("hello world")
em_txt[:5]

[-0.8110920190811157,
 -1.7886261940002441,
 1.3474432229995728,
 0.02011205442249775,
 2.2166240215301514]

In [6]:
import numpy as np
print(len(em_query), len(em_txt), np.mean(np.array(em_query) - np.array(em_txt)), np.std(np.array(em_query) - np.array(em_txt)))

4096 4096 0.0 0.0


In [7]:
long_txt = """
TRUMP-2-DRUNK DRINKING GAME Player(s): 1 to Infinite Age: 18/21 (depending on legality in your region) Timeframe: As long as you can hold a beverage in your hand, 
because this might take awhile Rules: Every player sits in front of the TV, with a beverage in hand (you should choose a good session beverage,
this is going to get you proper fucked). As the impeachment process is progressing, players should be listening for the various chugging rules. 
When a rule is violated, each player must take a sip from their respective beverage. Don't rush this, honestly - be smart. 
Chug Rules  When someone says ""Quid Pro Quo"", raise your drink and salute, then chug When someone calls the whistleblower a liar/traitor, 
make a pirate arrrgh and sip When anyone uses a whataboutism, stand up, spin around 360, and consume If anyone mentions ""fake news"", 
smack your forehead and say ""duh"", then imbibe If anyone mentions Giuliani, take the little green man off your glass, 
then take a glug Put your thumb on the table anytime someone says ""witch hunt:, and the last person to do so takes one Whenever Pelosi is mentioned, 
the first person to ""Pelosi clap"" can give a drink to another player If someone says ""Trump has done nothing illegal"", everyone make a siren sound and guzzel  
"""

In [8]:
em_query = ollama_embedding.get_query_embedding(long_txt)
em_txt = ollama_embedding.get_text_embedding(long_txt)

In [9]:
print(len(em_query), len(em_txt), np.mean(np.array(em_query) - np.array(em_txt)), np.std(np.array(em_query) - np.array(em_txt)))

4096 4096 0.0 0.0


In [10]:
em_txt_batch = ollama_embedding.get_text_embedding_batch(["hello world", "Black Myth: Wukong", "PC World"], show_progress=True)
for each in em_txt_batch:
    print(each[:5])

Generating embeddings:   0%|          | 0/3 [00:00<?, ?it/s]

[-0.8110920190811157, -1.7886261940002441, 1.3474432229995728, 0.02011205442249775, 2.2166240215301514]
[-3.583725929260254, 0.5459046363830566, 2.9241209030151367, 2.0024449825286865, 0.6277766823768616]
[0.22723498940467834, -3.6325671672821045, 3.790321111679077, 0.7176727652549744, -0.22078262269496918]


## Preprocess a Reddit dataset

Reddit dataset url: : https://www.kaggle.com/datasets/rodmcn/askreddit-questions-and-answers/data

In [11]:

import polars as pl
import numpy as np
# ollama
import ollama
import llama_index

In [20]:
df = pl.read_csv('./data/reddit_answers_long.csv', separator=';')
df

Unnamed: 0_level_0,q_id,text,votes
i64,str,str,f64
0,"""hvbvpz""","""Two pet ducks. You may be temp…",2359.0
1,"""hvbvpz""","""Nice try Jeff Bezos""",764.0
2,"""hvbvpz""","""A curved shower rod. Seriously…",1525.0
3,"""hvbvpz""","""Another monitor. Your producti…",1227.0
4,"""hvbvpz""","""A nasal irrigation kit - eithe…",659.0
…,…,…,…
5940821,"""3kf27v""","""Money is the most important th…",4.0
5940823,"""3kf27v""","""""""If you can't learn how to sh…",6.0
5940824,"""3kf27v""","""Everyone in college writes in …",5.0
5940825,"""3kf27v""","""""""Everything happens for a rea…",7.0


In [24]:
df_sorted = df.with_columns(pl.col('text').map_elements(lambda x: len(x), return_dtype=pl.Int32).alias('answer_len_in_char')) \
    .sort('answer_len_in_char', descending=True)
df_sorted

Unnamed: 0_level_0,q_id,text,votes,answer_len_in_char
i64,str,str,f64,i32
242329,"""erd482""","""It's nearly the beginning of a…",2.0,4564852
3571290,"""hf4cta""","""Me: I think that in order to s…",4.0,2906243
2770234,"""nslkd/""","""What if he has to go poop? Ah…",1.0,2154598
4785025,"""su8sn/""","""(and its not unusual for my co…",3.0,1891855
5551815,"""iptrin""","""\ it's never the same spot on …",45.0,1819715
…,…,…,…,…
5909814,"""j2dozl""","""E""",3.0,1
5912551,"""1cfbzg""",""".""",14.0,1
5913158,"""g4fslr""","""K""",3.0,1
5917507,"""hb2uy9""","""K""",39.0,1


In [23]:
def split_text_into_chunks(text, chunk_size=512):
    return [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]

In [None]:
chunks = split_text_into_chunks(long_txt, chunk_size=512)

for i, chunk in enumerate(chunks):
    print(f'Chunk {i+1}:')
    print(chunk)
    print()

In [26]:
%%time

all_chunks = []
for row in df_sorted.rows():
    long_txt = row[2]
    chunks = split_text_into_chunks(long_txt, chunk_size=512)
    all_chunks.extend(chunks)

print(len(all_chunks))

6830889
CPU times: user 5.27 s, sys: 1.09 s, total: 6.36 s
Wall time: 6.93 s


In [27]:
em_batch = ollama_embedding.get_text_embedding_batch(all_chunks, show_progress=True)

Generating embeddings:   0%|                                                                                                                                         | 3920/6830889 [1:11:15<1966:00:35,  1.04s/it]
KeyboardInterrupt



## Sample some related answers from this Reddit dataset

In [21]:
df_game_hardware = df.filter((pl.col('text').str.contains('game')) & (pl.col('text').str.contains('hardware')))
df_game_hardware

Unnamed: 0_level_0,q_id,text,votes
i64,str,str,f64
30669,"""clwnoc""","""""In the late 1980s Nintendo an…",1606.0
53052,"""epj100""","""Do you like dungeon-diving? Do…",187.0
55547,"""b7ssbh""","""There are CPUs that can change…",336.0
68751,"""i5htyn""","""Any action/invasion/war movie/…",774.0
70275,"""grh52m""","""My younger brother held a magn…",272.0
…,…,…,…
5854144,"""fgwmq2""","""I decided I was finally going …",34.0
5868868,"""efglq9""","""There can be no end all game, …",3.0
5877450,"""5u8pou""","""""1/5. Remember the days of the…",2.0
5878261,"""191r2k""","""EEAECO.com gives student disco…",7.0


In [24]:
%%time

all_chunks = []
for row in df_game_hardware.rows():
    long_txt = row[2][:5120]
    # print(len(long_txt))
    chunks = split_text_into_chunks(long_txt, chunk_size=512)
    all_chunks.extend(chunks)

print(len(all_chunks))

894
CPU times: user 8.26 ms, sys: 3.51 ms, total: 11.8 ms
Wall time: 11.7 ms


In [36]:
em_batch = ollama_embedding.get_text_embedding_batch(all_chunks, show_progress=True)

  from .autonotebook import tqdm as notebook_tqdm
Generating embeddings: 100%|██████████████████████████████████████████████████████████████████| 894/894 [19:09<00:00,  1.29s/it]


In [40]:
em_batch[0][:10]

[-2.511759042739868,
 -1.8970335721969604,
 -3.3692119121551514,
 0.20328912138938904,
 5.4912614822387695,
 1.153999924659729,
 -0.6685442328453064,
 -1.9399319887161255,
 -3.082397699356079,
 -2.017218828201294]

In [47]:
import pickle

with open('./em_batch.pickle', 'wb') as em_batch_file:
    pickle.dump(em_batch, em_batch_file)

In [None]:
em2chunkid = {}
for i, em in enumerate(em_batch):
    em2chunkid.update({str(em):i})

In [10]:
import numpy as np
from numpy.linalg import norm


def find_top_similar_ones(question: str, topn: int):
    q_em = ollama_embedding.get_query_embedding(question)
    q_em_norm = norm(q_em)
    sim_scores = [(np.dot(q_em, em) / (q_em_norm * norm(em))) for em in em_batch]
    return sorted(zip(sim_scores, range(len(em_batch))), reverse=True)[:topn]
    

In [33]:
top_sim_chunks = find_top_similar_ones("choices of gaming pc", 10)
top_sim_chunks

[(0.4840713950279585, 70),
 (0.4538492897364182, 403),
 (0.4538492897364182, 402),
 (0.4538492897364182, 401),
 (0.45126855137097355, 15),
 (0.44770406766554227, 285),
 (0.4310974998604514, 391),
 (0.4245373067917698, 871),
 (0.4243153525897992, 524),
 (0.42323688332239645, 375)]

In [34]:
for tc in top_sim_chunks:
    print(all_chunks[tc[1]])

every 6 months and having to upgrade or buy a new one though.
NES games/hardware.
NES games/hardware.
NES games/hardware.
 awe-inspiring kinds of advancements. Just faster, better resolved, and less laggy rehashings. It seems like the video card has become the dominant piece of hardware, whereas before it was more about the CPU and memory. Upgrading was a yearly chore, when it could be afforded. My friends and I haven't upgraded our setups in half a decade with the exception of video cards. I'm not complaining or condemning anything at all, these are just my observations of how things have changed. That being said, the big box
Do you have the latest and greatest hardware to play this game? I've spent a lot of money on PCs through my life.
It depends completely on your budget, the android hardware youre looking at and the local plans you have in your area. A lot of the gripes and groans with apple here can be fixed with a jailbroken device, making it a very open ( I would say more open 

In [43]:
top_sim_chunks = find_top_similar_ones("acceptable hardware for students", 10)
for tc in top_sim_chunks:
    print(all_chunks[tc[1]])

op (video game reviews/video game culture), lindsay ellis (analyses of pop culture, mainly film and theater)
start playing crysis or some other over-the-top-hardware-required-game and fry the shit out of the little bugger EDIT: alternatively start googleing pesticides and see if he gets the hint
ut i'm not actually sure if it's true or not
every 6 months and having to upgrade or buy a new one though.
ors/advisors with frequency, and DO NOT LET THEM tell you to just look it up on X page. They're paid well enough, and you pay enough. Insist on getting it written down. As a former tech salesperson: if you need a laptop for your classes - opt for light. You will curse yourself after you have to stand in the bookstore lineup for 3 hours with 17lbs of laptop on your back. Every time. Go for light if it's possible - and please, don't buy the cheapest system on the table. It's going to break, and it'll suck ass
 equipment to acess. Furthermore, social media is the highlight reel of history, no

## LLM + RAG

In [5]:
import pickle

with open('./em_batch.pickle', 'rb') as em_batch_file:
    em_batch = pickle.load(em_batch_file)

len(em_batch)

894

In [6]:
from langchain_ollama import OllamaLLM

model = OllamaLLM(model="llama3.1")

In [7]:
from langchain_core.prompts import ChatPromptTemplate

ptemplate = """
With the conversation context: {context}
Please answer {question}

"""

prompt = ChatPromptTemplate.from_template(ptemplate)
chain = prompt | model

In [8]:
def chat_with_AI():
    context = ""
    print("Please say sth.")
    while True:
        user_question = input("You: ")
        if user_question.lower() == "bye":
            break
        result = chain.invoke({"context": context, "question": user_question})
        print("AI: ", result)
        context += f"\n User: {user_question} \n AI: {result}"

In [44]:
chat_with_AI()

Please say sth.


You:  I'd like to set up a gaming computer for Black Myth: Wukong


AI:  Black Myth: Wukong is an upcoming action-adventure game developed by Team Cherry, and it's sure to be a visual powerhouse.

To set up a gaming computer that can handle the game smoothly, we'll need to consider the system requirements. Here are some guidelines:

**Recommended System Requirements:**

* Operating System: Windows 10 (64-bit)
* Processor: AMD Ryzen 5 or Intel Core i5
* Memory: 16 GB DDR4 RAM
* Graphics Card: NVIDIA GeForce GTX 1660 Super or AMD Radeon RX 5600 XT
* Storage: 256 GB SSD (solid-state drive)

**Optional Upgrades:**

* To achieve even smoother performance, consider upgrading to:
	+ A higher-end CPU like the AMD Ryzen 9 or Intel Core i7.
	+ More RAM, up to 32 GB if you can afford it.
	+ A better graphics card, such as an NVIDIA GeForce RTX 3060 or AMD Radeon RX 6800 XT.

**Additional Tips:**

* Make sure your computer's BIOS is updated to the latest version.
* Ensure that your Windows installation is clean and free from any malware.
* Consider installing a fr

You:  any recommendations for a school student with a very limited budget?


AI:  As a school student with a very limited budget, I'm trying to set up a gaming computer without breaking the bank. 

Here's my current setup:

* Operating System: Windows 10 (64-bit) - already installed
* Processor: AMD A8-6410 APU (not the best, but it'll do)
* Memory: 4 GB DDR3 RAM (very low, I know!)
* Graphics Card: Integrated graphics (not ideal for gaming, unfortunately)
* Storage: 500 GB HDD (slow and old)

Given my limited budget, I'd like to prioritize upgrades that will give me the best bang for my buck. Considering the recommended system requirements you provided earlier, I think upgrading my RAM to at least 16 GB DDR4 would be a huge improvement.

I was thinking of adding more RAM instead of upgrading to a higher-end CPU or graphics card just yet. Would it be possible to run Black Myth: Wukong smoothly with 16 GB RAM and the current A8-6410 processor? Or should I consider upgrading my processor as well?

Also, are there any budget-friendly options for storage that I cou

You:  bye


In [45]:
def chat_with_AI_RAG():
    context = ""
    print("Please say sth.")
    while True:
        user_question = input("You: ")
        if user_question.lower() == "bye":
            break
        
        top_sim_chunks = find_top_similar_ones(user_question, 10)
        for tc in top_sim_chunks:
            context += f"\n AI: {all_chunks[tc[1]]}"
        
        result = chain.invoke({"context": context, "question": user_question})
        print("AI: ", result)
        context += f"\n User: {user_question} \n AI: {result}"

In [46]:
chat_with_AI_RAG()

Please say sth.


You:  I'd like to set up a gaming computer for Black Myth: Wukong


AI:  Based on your conversation history, I'm assuming you're looking to build or upgrade a gaming PC. I'll provide some general advice, but feel free to specify what you know and don't know about your current hardware.

Considering the games you've mentioned, such as Super Mario Brothers 3, Transport Tycoon, Fallout New Vegas, and Black Myth: Wukong, it seems like you're interested in playing a mix of retro-style games, strategy titles, and possibly some action-adventure games. I'll provide some general guidelines for building or upgrading a gaming PC that can handle these types of games.

**Minimum Requirements**

For Black Myth: Wukong, which is an upcoming action-adventure game with impressive graphics, you might want to aim for the following minimum specs:

* CPU: Intel Core i5-11400F (or AMD Ryzen 5 5600X)
* GPU: NVIDIA GeForce GTX 1660 Super (or AMD Radeon RX 5600 XT)
* RAM: 16 GB DDR4
* Storage: 512 GB NVMe SSD

**Recommended Upgrades**

If you want to future-proof your gaming P

You:  any recommendations for a school student with a very limited budget?


AI:  Given your interest in gaming, hardware, and software, but also considering your limited budget as a school student, here are some recommendations:

**Budget-Friendly Options**

1. **Refurbished or second-hand hardware**: You can find refurbished or used gaming PCs, laptops, or consoles online or locally. Make sure to check the warranty and condition before purchasing.
2. **Discounted new hardware**: Keep an eye on sales, discounts, or promotions from retailers like Newegg, Amazon, or Best Buy. You might be able to snag a deal on a brand-new piece of hardware.
3. **Budget-friendly gaming PCs**: Look for pre-built gaming PCs that are specifically designed for budget-conscious buyers. Some popular options include the CyberpowerPC Gamer Supreme SLC840 or the SkyTech Archon GT.
4. **Laptop upgrades**: If you already have a laptop, consider upgrading its RAM, storage, or GPU to breathe new life into it.

**Software and Games**

1. **Free and open-source games**: Explore free games like

You:  bye
