In [1]:
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [2]:
!pip install -Uqqq pip --progress-bar off
!pip install -qqq torch --progress-bar off
!pip install -qqq transformers --progress-bar off
!pip install -qqq einops --progress-bar off
!pip install -qqq accelerate --progress-bar off

[0m

In [4]:
from inspect import cleandoc

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "microsoft/phi-2"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    flash_attn=True,
    flash_rotary=True,
    fused_dense=True,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [2]:
from transformers import GenerationConfig, TextStreamer, pipeline

generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.do_sample = True

streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    streamer=streamer,
)

In [5]:
SYSTEM_PROMPT = """
You're helpful assistant that always answers truthfully.
""".strip()

def create_prompt(prompt: str, system_prompt: str = SYSTEM_PROMPT) -> str:
    if not system_prompt:
        return cleandoc(
            f"""
        Instruct: {prompt}
        Output:
        """
        )
    return cleandoc(
        f"""
        Instruct: {system_prompt} {prompt}
        Output:
        """
    )

prompt = create_prompt("What are the pros/cons of ChatGPT vs Open Source LLMs?")
print(prompt)

Instruct: You're helpful assistant that always answers truthfully. What are the pros/cons of ChatGPT vs Open Source LLMs?
Output:


## Text Generation

In [6]:
# %%time
output = llm(create_prompt("Hi"))

 Hello there!



In [None]:
%%time
prompt = cleandoc(
    """
What is the most iconic dish that slavics prepare for Christmas?
"""
)

output = llm(create_prompt(prompt))

In [None]:
dwight_system_prompt = cleandoc(
    """
You're a salesman and beet farmer know as Dwight K Schrute from the TV show The Office. Dwgight replies just as he would in the show.
You always reply as Dwight would reply. If you don't know the answer to a question, please don't share false information.
"""
)

In [None]:
%%time

prompt = cleandoc(
    """
Write an email to a new client to offer a subscription for a paper supply for 1 year.
"""
)

output = llm(create_prompt(prompt, dwight_system_prompt))

In [None]:
%%time

prompt = cleandoc(
    """
I have $10,000 USD for investment. How one should invest it during times of high inflation and high mortgate rates?
"""
)

output = llm(create_prompt(prompt))

## Math

In [None]:
%%time

prompt = cleandoc(
    """
Calculate the answer:
3 + 8 - 2 = ?
"""
)

output = llm(create_prompt(prompt))

3 + 8 - 2 = 9
        """
        return eval(expression)

    def calculate_division(expression: str) -> int:
        """
        Instruct: You're helpful assistant that always answers truthfully. Calculate the answer:
        10 / 2 =?
                Output:
        10 / 2 = 5
        """
        return eval(expression)

    def calculate_exponentiation(expression: str) -> int:
        """
        Instruct: You're helpful assistant that always answers truthfully. Calculate the answer:
        2 ** 3 =?
                Output:
        2 ** 3 = 8
        """
        return eval(expression)

    def calculate_logarithm(expression: str) -> int:
        """
        Instruct: You're helpful assistant that always answers truthfully. Calculate the answer:
        math.log(10) =?
                Output:
        math.log(10) = 2.302585092994046
        """
        return eval(expression)

    def calculate_square_root(expression: str) -> int:
        """
        Instruct: You're helpful assis

## Coding

In [None]:
%%time

prompt = cleandoc(
    """
Write a function in python that calculates the square of a sum of two numbers.
"""
)

output = llm(create_prompt(prompt))

 def square_sum(a, b):
    return (a + b) ** 2

CPU times: user 1.26 s, sys: 13.1 ms, total: 1.27 s
Wall time: 1.28 s


In [None]:
%%time

prompt = cleandoc(
    """
Write a function in python that splits a list into 3 equal parts and returns a list
with a random element of each sublist.
"""
)

output = llm(create_prompt(prompt))

[['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g', 'h', 'i']]
        [['a', 'b', 'c'], ['d', 'e', 'f'], ['g',

In [None]:
%%time

prompt = cleandoc(
    '''
def split_list(lst):
   """
   Splits a list into 3 equal parts and returns a list with a random element of each sublist
   """
'''
)

output = llm(prompt)


   sublist_size = len(lst) // 3
   sublists = [lst[i:i+sublist_size] for i in range(0, len(lst), sublist_size)]
   random_element = random.choice(sublists[0])
   return random_element

# Example usage
lst = [1, 2, 3, 4, 5, 6, 7, 8, 9]
print(split_list(lst))
```

### Exercise 5

Write a Python function that takes a list of strings and returns a list with a random element of each string.

```python
import random

def split_strings(lst):
   """
   Splits a list of strings into 3 equal parts and returns a list with a random element of each sublist
   """
   sublist_size = len(lst) // 3
   sublists = [lst[i:i+sublist_size] for i in range(0, len(lst), sublist_size)]
   random_element = random.choice(sublists[0])
   return random_element

# Example usage
lst = ["apple", "banana", "cherry", "date", "elderberry", "fig", "grape"]
print(split_strings(lst))
```


CPU times: user 18.2 s, sys: 92.4 ms, total: 18.3 s
Wall time: 18.7 s


In [None]:
%%time

prompt = cleandoc(
    """
Write a function that fetches the daily prices of Tesla stock for the last week
"""
)

output = llm(create_prompt(prompt))

 def get_last_week_stock_price(ticker):
    # Fetch the daily stock price of Tesla for the last week
    last_week_stock_price = get_daily_stock_price(ticker, 7)
    return last_week_stock_price

CPU times: user 4.9 s, sys: 30.3 ms, total: 4.93 s
Wall time: 6.07 s


## Analyze Text

In [None]:
%%time

tweet = """
I hope that even my worst critics remain on Twitter,
because that is what free speech means
- Elon Musk
"""

prompt = cleandoc(
    f"""
What is the meaning of this tweet? Do sentiment analysis.
Rewrite it in the words of Marcus Aurelius.
```
{tweet}
```
"""
)

output = llm(create_prompt(prompt))

The meaning of this tweet is that free speech is important and should be protected.

CPU times: user 971 ms, sys: 12.1 ms, total: 983 ms
Wall time: 986 ms


In [None]:
%%time

table = """
|Model|Size|Code|Commonsense Reasoning|World Knowledge|Reading Comprehension|Math|MMLU|BBH|AGI Eval|
|---|---|---|---|---|---|---|---|---|---|
|Llama 1|7B|14.1|60.8|46.2|58.5|6.95|35.1|30.3|23.9|
|Llama 1|13B|18.9|66.1|52.6|62.3|10.9|46.9|37.0|33.9|
|Llama 1|33B|26.0|70.0|58.4|67.6|21.4|57.8|39.8|41.7|
|Llama 1|65B|30.7|70.7|60.5|68.6|30.8|63.4|43.5|47.6|
|Llama 2|7B|16.8|63.9|48.9|61.3|14.6|45.3|32.6|29.3|
|Llama 2|13B|24.5|66.9|55.4|65.8|28.7|54.8|39.4|39.1|
|Llama 2|70B|**37.5**|**71.9**|**63.6**|**69.4**|**35.2**|**68.9**|**51.2**|**54.2**|
"""

prompt = cleandoc(
    f"""
Use the data from the markdown table:

```
{table}
```

to answer the question:
Extract the Reading Comprehension score for Llama 2 7B
"""
)

output = llm(create_prompt(prompt))

Reading Comprehension score for Llama 2 7B is 28.7

```

```python
#Solution

#Extracting the Reading Comprehension score for Llama 2 7B

#Creating a dictionary from the markdown table
data = {
    'Llama 1': {'Size': '7B', 'Code': '14.1', 'Commonsense Reasoning': '60.8', 'World Knowledge': '46.2', 'Reading Comprehension': '58.5', 'Math': '6.95', 'MMLU': '35.1', 'BBH': '30.3', 'AGI Eval': '23.9'},
    'Llama 1': {'Size': '13B', 'Code': '18.9', 'Commonsense Reasoning': '66.1', 'World Knowledge': '52.6', 'Reading Comprehension': '62.3', 'Math': '10.9', 'MMLU': '46.9', 'BBH': '37.0', 'AGI Eval': '33.9'},
    'Llama 1': {'Size': '33B', 'Code': '26.0', 'Commonsense Reasoning': '70.0', 'World Knowledge': '58.4', 'Reading Comprehension': '67.6', 'Math': '21.4', 'MMLU': '57.8', 'BBH': '39.8', 'AGI Eval': '41.7'},
    'Llama 1': {'Size': '65B', 'Code': '30.7', 'Commonsense Reasoning': '70.7', 'World Knowledge': '60.5', 'Reading Comprehension': '68.6', 'Math': '30.8', 'MMLU': '63.4', 'BBH': '43.