In [1]:
import sys
import os
import fire
import torch
import transformers
import json
import jsonlines

from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

In [2]:
# Checkpoint identifier handed to `from_pretrained` for both the tokenizer
# and the causal-LM weights.
model_name = "WizardLM/WizardCoder-15B-V1.0"

# Forwarded as `load_in_8bit` on the CUDA load path; when False the model is
# additionally cast with `.half()` after loading.
load_8bit = False

In [3]:
# Pick the execution device: CUDA when present, otherwise CPU; an available
# Apple MPS backend takes precedence over either (same order as before).
device = "cuda" if torch.cuda.is_available() else "cpu"

# `torch.backends.mps` is missing on older torch builds. Probe for it
# explicitly instead of hiding every possible error behind a bare `except`.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    device = "mps"

In [4]:
# Load the tokenizer and the causal-LM weights for `model_name`, choosing the
# loading strategy from the previously selected `device`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
if device == "cuda":
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        load_in_8bit=load_8bit,
        torch_dtype=torch.float16,
        device_map="auto",
    )
elif device == "mps":
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map={"": device},
        torch_dtype=torch.float16,
    )
else:
    # CPU fallback: previously no branch ran here, so `model` was never bound
    # and the next statement raised NameError. Kept in float32 because
    # half-precision support on CPU varies between torch builds
    # (NOTE(review): a 15B checkpoint in float32 needs a lot of RAM).
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map={"": device},
        torch_dtype=torch.float32,
    )

# Keep the model config's padding id in sync with the tokenizer's.
model.config.pad_token_id = tokenizer.pad_token_id

# Cast to fp16 unless 8-bit loading was requested; skipped on CPU, where the
# model is deliberately left in float32.
if not load_8bit and device != "cpu":
    model.half()

In [17]:
def evaluate(
        batch_data,
        input=None,
        temperature=0.1,
        top_p=0.9,
        top_k=40,
        num_beams=1,
        max_new_tokens=2048,
        **kwargs,
):
    """Generate a completion for an instruction with the notebook's model.

    Args:
        batch_data: Instruction text; formatted into a prompt by
            ``generate_prompt``.
        input: Forwarded unchanged to ``generate_prompt``.
        temperature: Stored in the ``GenerationConfig``.
        top_p: Stored in the ``GenerationConfig``.
        top_k: Stored in the ``GenerationConfig``.
        num_beams: Beam count stored in the ``GenerationConfig``.
        max_new_tokens: Upper bound on generated tokens, passed to
            ``model.generate``.
        **kwargs: Extra fields forwarded to ``GenerationConfig``.

    Returns:
        The decoded sequences from ``tokenizer.batch_decode`` (one string per
        sequence, special tokens kept). Each sequence contains the prompt
        followed by the generated continuation.

    Note:
        ``do_sample`` is not set here. Per the transformers documentation,
        ``temperature``/``top_p``/``top_k`` only influence decoding when
        sampling is enabled, e.g. by passing ``do_sample=True`` via ``kwargs``.
    """
    prompts = generate_prompt(batch_data, input)
    # The prompt is truncated to 512 tokens; padding only matters when several
    # prompts of different length are tokenized together.
    inputs = tokenizer(prompts, return_tensors="pt", max_length=512, truncation=True, padding=True)
    input_ids = inputs["input_ids"].to(device)

    # Pass the tokenizer's attention mask along so padded positions are
    # ignored; without it `generate` has to guess the mask from pad_token_id.
    attention_mask = inputs.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)

    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
        **kwargs,
    )
    with torch.no_grad():
        # Only `.sequences` is consumed below, so per-step scores are no
        # longer requested (they were kept in memory and never read).
        generation_output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            return_dict_in_generate=True,
            max_new_tokens=max_new_tokens,
        )
    return tokenizer.batch_decode(generation_output.sequences, skip_special_tokens=False)

def generate_prompt(instruction, input=None):
    """Wrap ``instruction`` in the instruction/response prompt template.

    Args:
        instruction: Task description interpolated under ``### Instruction:``.
        input: Accepted for call compatibility; not used by the template.

    Returns:
        The prompt string, ending with ``### Response:`` and no trailing
        newline.
    """
    preamble = (
        "Below is an instruction that describes a task. "
        "Write a response that appropriately completes the request."
    )
    sections = (
        preamble,
        f"### Instruction:\n{instruction}",
        "### Response:",
    )
    # Sections are separated by exactly one blank line.
    return "\n\n".join(sections)

In [6]:
def test(prompt):
    """Run ``evaluate`` on the whitespace-stripped prompt and print the first result."""
    cleaned_prompt = prompt.strip()
    outputs = evaluate(cleaned_prompt)
    first_output = outputs[0]
    print(first_output)

In [10]:
# Smoke test: run one short instruction through `test`, which prints the
# first decoded sequence returned by `evaluate`.
test('Write a Python code to count 1 to 10')

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
Write a Python code to count 1 to 10

### Response:```python
for i in range(1, 11):
    print(i)
```

Output:

```
1
2
3
4
5
6
7
8
9
10
```


In [25]:
%%time

# Longer, multi-paragraph instruction, timed with the cell magic above.
# `test` strips the leading/trailing blank lines of the literal before the
# prompt is built.
test('''
You are given two integer arrays nums1 and nums2 sorted in ascending order and an integer k.

Define a pair (u, v) which consists of one element from the first array and one element from the second array.

Return the k pairs (u1, v1), (u2, v2), ..., (uk, vk) with the smallest sums.


''')

Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
You are given two integer arrays nums1 and nums2 sorted in ascending order and an integer k.

Define a pair (u, v) which consists of one element from the first array and one element from the second array.

Return the k pairs (u1, v1), (u2, v2),..., (uk, vk) with the smallest sums.

### Response:To solve this problem, we can use a two-pointer approach. We start by initializing two pointers, one for each array, pointing to the first element. We also initialize a variable `min_sum` to infinity and a list `result` to store the pairs.

We then iterate through both arrays, comparing the sum of the current pair with `min_sum`. If the sum is smaller than `min_sum`, we update `min_sum` and add the pair to the `result` list.

After we have iterated through both arrays, we return the first `k` pairs from the `result` list.

Here's the Python code:

```python
def k_smallest_p