In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from torch.nn import functional as F
import random

# Hugging Face model id of the Polish causal language model used throughout.
model_name = "eryk-mazus/polka-1.1b"
# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (slowly) on a machine without CUDA instead of failing in `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# `ask` samples from the model; fix the seeds so a Restart & Run All
# yields the same answers on the same machine and library versions.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

# Left padding is requested explicitly for this decoder-only model.
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")

model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
# Give `generate` the tokenizer's pad token id.
model.generation_config.pad_token_id = tokenizer.pad_token_id
# Switch to inference mode; as the last expression this also displays the
# module summary as the cell output.
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(43904, 2048)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
    )
    (norm): 

In [2]:
def query(question, answer):
    """Format one question/answer pair as a two-line prompt entry.

    Args:
        question: Text placed after the "Pytanie: " label.
        answer: Text placed after the "Odpowiedź: " label; pass an empty
            string to leave the answer open.

    Returns:
        The question line and the answer line joined by a single newline.
    """
    question_line = f"Pytanie: {question}"
    answer_line = f"Odpowiedź: {answer}"
    return "\n".join((question_line, answer_line))

# Few-shot examples grouped by the first word of the question.
# Each value is a list of prompt entries already formatted by `query`.
# The order of categories and of entries inside a category is significant.
examples = {
    first_word: [query(question, answer) for question, answer in pairs]
    for first_word, pairs in {
        "Kiedy": [
            ("Kiedy odbyła się bitwa pod Grunwaldem?", "w 1410"),
            ("Kiedy miała miejsce wojna secesyjna?", "w 1861"),
            ("Kiedy odbyła się rewolucja francuska?", "w 1789"),
        ],
        "Gdzie": [
            ("Gdzie znajduje się Mount Everest?", "w Himalajach"),
            ("Gdzie leży Warszawa?", "w Polsce"),
            ("Gdzie znajduje się rzeka Amazonka?", "w Ameryce Południowej"),
        ],
        "Ile": [
            ("Ile planet znajduje się w Układzie Słonecznym?", "8"),
            ("Ile jest krajów na świecie?", "195"),
            ("Ile wynosi pierwiastek z czterech?", "2"),
        ],
        # Disabled category, kept for reference:
        # "Czy": [
        #     ("Czy Warszawa jest stolicą Polski?", "tak"),
        #     ("Czy słońce jest planetą?", "nie"),
        #     ("Czy Ziemia obraca się wokół Słońca?", "tak"),
        # ],
    }.items()
}

In [3]:
def ask(question, examples=()):
    """Answer `question` with the language model using a few-shot prompt.

    Args:
        question: Question text; it is formatted with `query` and an empty
            answer so the model continues after "Odpowiedź: ".
        examples: Iterable of already formatted question/answer entries
            placed before the question, separated by blank lines. May be
            empty, in which case the prompt is just the open question.

    Returns:
        The first line of the generated continuation, with surrounding
        whitespace removed and cut at the first double quote.
    """
    # Joining examples and the open question in one pass gives the same
    # prompt as before for non-empty examples, and no leading blank lines
    # when there are none.
    prompt = "\n\n".join([*examples, query(question, "")])

    model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
    # NOTE(review): `penalty_alpha` is normally the contrastive-search knob,
    # which transformers appears to use only when do_sample=False; with
    # do_sample=True this presumably runs plain top-k sampling. Confirm
    # which decoding mode is intended before changing these values.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=126,
        do_sample=True,
        penalty_alpha=0.6,
        top_k=10
    )

    # Decode only the tokens produced after the prompt. Stripping the prompt
    # from the decoded text with `removeprefix` silently does nothing when
    # the tokenizer round-trip does not reproduce the prompt exactly, which
    # would return a line of the prompt instead of the answer.
    prompt_length = model_inputs["input_ids"].shape[1]
    completion = tokenizer.batch_decode(
        generated_ids[:, prompt_length:], skip_special_tokens=True
    )[0]

    # Turn literal "\n" sequences into real newlines, then keep only the
    # first line and drop everything from the first double quote on.
    return completion.strip().replace("\\n", "\n").split("\n")[0].split("\"")[0]



In [4]:
def log_probs_from_logits(logits, labels):
    """Pick the log-probability of each label out of the logits.

    Args:
        logits: Tensor whose last axis is normalised with log-softmax; the
            label lookup is done along axis 2.
        labels: Integer tensor of indices; it gets an extra axis at
            position 2 so it can index `logits` along that axis.

    Returns:
        Tensor of log-probabilities of the given labels, with the trailing
        singleton axis removed.
    """
    all_log_probs = F.log_softmax(logits, dim=-1)
    label_index = labels.unsqueeze(2)
    picked = all_log_probs.gather(2, label_index)
    return picked.squeeze(-1)

@torch.no_grad()
def sentence_prob(sentence_txt):
    """Score a sentence by the summed log-probability of its tokens.

    The sentence is tokenized, run through the model once, and every token
    after the first is scored given the tokens before it (the logits at
    position t are matched against the token at position t + 1).

    Args:
        sentence_txt: Text to score.

    Returns:
        The sum of the per-token log-probabilities, moved to the CPU and
        converted with `.numpy()`.
    """
    token_ids = tokenizer(sentence_txt, return_tensors='pt')['input_ids'].to(device)
    logits = model(input_ids=token_ids).logits
    # Drop the last position's logits and the first token so predictions
    # line up with the tokens they predict.
    per_token = log_probs_from_logits(logits[:, :-1, :], token_ids[:, 1:])
    return per_token.sum().cpu().numpy()

In [5]:
def test(question):
    """Answer a question with the strategy selected by its first word.

    * "Czy" questions are answered by scoring the question completed with
      "tak" and with "nie" using `sentence_prob` and returning the
      higher-scoring word.
    * Questions whose first word is a key of `examples` are answered by
      `ask` with that category's few-shot examples.
    * Anything else (including a blank question) is answered by `ask` with
      one example taken from each category.

    Args:
        question: Question text.

    Returns:
        The answer string.
    """
    words = question.split()
    # A blank question has no first word; treat it like an unknown type
    # instead of raising IndexError.
    prefix = words[0] if words else ""

    if prefix == "Czy":
        return max(["tak", "nie"], key=lambda a: sentence_prob(query(question, a)))

    if prefix in examples:
        return ask(question, examples=examples[prefix])

    # Unknown question type: take one example per category, at a different
    # position in each (first of the first category, second of the second,
    # and so on, wrapping around), so the prompt shows varied question forms.
    mixed = [
        group[i % len(group)]
        for i, group in enumerate(examples.values())
        if group
    ]
    return ask(question, examples=mixed)

In [6]:
# Smoke test on a single question; its first word "Ile" is a key of `examples`.
test("Ile książek o Królu Maciusiu napisał Janusz Korczak?")

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


'6'

In [7]:
# Answer every question in task4_questions.txt and write one answer per
# line, in the same order, to found_answers.txt.
# Both files are opened once and closed by the `with` block. The output is
# opened in write mode so re-running this cell replaces earlier results
# instead of appending to them, which would misalign answers and questions.
# UTF-8 is requested explicitly because the text is Polish.
with open("task4_questions.txt", encoding="utf-8") as questions, \
        open("found_answers.txt", mode="w", encoding="utf-8") as answers:
    for i, question in enumerate(questions):
        # Progress marker every 100 questions.
        if not i % 100: print(i)
        answers.write(test(question.strip()) + "\n")
        # Flush after each answer so partial results survive an interruption.
        answers.flush()

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400


In [11]:
# Run the external scoring script in a subprocess (script not shown here;
# presumably it reads found_answers.txt, confirm against the script).
!python answer_check_for_task4.py

TOTAL SCORE: 87.0


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
