In [1]:
import os
import pickle

data_folder = '/home/mcwave/code/automath/atp/datasets/provability/mathlib4_states_w_proof/'
file_names = os.listdir(data_folder)

data = []

count = 0
for file_name in file_names:
    if not file_name.endswith("pkl"):
        continue
    if not 'Algebra' in file_name:
        continue
    count += 1
    if count <= 5:
        continue
    print("Loading", file_name)
    file_path = os.path.join(data_folder, file_name)
    fin = open(file_path, 'rb')
    while True:
        try:
            pair = pickle.load(fin)
            data.append(pair) #(pair[1][0], pair[1][2][0]))
        except:
            break
    break

print(len(data), "examples loaded")

Loading Mathlib__Algebra__Ring__Subring__Pointwise.lean.pkl
12680 examples loaded


In [2]:
from utils.lean_math_utils import *
from utils.lean_theorem_utils import *

def count_lines(string):
    # Split the string into lines
    lines = string.splitlines()
    # Count the number of lines
    return len(lines)

def extract_first_case(state_pp):
    state_pp = state_pp.strip()
    if not state_pp.startswith('case'):
        return state_pp
    lines = state_pp.split('\n')
    first_case = []
    for line in lines[1:]:
        if line.strip().startswith('case'):
            break
        if line.strip() != '':
            first_case.append(line)
    return '\n'.join(first_case)


# Params:
#   hyp: tuple(name, type)
#   tactics: list(tactic)
def is_hypothesis_useful(hyp, tactics):
    for tactic in tactics:
        tokens = tokenize_lean_tactic(tactic)
        if hyp[0] in tokens:
            idx = tokens.index(hyp[0])
            if idx > 0:
                if hyp[0].startswith('h'):
                    return True
                if tokens[idx - 1] == 'exact':
                    return True
                if tokens[idx - 1] == 'at':
                    return True
                if tokens[idx - 1] == '[':
                    return True
                if idx < len(tokens) - 1 and tokens[idx + 1] == ']':
                    return True
                for operator in TargetNode.operators:
                    if operator in hyp[1]:
                        return True
    return False

def create_hypothesis_predict_data(raw_state_pp, tactics, theorem_name):
    is_case = raw_state_pp.strip().startswith('case')
    state_pp = extract_first_case(raw_state_pp)
    if is_case and count_lines(state_pp) < count_lines(raw_state_pp) - 2:
        tactics = tactics[0:1]
    #
    premise = Premise()
    premise.theorem_name = theorem_name
    premise.parse_state(state_pp)
    #
    useful_hypotheses, useless_hypotheses = [], OrderedDict()
    for hyp in premise.hypotheses.items():
        useful = is_hypothesis_useful(hyp, tactics)
        if useful:
            #print("YES:", hyp)
            useful_hypotheses.append(hyp)
        else:
            #print("NO :", hyp)
            useless_hypotheses[hyp[0]] = hyp[1]
    premise.hypotheses = useless_hypotheses
    return premise, useful_hypotheses

idx = 120

state_pp = data[idx][1][0]
tactics = data[idx][1][2]
theorem_name = data[idx][0][3]

print("STATE_PP:\n" + state_pp)

print("TACTICS:\n" + "\n".join(tactics))

premise, useful_hypotheses = create_hypothesis_predict_data(state_pp, tactics, theorem_name)

print("STATE_PP:\n" + premise.to_theorem_code())
print("\nHYPOTHESES:\n", useful_hypotheses)


STATE_PP:
case h
M : Type u_1
R : Type u_2
inst‚úù¬≤ : Monoid M
inst‚úù¬π : Ring R
inst‚úù : MulSemiringAction M R
a : M
S : Subring R
nvar0 : R
‚ä¢ nvar0 ‚àà map (MulSemiringAction.toRingHom M R a) S ‚Üî nvar0 ‚àà a ‚Ä¢ S
TACTICS:
symm
simp [eq_comm (a := a)]
cases S
rw [smul_neg]
rw [‚Üê eq_f‚ÇÄ']
STATE_PP:
theorem Subring.pointwise_smul_def (M: Type u_1) (R: Type u_2) (inst‚úù¬≤: Monoid M) (inst‚úù¬π: Ring R) (inst‚úù: MulSemiringAction M R) (a: M) (S: Subring R) (nvar0: R) : nvar0 ‚àà map (MulSemiringAction.toRingHom M R a) S ‚Üî nvar0 ‚àà a ‚Ä¢ S :=

HYPOTHESES:
 []


In [7]:
from datasets import Dataset

MIN_LENGTH = 4

TEST_MOD = 130

train_state_pps = []
test_state_pps = []
train_target_hyps = []
test_target_hyps = []
seen_hashes = set()
fin = open('/home/mcwave/code/axiomatization/datasets/mathlib4_all_states_w_proof_hyp_pred.pkl', 'rb')

while True:
    try:
        premise, hypotheses = pickle.load(fin)
        state_pp = premise.to_theorem_code()
        target_hyp = str([x[1] for x in hypotheses])
        hash_value = hash(state_pp + '|' + target_hyp)
        if hash_value in seen_hashes:
            continue
        else:
            seen_hashes.add(hash_value)
        #data.append((state_pp, target_hyp))
        if len(state_pp) < 4 or len(target_hyp) < 4:
            continue
        if hash(premise.theorem_name) % TEST_MOD == 0:
            test_state_pps.append(state_pp)
            test_target_hyps.append(target_hyp)
        else:
            train_state_pps.append(state_pp)
            train_target_hyps.append(target_hyp)
    except:
        break
    
fin.close()

print("Train:", len(train_state_pps))
print("Test:", len(test_state_pps))

Train: 3396064
Test: 24237


In [3]:
import torch
from transformers import AutoTokenizer
from datasets import load_dataset, Dataset, load_from_disk

MAX_LENGTH = 300

# Initialize tokenizer and model

model_name = "morph-labs/morph-prover-v0-7b" #"internlm/internlm2-math-7b" #"ScalableMath/Lean-STaR-plus"  # 'Saisam/gpt-neo-math-small' #

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.padding_side = "right"

# Define the separator token
sep_token = "<sep>"
pad_token = "<pad>"

# Check if the separator token already exists in the vocabulary
if sep_token not in tokenizer.get_vocab():
    tokenizer.add_tokens([sep_token])
if pad_token not in tokenizer.get_vocab():
    tokenizer.add_tokens([pad_token])

# Set the separator token
tokenizer.sep_token = sep_token
tokenizer.pad_token = pad_token

tokenizer.add_special_tokens({
    'sep_token': sep_token,
    'pad_token': pad_token
})

# tokenizer = AutoTokenizer.from_pretrained("datasets/text_for_tokenization/mathlib4_20240617_bpe_tokenizer")
# # Define the tokens
# sep_token = "<sep>"
# pad_token = "<pad>"

# # Set the sep_token and pad_token
# tokenizer.sep_token = sep_token
# tokenizer.pad_token = pad_token
# #tokenizer.add_special_tokens({'sep_token': '[SEP]'})


# Function to tokenize and prepare the data
def prepare_data(examples):
    # Concatenate instruction and response with a separator
    full_texts = [f"{instruction} <sep> {response}" for instruction, response in zip(examples['instruction'], examples['response'])]
    
    # Tokenize the full texts
    encodings = tokenizer(full_texts, truncation=True, padding='max_length', max_length=MAX_LENGTH, return_tensors='pt')
    #print(encodings)
    
    # Create attention masks: 1 for response tokens, 0 for instruction tokens and padding
    attention_masks = []
    labels = []
    
    for input_ids in encodings['input_ids']:
        attention_mask = torch.zeros_like(input_ids)
        label = input_ids.clone()
        
        pad_token_idx = (input_ids == tokenizer.pad_token_id).nonzero()
        end_idx = pad_token_idx[0].item() if len(pad_token_idx) > 0 else len(input_ids)
        sep_token_idx = (input_ids == tokenizer.sep_token_id).nonzero()
        #print("sep_token_idx:", sep_token_idx)
        if len(sep_token_idx) == 0:
            sep_token_idx = 0
        else:
            sep_token_idx = sep_token_idx.item()

        attention_mask[0:end_idx] = 1
        attention_masks.append(attention_mask)
        
        label[0:sep_token_idx+1] = -100
        labels.append(label)
    
    return {
        'input_ids': encodings['input_ids'],
        'attention_mask': torch.stack(attention_masks),
        'labels': torch.stack(labels)
    }

# # Create the Hugging Face dataset
# test_dataset = Dataset.from_dict({
#     'instruction': test_state_pps,
#     'response': test_target_hyps
# }).shuffle(seed=42)

# # Apply the tokenization and preparation function
# tokenized_test = test_dataset.map(
#     prepare_data,
#     batched=True,
#     num_proc=4
#     #remove_columns=dataset.column_names
# )

# # Create the Hugging Face dataset
# train_dataset = Dataset.from_dict({
#     'instruction': train_state_pps,
#     'response': train_target_hyps
# }).shuffle(seed=42)

# # Apply the tokenization and preparation function
# tokenized_train = train_dataset.map(
#     prepare_data,
#     batched=True,
#     num_proc=4
#     #remove_columns=dataset.column_names
# )

In [4]:
import torch
from transformers import AutoTokenizer
from datasets import load_dataset, Dataset, load_from_disk

#tokenized_train.save_to_disk('datasets/predict_hyp_tokenized_train.dataset')
#tokenized_test.save_to_disk('datasets/predict_hyp_tokenized_test.dataset')

tokenized_train = load_from_disk('datasets/predict_hyp_tokenized_train.dataset')
tokenized_test = load_from_disk('datasets/predict_hyp_tokenized_test.dataset')

Loading dataset from disk:   0%|          | 0/30 [00:00<?, ?it/s]

In [5]:
from transformers import AutoModelForCausalLM, Trainer, TrainingArguments
#from huggingface_hub import login

#login(token="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")

model_name = "Qwen/Qwen2-1.5B"
model = AutoModelForCausalLM.from_pretrained(model_name)
model.resize_token_embeddings(len(tokenizer))

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

Embedding(32002, 1536)

In [6]:
from transformers import Trainer, TrainingArguments
from datasets import load_dataset,load_metric
from transformers import AutoTokenizer, DataCollatorForLanguageModeling

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

training_args = TrainingArguments(
    output_dir="datasets/predict-hyp-qwen-1.5b",
    evaluation_strategy="steps", #"epochs"
    learning_rate=1e-5,  # PAY ATTENTION TO LEARNING RATE!
    weight_decay=0.01,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=2,
    num_train_epochs=4,
    bf16=True,
    max_grad_norm=1.0,
    save_steps=20000,
    eval_steps=20000,
    logging_steps=20000,
    save_total_limit=3,
    #load_best_model_at_end=True,
    push_to_hub=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    data_collator=data_collator,
)

cp_path = 'datasets/predict-hyp-gemma-2b-v0/checkpoint-200000'

trainer.train()



Step,Training Loss,Validation Loss
20000,0.3477,0.40878
40000,0.1994,0.376625
60000,0.1718,0.359299


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)

KeyboardInterrupt



In [7]:
tokenizer.decode(tokenized_test[0]['input_ids'])

"<s> theorem LinearMap.exact_map_mkQ_range (R: Type u_1) (M: Type u_2) (N: Type u_3) (P: Type u_4) (inst‚úù‚Å∂: CommRing R) (inst‚úù‚Åµ: AddCommGroup M) (inst‚úù‚Å¥: AddCommGroup N) (inst‚úù¬≥: AddCommGroup P) (inst‚úù¬≤: Module R M) (inst‚úù¬π: Module R N) (inst‚úù: Module R P) (y‚úù: N) : y‚úù ‚àà Set.range ‚áëf ‚Üî 0 = (range f).mkQ y‚úù := <sep>  ['M ‚Üí‚Çó[R] N']<pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>"

In [8]:
def predict_hyp(instruction):
    input_ids = tokenizer.encode(instruction, return_tensors='pt').to('cuda')

    # Generate output
    with torch.no_grad():
        outputs = model.generate(input_ids, max_new_tokens=50) #max_length=MAX_LENGTH)

    # Decode the generated output and the true labels
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text


for i in range(10):
    print("\nCASE", i)
    test_case = tokenized_test[i]
    generated_text = predict_hyp(test_case['instruction'])
    #input_ids = tokenizer.encode(test_case['instruction'], return_tensors='pt').to('cuda')
    #print(input_ids)
    print("inputs:", test_case['instruction'])
    labels = [x for x in test_case['labels'] if x >= 0]
    labels = torch.tensor(labels).to('cuda')
    print("labels:", tokenizer.decode(labels, skip_special_tokens=True))
    #true_text = tokenizer.decode(labels, skip_special_tokens=True)
    # Compare the results
    print("Generated:", generated_text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



CASE 0


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


inputs: theorem LinearMap.exact_map_mkQ_range (R: Type u_1) (M: Type u_2) (N: Type u_3) (P: Type u_4) (inst‚úù‚Å∂: CommRing R) (inst‚úù‚Åµ: AddCommGroup M) (inst‚úù‚Å¥: AddCommGroup N) (inst‚úù¬≥: AddCommGroup P) (inst‚úù¬≤: Module R M) (inst‚úù¬π: Module R N) (inst‚úù: Module R P) (y‚úù: N) : y‚úù ‚àà Set.range ‚áëf ‚Üî 0 = (range f).mkQ y‚úù :=
labels:  ['M ‚Üí‚Çó[R] N']
Generated: theorem LinearMap.exact_map_mkQ_range (R: Type u_1) (M: Type u_2) (N: Type u_3) (P: Type u_4) (inst‚úù‚Å∂: CommRing R) (inst‚úù‚Åµ: AddCommGroup M) (inst‚úù‚Å¥: AddCommGroup N) (inst‚úù¬≥: AddCommGroup P) (inst‚úù¬≤: Module R M) (inst‚úù¬π: Module R N) (inst‚úù: Module R P) (y‚úù: N) : y‚úù ‚àà Set.range ‚áëf ‚Üî 0 = (range f).mkQ y‚úù :=   ['M ‚Üí‚Çó[R] N ‚Üí‚Çó[R] P']']']']']']']']']'].range']']']']']']']']']']'].range']']']']

CASE 1


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


inputs: theorem AdjoinRoot.smul_mk (R: Type u) (S: Type v) (K: Type w) (inst‚úù¬≤: CommRing R) (inst‚úù¬π: DistribSMul S R) (inst‚úù: IsScalarTower S R R) (a x ‚ä¢ Quotient.map' (fun => ‚Ä¢ x) ‚ãØ ((mk {: = toFinsupp‚úù }) x) = (mk { toFinsupp := toFinsupp‚úù }) (a ‚Ä¢ x)) (toFinsupp‚úù: AddMonoidAlgebra R ‚Ñï) :  :=
labels:  ['= toFinsupp‚úù }) x) = (mk { toFinsupp := toFinsupp‚úù }) (a ‚Ä¢ x)']
Generated: theorem AdjoinRoot.smul_mk (R: Type u) (S: Type v) (K: Type w) (inst‚úù¬≤: CommRing R) (inst‚úù¬π: DistribSMul S R) (inst‚úù: IsScalarTower S R R) (a x ‚ä¢ Quotient.map' (fun => ‚Ä¢ x) ‚ãØ ((mk {: = toFinsupp‚úù }) x) = (mk { toFinsupp := toFinsupp‚úù }) (a ‚Ä¢ x)) (toFinsupp‚úù: AddMonoidAlgebra R ‚Ñï) :  :=   ['= toFinsupp‚úù }) x) = (mk { toFinsupp := toFinsupp‚úù }) (a ‚Ä¢ x)']']']']']']']']']']']']']

CASE 2


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


inputs: theorem Finset.diffs_union_right (F: Type u_1) (Œ±: Type u_2) (Œ≤: Type u_3) (inst‚úù¬≤: DecidableEq Œ±) (inst‚úù¬π: DecidableEq Œ≤) (inst‚úù: GeneralizedBooleanAlgebra Œ±) (s‚ÇÅ s‚ÇÇ t t‚ÇÅ t‚ÇÇ u v: Finset Œ±) (a b c: Œ±) (val‚úù: Multiset Œ±) (nodup‚úù: val‚úù.Nodup) (‚ä¢: = val‚úù, nodup := nodup‚úù } \\ (t‚ÇÅ ‚à™ t‚ÇÇ \ t‚ÇÅ) =) ({: = val‚úù, nodup := nodup‚úù } \\ t‚ÇÅ ‚à™ { val := val‚úù, nodup := nodup‚úù } \\ t‚ÇÇ) :  :=
labels:  ['= val‚úù, nodup := nodup‚úù } \\\\ t‚ÇÅ ‚à™ { val := val‚úù, nodup := nodup‚úù } \\\\ t‚ÇÇ']
Generated: theorem Finset.diffs_union_right (F: Type u_1) (Œ±: Type u_2) (Œ≤: Type u_3) (inst‚úù¬≤: DecidableEq Œ±) (inst‚úù¬π: DecidableEq Œ≤) (inst‚úù: GeneralizedBooleanAlgebra Œ±) (s‚ÇÅ s‚ÇÇ t t‚ÇÅ t‚ÇÇ u v: Finset Œ±) (a b c: Œ±) (val‚úù: Multiset Œ±) (nodup‚úù: val‚úù.Nodup) (‚ä¢: = val‚úù, nodup := nodup‚úù } \\ (t‚ÇÅ ‚à™ t‚ÇÇ \ t‚ÇÅ) =) ({: = val‚úù, nodup := nodup‚úù } \\ t‚ÇÅ ‚à™ { val := val‚úù, nodup := nodup‚úù } \\ t‚ÇÇ) :  :=   ['= val

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


inputs: theorem RootPairing.coreflection_eq_flip_reflection (Œπ: Type u_1) (R: Type u_2) (M: Type u_3) (N: Type u_4) (inst‚úù‚Å¥: CommRing R) (inst‚úù¬≥: AddCommGroup M) (inst‚úù¬≤: Module R M) (inst‚úù¬π: AddCommGroup N) (inst‚úù: Module R N) (i j: Œπ) (f: N) (toPerfectPairing‚úù: PerfectPairing R M N) (root‚úù: Œπ ‚Ü™ M) (coroot‚úù: Œπ ‚Ü™ N) (root_coroot_two‚úù: ‚àÄ (i : Œπ), (toPerfectPairing‚úù.toLin (root‚úù i)) (coroot‚úù i) = 2) (mapsTo_preReflection_root‚úù mapsTo_preReflection_coroot‚úù: ) ((i: Œπ), MapsTo (‚áë(preReflection (coroot‚úù i) (toPerfectPairing‚úù.toLin (root‚úù i)))) (range ‚áëcoroot‚úù) (range ‚áëcoroot‚úù)) (-(({ ({: = toPerfectPairing‚úù, root := root‚úù, coroot := coroot‚úù, root_coroot_two := root_coroot_two‚úù,) (mapsTo_preReflection_root: = mapsTo_preReflection_root‚úù,) (root_coroot_two: = root_coroot_two‚úù, mapsTo_preReflection_root := mapsTo_preReflection_root‚úù,) ({: = toPerfectPairing‚úù, root := root‚úù, coroot := coroot‚úù,) : f + :=
labels: theor

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


inputs: theorem Finsupp.prod_ne_zero_iff (Œ±: Type u_1) (Œπ: Type u_2) (Œ≥: Type u_3) (A: Type u_4) (B: Type u_5) (C: Type u_6) (inst‚úù‚Å∂: AddCommMonoid A) (inst‚úù‚Åµ: AddCommMonoid B) (inst‚úù‚Å¥: AddCommMonoid C) (t: Œπ ‚Üí A ‚Üí C) (h0: ‚àÄ (i : Œπ), t i 0 = 0) (h1: ‚àÄ (i : Œπ) (x y : A), t i (x + y) = t i x + t i y) (s: Finset Œ±) (f‚úù: Œ± ‚Üí Œπ ‚Üí‚ÇÄ A) (i: Œπ) (g‚úù: Œπ ‚Üí‚ÇÄ A) (k: Œπ ‚Üí A ‚Üí Œ≥ ‚Üí B) (x: Œ≥) (Œ≤: Type u_7) (M: Type u_8) (M': Type u_9) (N: Type u_10) (P: Type u_11) (G: Type u_12) (R: Type u_14) (S: Type u_15) (inst‚úù¬≥: Zero Œ±) (inst‚úù¬≤: CommMonoidWithZero Œ≤) (inst‚úù¬π: Nontrivial Œ≤) (inst‚úù: NoZeroDivisors Œ≤) (f: Œπ ‚Üí‚ÇÄ Œ±) (a: Œ±) (g: Œπ ‚Üí Œ± ‚Üí Œ≤) (‚ä¢ (‚àÄ (i: Œπ), f i ‚â† 0 ‚Üí g i (f i) ‚â† ?m.111048 * 0) ‚Üî f.prod g ‚â† ?m.111048 * 0) : Œ≤ :=
labels: theorem Finsupp.prod_ne_zero_iff (Œ±: Type u_1) (Œπ: Type u_2) (Œ≥: Type u_3) (A: Type u_4) (B: Type u_5) (C: Type u_6) (inst‚úù‚Å∂: AddCommMonoid A) (inst‚úù‚Åµ: AddCommMonoid B) 

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


inputs: theorem unitInterval.volume_def (nvar0: Set ‚ÜëI) (nvar1: ‚Ñù) : ‚àÉ b, (Measure.comap Subtype.val volume) nvar0 = ‚Üëb ‚àß b ‚â§ ‚ü®nvar1, nvar2‚ü© :=
labels:  ['0 ‚â§ nvar1', 'volume nvar0 = ‚Üë‚ü®nvar1, nvar2‚ü©']
Generated: theorem unitInterval.volume_def (nvar0: Set ‚ÜëI) (nvar1: ‚Ñù) : ‚àÉ b, (Measure.comap Subtype.val volume) nvar0 = ‚Üëb ‚àß b ‚â§ ‚ü®nvar1, nvar2‚ü© :=   ['0 ‚â§ nvar1'] ‚àß 0 ‚â§ ‚ü®nvar1, nvar2‚ü©']', 'volume nvar0 = ‚Üë‚ü®nvar1, nvar2‚ü©']']

CASE 6


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


inputs: theorem interior_sInter_subset (X: Type u) (Y: Type v) (Œπ: Sort w) (Œ±: Type u_1) (Œ≤: Type u_2) (x nvar0: X) (s s‚ÇÅ s‚ÇÇ t: Set X) (p p‚ÇÅ p‚ÇÇ: X ‚Üí Prop) (inst‚úù: TopologicalSpace X) (S: Set (Set X)) : nvar0 ‚àà ‚ãÇ s ‚àà S, ‚ãÉ‚ÇÄ {t | IsOpen t ‚àß t ‚äÜ s} :=
labels:  ['nvar0 ‚àà ‚ãÉ‚ÇÄ {t | IsOpen t ‚àß t ‚äÜ ‚ãÇ‚ÇÄ S}']
Generated: theorem interior_sInter_subset (X: Type u) (Y: Type v) (Œπ: Sort w) (Œ±: Type u_1) (Œ≤: Type u_2) (x nvar0: X) (s s‚ÇÅ s‚ÇÇ t: Set X) (p p‚ÇÅ p‚ÇÇ: X ‚Üí Prop) (inst‚úù: TopologicalSpace X) (S: Set (Set X)) : nvar0 ‚àà ‚ãÇ s ‚àà S, ‚ãÉ‚ÇÄ {t | IsOpen t ‚àß t ‚äÜ s} :=   ['nvar0 ‚àà ‚ãÇ‚ÇÄ S'] ‚àÄ t ‚àà S, nvar0 ‚àà t'] ‚àß t ‚äÜ ‚ãÇ‚ÇÄ

CASE 7


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


inputs: theorem RootPairing.coreflection_eq_flip_reflection (Œπ: Type u_1) (R: Type u_2) (M: Type u_3) (N: Type u_4) (inst: CommRing R) (inst_1: AddCommGroup M) (inst_3: AddCommGroup N) (inst_4: Module R N) (i j: Œπ) (f: N) (root: Œπ ‚Ü™ M) (coroot: Œπ ‚Ü™ N) (root_coroot_two: ‚àÄ (i : Œπ), (toPerfectPairing.toLin (root i)) (coroot i) = 2) (mapsTo_preReflection_root: = mapsTo_preReflection_root,) ((i: Œπ), MapsTo (‚áë(preReflection (coroot i) (toPerfectPairing.toLin (root i)))) (range ‚áëcoroot) (range ‚áëcoroot)) (‚ä¢ ({ {: = toPerfectPairing, root := root, coroot := coroot, root_coroot_two := root_coroot_two,) :  :=
labels:  ['Module R M', '= toPerfectPairing, root := root, coroot := coroot,
Generated: theorem RootPairing.coreflection_eq_flip_reflection (Œπ: Type u_1) (R: Type u_2) (M: Type u_3) (N: Type u_4) (inst: CommRing R) (inst_1: AddCommGroup M) (inst_3: AddCommGroup N) (inst_4: Module R N) (i j: Œπ) (f: N) (root: Œπ ‚Ü™ M) (coroot: Œπ ‚Ü™ N) (root_coroot_two: ‚àÄ (i : Œπ), (t

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


inputs: theorem AdjoinRoot.smul_mk (R: Type u) (S: Type v) (K: Type w) (inst‚úù¬≤: CommRing R) (f: R[X]) (inst‚úù¬π: DistribSMul S R) (inst‚úù: IsScalarTower S R R) (a ‚ä¢ Quotient.map' (fun => x) ‚ãØ ((mk f) = (mk (a { toFinsupp: = x.1 })) :  :=
labels:  ['= x.1 })', '= x.1 })']
Generated: theorem AdjoinRoot.smul_mk (R: Type u) (S: Type v) (K: Type w) (inst‚úù¬≤: CommRing R) (f: R[X]) (inst‚úù¬π: DistribSMul S R) (inst‚úù: IsScalarTower S R R) (a ‚ä¢ Quotient.map' (fun => x) ‚ãØ ((mk f) = (mk (a { toFinsupp: = x.1 })) :  :=   ['= x.1 })', '= x.1 })']']']']']']']']']']']']']']']']']']']']']']']']']']']']']']']']']']

CASE 9
inputs: theorem LinearEquiv.comp_toLinearMap_symm_eq (R: Type u_1) (R‚ÇÅ: Type u_2) (R‚ÇÇ: Type u_3) (R‚ÇÉ: Type u_4) (k: Type u_5) (K: Type u_6) (S: Type u_7) (M: Type u_8) (M‚ÇÅ: Type u_9) (M‚ÇÇ: Type u_10) (M‚ÇÉ: Type u_11) (N‚ÇÅ: Type u_12) (N‚ÇÇ: Type u_13) (N‚ÇÉ: Type u_14) (N‚ÇÑ: Type u_15) (Œπ: Type u_16) (M‚ÇÑ: Type u_17) (inst‚úù¬π‚Å∑: Semiring R) (inst‚úù

In [25]:
instructions = [
    "theorem mul_right_inv (G: Type u_1) (inst‚úù : Group G) (a : G) : a * a‚Åª¬π = 1 :=",
    "theorem fact1 (a: ‚Ñù) (b: ‚Ñù) : a * b * 2 ‚â§ a ^ 2 + b ^ 2 :=",
    "theorem x_pos_neg_1 (x: ‚Ñù) : x = 1 ‚à® x = -1 :=",
    "theorem Equiv.embeddingFinSucc_fst (m n‚úù n: ‚Ñï) (Œπ: Type u_1) : ‚áë((embeddingFinSucc n Œπ) e).fst = ‚áëe ‚àò Fin.succ :="
    #"theorem monotone_f (a: ‚Ñù) (b: ‚Ñù) (f‚úù: ‚Ñù ‚Üí ‚Ñù) (h: ‚àÄ {f : ‚Ñù ‚Üí ‚Ñù}, Monotone f ‚Üí ‚àÄ {a b : ‚Ñù}, f a ‚â§ f b ‚Üí a ‚â§ b) (f: ‚Ñù ‚Üí ‚Ñù := fun x ‚Ü¶ 0) : Monotone f :="
]

predict_hyp(instructions[3])

"theorem Equiv.embeddingFinSucc_fst (m n‚úù n: ‚Ñï) (Œπ: Type u_1) : ‚áë((embeddingFinSucc n Œπ) e).fst = ‚áëe ‚àò Fin.succ :=   ['Fin (n + 1) ‚Ü™ Œπ'] 'n = 0'] (n + 1)'] (n + 1) = (n + 1)']']'] (n +"