In [None]:
# Install required packages
!pip install torch transformers tqdm accelerate requests huggingface_hub

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
import os
from tqdm import tqdm
import requests
import time
import subprocess
from huggingface_hub import snapshot_download

# Report whether a CUDA device is visible and, if one is, which device it is
cuda_ready = torch.cuda.is_available()
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    print(f"GPU: {torch.cuda.get_device_name(0)}")

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
# Optional Colab-only step: upload a previously generated predictions file
# instead of re-running inference. The result of the upload is kept in `uploaded`.
from google.colab import files
uploaded = files.upload()  # Opens the file picker; select your finqa_predictions.json file

In [None]:
import shutil

# Place an uploaded predictions file where the later cells expect it.
os.makedirs("outputs", exist_ok=True)
if os.path.exists("finqa_predictions.json"):
    shutil.move("finqa_predictions.json", "outputs/finqa_predictions.json")
else:
    # Nothing was uploaded (or it has another name); do not fail the cell for that.
    print("finqa_predictions.json not found in the working directory; nothing to move")

mv: cannot stat 'finqa_predictions.json': No such file or directory


In [None]:
def download_finqa_dataset():
    """Download the FinQA test split to ``data/finqa/test.json`` if it is not present.

    The file is fetched from the FinQA GitHub repository. Nothing is done when
    the target file already exists.

    Raises:
        Exception: if the server answers with a status code other than 200.
        requests.exceptions.RequestException: on network errors, including a
            request that exceeds the timeout.
    """
    target = "data/finqa/test.json"
    if os.path.exists(target):
        return

    print("Downloading FinQA dataset...")
    os.makedirs("data/finqa", exist_ok=True)

    url = "https://raw.githubusercontent.com/czyssrs/FinQA/main/dataset/test.json"
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=60)
    if response.status_code != 200:
        raise Exception(f"Failed to download FinQA dataset. Status code: {response.status_code}")

    # Write to a temporary name first so an interrupted write never leaves a
    # truncated test.json that later calls would mistake for a complete file.
    partial = target + ".part"
    with open(partial, "wb") as f:
        f.write(response.content)
    os.replace(partial, target)
    print("Successfully downloaded FinQA test dataset")

def download_evaluation_script():
    """Download the FinQA evaluation script to ``finqa/evaluate.py``.

    The script is fetched on every call and overwrites any existing copy.

    Raises:
        Exception: if the server answers with a status code other than 200.
        requests.exceptions.RequestException: on network errors, including a
            request that exceeds the timeout.
    """
    os.makedirs("finqa", exist_ok=True)
    path = "finqa/evaluate.py"
    url = "https://raw.githubusercontent.com/czyssrs/FinQA/main/code/evaluate/evaluate.py"

    print("Downloading evaluation script from:", url)
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(url, timeout=60)
    if response.status_code != 200:
        raise Exception(f"Failed to download script. Status code: {response.status_code}")

    with open(path, "wb") as f:
        f.write(response.content)
    print("Successfully downloaded to:", path)

def download_model_with_retry(model_name, max_retries=3, retry_delay=10):
    """Download a Hugging Face Hub repository into ``./models/<model_name>``, retrying on failure.

    Args:
        model_name: Hub repository id, e.g. ``"org/name"``.
        max_retries: Maximum number of download attempts; must be at least 1.
        retry_delay: Seconds to wait after a failed attempt before the next one.

    Returns:
        True once the snapshot has been downloaded.

    Raises:
        ValueError: if ``max_retries`` is smaller than 1 (previously this
            silently returned None without downloading anything).
        Exception: if every attempt fails; the error of the last attempt is
            attached as ``__cause__``.
    """
    if max_retries < 1:
        raise ValueError(f"max_retries must be at least 1, got {max_retries}")

    for attempt in range(1, max_retries + 1):
        try:
            print(f"Attempt {attempt} to download model...")
            # `resume_download` and `local_dir_use_symlinks` are deprecated in
            # huggingface_hub (downloads always resume, local_dir holds real
            # files), so they are no longer passed.
            snapshot_download(
                repo_id=model_name,
                local_dir=f"./models/{model_name}",
            )
            return True
        except Exception as e:
            print(f"Attempt {attempt} failed: {str(e)}")
            if attempt == max_retries:
                # Chain the original error so the real reason is not lost.
                raise Exception(f"Failed to download model after {max_retries} attempts") from e
            print(f"Waiting {retry_delay} seconds before retrying...")
            time.sleep(retry_delay)

In [None]:
# Fetch the FinQA test set and the FinQA evaluation script (needs network access).
download_finqa_dataset()
download_evaluation_script()


Downloading FinQA dataset...
Successfully downloaded FinQA test dataset
Downloading evaluation script from: https://raw.githubusercontent.com/czyssrs/FinQA/main/code/evaluate/evaluate.py
Successfully downloaded to: finqa/evaluate.py


In [None]:
def load_model_and_tokenizer():
    """Download (if needed) and load Mistral 7B Instruct v0.2 together with its tokenizer.

    The snapshot is stored under ``./models/mistralai/Mistral-7B-Instruct-v0.2``
    and loaded from that local directory. Weights are loaded in bfloat16 when
    CUDA is available and in float32 otherwise; ``device_map="auto"`` lets the
    library place them on the available devices.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    print("Loading Mistral 7B Instruct model and tokenizer...")
    model_name = "mistralai/Mistral-7B-Instruct-v0.2"

    os.makedirs("./models", exist_ok=True)
    download_model_with_retry(model_name)

    model_path = f"./models/{model_name}"

    # The Mistral architecture ships with transformers, so `trust_remote_code`
    # is left at its default (False): no code from the model repository needs
    # to be executed to load either the tokenizer or the model.
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
        device_map="auto",
    )

    print("Model and tokenizer loaded successfully")
    return model, tokenizer


def load_finqa_data(split="test"):
    """Load one split of the FinQA dataset from ``data/finqa/<split>.json``.

    If the file is missing, ``download_finqa_dataset()`` is called first; that
    helper only fetches the test split, so any other split must already be on
    disk. A file that fails to parse gets one repair attempt: a closing ``]``
    is appended when the content does not end with one.

    Args:
        split: Name of the split, used as the file stem.

    Returns:
        The parsed JSON content of the split file.

    Raises:
        FileNotFoundError: if the split file is still missing after the
            download attempt.
        Exception: if the file cannot be parsed even after the repair attempt.
    """
    path = f"data/finqa/{split}.json"
    if not os.path.exists(path):
        download_finqa_dataset()
        if not os.path.exists(path):
            raise FileNotFoundError(
                f"{path} does not exist and could not be downloaded automatically "
                "(only the test split is fetched by download_finqa_dataset)"
            )

    print(f"Loading {split} dataset...")
    # Read once with an explicit encoding so parsing does not depend on the
    # platform's default locale.
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()

    try:
        data = json.loads(content)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON file: {e}")
        print("Attempting to fix JSON file...")
        content = content.strip()
        if not content.endswith("]"):
            content += "]"
        try:
            data = json.loads(content)
        except json.JSONDecodeError as e2:
            raise Exception(f"Failed to fix JSON file: {e2}") from e2
        print(f"Successfully fixed and loaded {len(data)} examples")
        return data

    print(f"Successfully loaded {len(data)} examples")
    return data

In [None]:
def format_table(table):
    """Render a list-of-rows table as aligned, pipe-separated text.

    Every cell is converted with ``str`` and left-justified to the width of
    the widest cell in its column. A line of dashes as long as the first
    rendered row is inserted after the first row.

    Args:
        table: Sequence of rows, each a sequence of cells. Rows shorter than
            the widest row are padded with empty cells.

    Returns:
        The rendered table as a single string, or ``""`` for an empty table.
    """
    if not table:
        # Nothing to render; the original indexing of table[0] would fail here.
        return ""

    n_cols = max(len(row) for row in table)
    # Stringify once and pad ragged rows so every row has the same column count.
    rows = [[str(cell) for cell in row] + [""] * (n_cols - len(row)) for row in table]
    col_widths = [max(len(row[i]) for row in rows) for i in range(n_cols)]

    formatted_rows = [
        " | ".join(cell.ljust(width) for cell, width in zip(row, col_widths))
        for row in rows
    ]
    formatted_rows.insert(1, "-" * len(formatted_rows[0]))
    return "\n".join(formatted_rows)

def format_prompt(item):
    """Build the model prompt for one FinQA example.

    Args:
        item: Example dict. The keys read are ``"pre_text"`` and
            ``"post_text"`` (concatenated, then joined with newlines),
            ``"table"`` (rendered with ``format_table``) and
            ``item["qa"]["question"]``.

    Returns:
        The prompt string: task instructions, the text, the table, the
        question and a final cue to reason step by step.
    """
    context = "\n".join(item["pre_text"] + item["post_text"])
    table_info = format_table(item["table"])
    question = item["qa"]["question"]

    # The template is assembled from a list of lines so that the indentation
    # of this source file never ends up inside the prompt. With an indented
    # triple-quoted string every template line carried 12 leading spaces while
    # the interpolated text and table rows did not, which wasted tokens and
    # misaligned the table's first row against the rest.
    lines = [
        "You are a financial analysis assistant. Your task is to analyze the given financial text and table to answer the question.",
        "",
        "IMPORTANT INSTRUCTIONS:",
        "1. Carefully examine the text and table",
        "2. Perform any necessary calculations to find the answer",
        "3. Clearly show your reasoning step by step",
        "4. Always include the final answer in this exact format:",
        "[FINAL_ANSWER]your_numeric_answer[/FINAL_ANSWER]",
        "",
        "Examples:",
        "[FINAL_ANSWER]25.5[/FINAL_ANSWER]",
        "[FINAL_ANSWER]1234.56[/FINAL_ANSWER]",
        "[FINAL_ANSWER]0.75[/FINAL_ANSWER]",
        "",
        "Do NOT include:",
        "- Any units, symbols, or extra text after the final answer",
        "- Any follow-up explanations after the final answer tag",
        "",
        "Text:",
        context,
        "",
        "Table:",
        table_info,
        "",
        f"Question: {question}",
        "",
        "Let's solve this step by step.",
    ]
    return "\n".join(lines)


def generate_answer(model, tokenizer, prompt, max_new_tokens=256):
    """Sample a completion for ``prompt`` and return only the newly generated text.

    The prompt is tokenized with truncation to 2048 tokens and moved to the
    model's device. Generation samples with temperature 0.7 and uses the EOS
    token id as the padding id.

    Args:
        model: Causal language model exposing ``device`` and ``generate``.
        tokenizer: Tokenizer matching ``model``.
        prompt: Full prompt text.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded continuation with surrounding whitespace stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device)
    # Number of prompt tokens actually fed to the model (after truncation).
    prompt_token_count = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        temperature=0.7,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id
    )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # token count instead of cutting the decoded string at len(prompt): the
    # character-based cut breaks when the prompt was truncated or when
    # decoding does not reproduce the prompt text exactly.
    generated_ids = outputs[0][prompt_token_count:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

In [None]:
from huggingface_hub import login

# Interactive Hugging Face Hub login; it renders a widget and waits for user input.
# NOTE(review): presumably required so the model download is authorised — confirm.
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
from google.colab import files

# Inference settings
MAX_EXAMPLES = 600  # only this many leading test examples are processed
SEED = 42           # seed for torch's RNG, which drives the sampling in generate_answer

# Create output directory
os.makedirs("outputs", exist_ok=True)
pred_file = "outputs/finqa_predictions.json"

# Load model and tokenizer
model, tokenizer = load_model_and_tokenizer()

# Load test data
test_data = load_finqa_data(split="test")

# Generation samples (do_sample=True), so seed the RNG to make reruns comparable.
torch.manual_seed(SEED)

# Run inference
print("Running inference...")
predictions = []

try:
    for item in tqdm(test_data[:MAX_EXAMPLES]):
        prompt = format_prompt(item)
        answer = generate_answer(model, tokenizer, prompt)

        predictions.append({
            "id": item["id"],
            "answer": answer
        })
finally:
    # Save predictions. This runs even when the long loop is interrupted or
    # fails part-way, so the examples generated so far are not lost.
    print(f"Saving predictions to {pred_file}...")
    with open(pred_file, "w") as f:
        json.dump(predictions, f, indent=2)
    print("Predictions saved successfully")

# Read the file back as a sanity check on what was written
with open(pred_file, "r") as f:
    preds = json.load(f)

print(f"Loaded {len(preds)} predictions.")
print(preds[:2])  # print first 2 predictions as a sample

# Download original predictions
files.download(pred_file)


Loading Mistral 7B Instruct model and tokenizer...
Attempt 1 to download model...


Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model and tokenizer loaded successfully
Loading test dataset...
Successfully loaded 1147 examples
Running inference...


100%|██████████| 600/600 [1:36:13<00:00,  9.62s/it]

Saving predictions to outputs/finqa_predictions.json...
Predictions saved successfully
Loaded 600 predictions.
[{'id': 'ETR/2016/page_23.pdf-2', 'answer': "First, let's find the net revenue for both 2014 and 2015 from the table.\n\n            Net revenue in 2014: $ 5735 million\n            Net revenue in 2015: $ 5829 million\n\n            Next, we need to find the difference between the net revenue in 2015 and the net revenue in 2014 to determine the net change in net revenue.\n\n            Net change in net revenue = Net revenue in 2015 - Net revenue in 2014\n                                         = $ 5829 million - $ 5735 million\n                                         = $ 94 million\n\n            [FINAL_ANSWER]94[/FINAL_ANSWER]\n\n            Therefore, the net change in net revenue during 2015 for Entergy Corporation was $94 million."}, {'id': 'INTC/2015/page_41.pdf-4', 'answer': 'First, we need to find the total square footage of all facilities.\n\n            The total s




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import sys

# Make the downloaded FinQA script importable. It is put at the FRONT of the
# search path so that `evaluate` resolves to finqa/evaluate.py rather than to
# an installed package of the same name, and it is added only once so that
# re-running this cell does not pile up duplicate entries.
if "finqa" not in sys.path:
    sys.path.insert(0, "finqa")

from evaluate import evaluate_result

In [None]:
# Re-write the in-memory `predictions` list to disk. This only works while the
# kernel that ran the inference cell (which defines `predictions`) is still alive.
with open("outputs/finqa_predictions.json", "w") as f:
    json.dump(predictions, f, indent=2)

In [None]:
import json

# Read the saved predictions back from disk. The file must already exist, either
# written by the inference cell or uploaded and moved into outputs/.
with open("outputs/finqa_predictions.json", "r") as f:
    preds = json.load(f)

print(f"Loaded {len(preds)} predictions.")
print(preds[:2])  # print first 2 predictions as a sample

FileNotFoundError: [Errno 2] No such file or directory: 'outputs/finqa_predictions.json'

In [None]:
from google.colab import files

# Download the original (un-renamed) predictions file through Colab's file helper.
files.download("outputs/finqa_predictions.json")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import json

# Read the raw predictions produced by the inference step
with open("outputs/finqa_predictions.json", "r") as src:
    data = json.load(src)

# Move every record's "answer" value under the key "predicted"
for record in data:
    answer_text = record.pop("answer")
    record["predicted"] = answer_text

# Write the re-keyed records to a separate file; the original file is left as is
serialized = json.dumps(data, indent=2)
with open("outputs/finqa_predictions_fixed.json", "w") as dst:
    dst.write(serialized)

print("✅ Fixed prediction file saved to outputs/finqa_predictions_fixed.json")

✅ Fixed prediction file saved to outputs/finqa_predictions_fixed.json


In [None]:
# Score the re-keyed predictions against the FinQA test file with the downloaded script.
pred_file = "outputs/finqa_predictions_fixed.json"  # rebinds the name used earlier for the raw predictions file
gold_file = "data/finqa/test.json"
# NOTE(review): the recorded run printed "structure error" repeatedly and returned (0.0, 0.0);
# presumably evaluate_result expects "predicted" in a different format than free-form text — confirm against the script.
evaluate_result(pred_file, gold_file)

structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structure error
structur

(0.0, 0.0)

In [None]:
# Build id -> prediction and id -> gold answer lookups from the two JSON files
with open("outputs/finqa_predictions_fixed.json") as pred_fh, open("data/finqa/test.json") as gold_fh:
    preds = {}
    for rec in json.load(pred_fh):
        preds[rec["id"]] = rec["predicted"]
    golds = {}
    for rec in json.load(gold_fh):
        golds[rec["id"]] = rec["qa"]["exe_ans"]

# Show the first 10 gold questions side by side with the model output
separator = "-" * 30
first_ids = [qid for _, qid in zip(range(10), golds)]
for qid in first_ids:
    predicted_text = preds.get(qid, '❌ MISSING')
    gold_value = golds[qid]
    print(f"ID: {qid}")
    print(f"Predicted: {predicted_text}")
    print(f"Gold:      {gold_value}")
    print(separator)


ID: ETR/2016/page_23.pdf-2
Predicted: First, let's find the net revenue for 2014 and 2015 from the text and table.

            According to the text, the net revenue for 2014 was $ 5735 million.
            According to the table, the net revenue for 2015 was $ 5829 million.

            Next, let's calculate the net change in net revenue by subtracting the net revenue for 2014 from the net revenue for 2015.

            [FINAL_ANSWER]5829 - 5735[/FINAL_ANSWER} 94

            Therefore, the net change in net revenue during 2015 for Entergy Corporation was an increase of $ 94 million.
Gold:      94.0
------------------------------
ID: INTC/2015/page_41.pdf-4
Predicted: First, we need to find the total square footage of all facilities. We can do this by adding up the square footage for owned facilities and leased facilities:

            30.7 (owned, US) + 17.2 (owned, other) + 2.1 (leased, US) + 6.0 (leased, other) = 49.0 million square feet

            Next, we need to find the squa