### Download JSON files with evaluation data

In [1]:
! wget "https://raw.githubusercontent.com/winterForestStump/thesis/main/retrieval/evaluation/eval_1_cosine.json"
! wget "https://raw.githubusercontent.com/winterForestStump/thesis/main/retrieval/evaluation/eval_1_ip.json"
! wget "https://raw.githubusercontent.com/winterForestStump/thesis/main/retrieval/evaluation/eval_1_l2.json"
! wget "https://raw.githubusercontent.com/winterForestStump/thesis/main/retrieval/evaluation/eval_2_cosine.json"
! wget "https://raw.githubusercontent.com/winterForestStump/thesis/main/retrieval/evaluation/eval_2_ip.json"
! wget "https://raw.githubusercontent.com/winterForestStump/thesis/main/retrieval/evaluation/eval_2_l2.json"

--2024-06-04 07:24:46--  https://raw.githubusercontent.com/winterForestStump/thesis/main/retrieval/evaluation/eval_1_cosine.json
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1624588 (1.5M) [text/plain]
Saving to: 'eval_1_cosine.json'


2024-06-04 07:24:46 (63.4 MB/s) - 'eval_1_cosine.json' saved [1624588/1624588]

--2024-06-04 07:24:47--  https://raw.githubusercontent.com/winterForestStump/thesis/main/retrieval/evaluation/eval_1_ip.json
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1632016 (1.6M) [text/plain]
Saving to: 'eval

### Combine downloaded JSON files into one

In [2]:
import pandas as pd
import json

# Evaluation result files to merge; rows are concatenated in this order.
json_files = [
    'eval_1_cosine.json',
    'eval_1_ip.json',
    'eval_1_l2.json',
    'eval_2_cosine.json',
    'eval_2_ip.json',
    'eval_2_l2.json']

# Read every listed file and concatenate once. A single concat avoids
# re-copying the accumulated frame on each iteration and removes the
# separately hard-coded first filename, so the list above is the only
# place that decides what gets merged.
df_0 = pd.concat(
    [pd.read_json(path) for path in json_files],
    ignore_index=True,
)

# Persist the merged records as JSON Lines (one record per line).
df_0.to_json('combined.json', orient='records', lines=True)
print("Files have been merged successfully!")

Files have been merged successfully!


### Filter Approach 1 instances

In [3]:
# Reload the merged JSON Lines file and keep only the rows whose
# 'approach' column equals 1; the cell displays the remaining row count.
data = (
    pd.read_json('combined.json', orient='records', lines=True)
    .loc[lambda frame: frame['approach'] == 1]
)
len(data)

2525

### Count the number of answers per question

In [4]:
# Number of non-null answers recorded for each question, exposed as 'total'.
data_total = (
    data.groupby('question')['answer']
    .count()
    .rename('total')
    .reset_index()
)

### Filter instances with the 'yes' score

In [5]:
# Boolean mask: True where the answer is a dict whose 'score' entry is 'yes'.
# Non-dict answers are treated as not matching.
scored_yes = data['answer'].apply(
    lambda ans: isinstance(ans, dict) and ans.get('score') == 'yes'
)
filtered_df = data[scored_yes]
len(filtered_df)

1440

### Count the number of correct answers per question

In [6]:
# Number of 'yes'-scored answers for each question, exposed as 'correct'.
data_filtered = (
    filtered_df.groupby('question')['answer']
    .count()
    .rename('correct')
    .reset_index()
)

### Merge two tables: all and correct answers grouped by questions

In [7]:
# Left-join the correct counts onto the totals so every question is kept;
# questions with no 'yes' answers get NaN from the join, replaced by 0 here.
new_data = (
    pd.merge(data_total, data_filtered, on='question', how='left')
    .assign(correct=lambda merged: merged['correct'].fillna(0))
)
len(new_data)

35

In [8]:
# Add the share of correct answers per question, order the table from the
# highest to the lowest share, then store both count columns as integers.
# Sorting happens on the exact ratio, before the integer cast.
# NOTE(review): astype(int) truncates the percentage (e.g. 71/74 -> 95, not
# 96) - confirm that truncation rather than rounding is intended.
new_data = (
    new_data
    .assign(**{'percentage,%': lambda tbl: tbl['correct'] / tbl['total'] * 100})
    .sort_values(by='percentage,%', ascending=False)
    .astype({'correct': int, 'percentage,%': int})
)

In [13]:
# Display the per-question summary table through the pandas Styler accessor.
new_data.style

Unnamed: 0,question,total,correct,"percentage,%"
16,What does the company foresee in terms of future growth and challenges and are there any strategic plans outlined for the upcoming years?,74,71,95
31,What is the effective tax rate for the company?,73,70,95
0,Are there any ongoing legal proceedings against the company?,73,68,93
29,What is the company's operating income and how does it compare to the previous years?,68,61,89
33,What potential impact could legal issues have on the business of the company?,77,69,89
1,Are there any tax-related risks or benefits for the company mentioned?,75,66,88
34,Who are the company's main competitors and how does the company differentiate itself?,66,56,84
5,How does the company manage currency risk and are there impacts on financials due to currency fluctuations?,71,58,81
32,What is the total revenue generated by the company and how has the revenue changed over the past few years?,67,54,80
11,What are the company's critical accounting policies disclosed and how might changes in these policies affect financial statements?,70,56,80


### LaTeX syntax

In [10]:
# Export the summary as a LaTeX tabular without the index column.
# escape=True is required here: the column name 'percentage,%' contains '%',
# which starts a comment in LaTeX. Left unescaped it swallows the trailing
# '\\' of the header row and the generated table no longer compiles.
latex_table = new_data.to_latex(index=False, escape=True)
print(latex_table)

\begin{tabular}{lrrr}
\toprule
question & total & correct & percentage,% \\
\midrule
What does the company foresee in terms of future growth and challenges and are there any strategic plans outlined for the upcoming years? & 74 & 71 & 95 \\
What is the effective tax rate for the company? & 73 & 70 & 95 \\
Are there any ongoing legal proceedings against the company? & 73 & 68 & 93 \\
What is the company's operating income and how does it compare to the previous years? & 68 & 61 & 89 \\
What potential impact could legal issues have on the business of the company? & 77 & 69 & 89 \\
Are there any tax-related risks or benefits for the company mentioned? & 75 & 66 & 88 \\
Who are the company's main competitors and how does the company differentiate itself? & 66 & 56 & 84 \\
How does the company manage currency risk and are there impacts on financials due to currency fluctuations? & 71 & 58 & 81 \\
What is the total revenue generated by the company and how has the revenue changed over the pas