In [1]:
import torch
from pathlib import Path
from src.prompt_manager import PromptManager

from src.data_manager import DataManager
from src import data_processing
import pandas as pd

In [2]:
# McGill FeedbackQA: load the raw file and count whitespace-separated tokens
# in the concatenated question + answer text.
df_qa = pd.read_parquet('../data/raw/MCGILL_QA_FEEDBACK.parquet')
qa_text = df_qa['question'] + " " + df_qa['answer']
df_qa['token_count'] = qa_text.str.split().str.len()

# Barter deals: load the raw file and count whitespace-separated tokens
# in the deal text.
df_deals = pd.read_parquet('../data/raw/BARTER_DEALS.parquet')
df_deals['token_count'] = df_deals['deal_text'].str.split().str.len()

# Both frames collected in one list.
dfs = [df_qa, df_deals]

# McGill FeedbackQA


## Features

- disagreement

In [3]:
# Columns shown when inspecting individual QA rows.
qa_relevant_columns = [
    'question',
    'answer',
    'rating',
    'explanation_1',
    'explanation_2',
    'human_disagreement',
]

# Absolute difference between score_1 and score_2 for each row.
df_qa['human_disagreement'] = (df_qa['score_1'] - df_qa['score_2']).abs()
df_qa['human_disagreement'].value_counts()

human_disagreement
0    2701
1    1896
2     756
3     307
Name: count, dtype: int64

- Question frequency
    - Questions are asked more than once; e.g. see figure 1, where the question on respiratory shortages is posed twice, once with a bad and once with a good answer. This would make for an interesting comparison. The counts below show that each question is asked at least twice.

In [4]:
# Per-question aggregates, broadcast back onto every row of that question:
# how often the question occurs and its summed human_disagreement.
per_question = df_qa.groupby('question')
df_qa['question_count'] = per_question['question'].transform('count')
df_qa['question_disagreement'] = per_question['human_disagreement'].transform('sum')
df_qa['question'].value_counts()

question
Is it safe for me to manually replace my IUD at home?                                                                              3
How do I get help finding a job?                                                                                                   3
If I am in Australia on a worker holiday marker visa, can I apply for a second visa because I work in a critical COVID-19 area?    3
Do immigration detention centers have proper PPE?                                                                                  3
information for job seekers  during the COVID-19 outbreak                                                                          3
                                                                                                                                  ..
What do Family Day Care service providers need to know when their educators don't have an ABN?                                     3
Who can provide telehealth services and how do they bill for

## Analysis

In [5]:
# Rows where the two scores are 3 apart, restricted to the inspection columns.
large_disagreement = df_qa.loc[df_qa['human_disagreement'].eq(3)]
large_disagreement_clean = large_disagreement.loc[:, qa_relevant_columns]
large_disagreement_clean

Unnamed: 0,question,answer,rating,explanation_1,explanation_2,human_disagreement
31,Is it still possible to access medical appoint...,If you are struggling to breathe or seriously ...,"['Excellent', 'Bad']",Explains the different ways doctors may handle...,This answer relates to emergency situations an...,3
35,what qualifies one to go into quarantine duri...,Quarantine is when you are well but may have b...,"['Excellent', 'Bad']",Brief but clear explanation of when people nee...,This does not answer the question. This answer...,3
99,Can I still have my clinical education?,As health service providers respond to the COV...,"['Excellent', 'Bad']",Says that some student educational programs ha...,This answer does not relate to clinical educat...,3
141,Does there exist any visa to support people wh...,If you have evidence from your employer that y...,"['Excellent', 'Bad']",It provides helpful information on what needs ...,This does not answer the question. This inform...,3
158,What should household members and caregivers d...,You should only share a house with a person wi...,"['Bad', 'Excellent']",Answer refers to people who are staying with s...,Has clear information about what steps people ...,3
...,...,...,...,...,...,...
5641,What are the different ways through which I ca...,There is currently no evidence that people can...,"['Excellent', 'Bad']",Describes how conronavirus does and does not s...,This answer does not relate to other ways that...,3
5645,Are tuberculosis testing supplies disrupted by...,Appropriate planning and monitoring are essent...,"['Bad', 'Excellent']",This answer does not provide any information o...,This answer talks about the supply of TB tests...,3
5646,Are tuberculosis testing supplies disrupted by...,The diagnostic methods for tuberculosis (TB) a...,"['Excellent', 'Bad']",This answer addresses the fact that the two te...,This answer does not provide any information o...,3
5654,Can you explain to me what self-care is,Self-care interventions recommended by WHO are...,"['Excellent', 'Bad']",This answer does a decent job at explaining wh...,This answer is irrelevant to the question. Thi...,3


In [6]:
# Rows where both scores are identical, restricted to the inspection columns.
no_disagreement = df_qa.loc[df_qa['human_disagreement'].eq(0)]
no_disagreement_clean = no_disagreement.loc[:, qa_relevant_columns]
no_disagreement_clean

Unnamed: 0,question,answer,rating,explanation_1,explanation_2,human_disagreement
1,How do I get help finding a job?,In this rapidly changing jobs market the Austr...,"['Excellent', 'Excellent']","A link to a job search website is included, as...","Includes a link to a Jobs Hub page, which is b...",0
5,If I am in Australia on a worker holiday marke...,You can apply for a further WHM visa if you ha...,"['Excellent', 'Excellent']",Answer instructs the requester to apply for a ...,Has a detailed description of the circumstance...,0
6,Do immigration detention centers have proper PPE?,If clinically indicated detainees will be test...,"['Bad', 'Bad']",Does not address the matter at issue in the qu...,The question is about the availability of PPE ...,0
7,Do immigration detention centers have proper PPE?,It has been observed that most religious group...,"['Bad', 'Bad']",Does not address the matter at issue in the qu...,The question is about prevalence of PPE in det...,0
8,Do immigration detention centers have proper PPE?,PPE levels are being closely monitored to ensu...,"['Acceptable', 'Acceptable']",Claims it monitors PPE levels in the immigrant...,This gives a general answer to the question bu...,0
...,...,...,...,...,...,...
5652,How can practioners provide services to chroni...,It is important to assure continuous access to...,"['Excellent', 'Excellent']",Has detailed information about providing servi...,This answer gives a lot of information. The i...,0
5655,Can you explain to me what self-care is,WHO’s definition of self care is the ability o...,"['Excellent', 'Excellent']",This answer explains what self care is directly.,This is a direct answer to the question. This ...,0
5657,Is it safe for me to manually replace my IUD a...,"If you do not want to become pregnant, you sho...","['Bad', 'Bad']",Does not address whether or not replacing an I...,The answer is irrelevant to the question. This...,0
5658,Is it safe for me to manually replace my IUD a...,No. Disposable medical face masks are intended...,"['Bad', 'Bad']",Does not answer whether or not IUD can be safe...,This answer is irrelevant to the question. Thi...,0


In [7]:
# Summary statistics of the per-row token counts in the QA data.
df_qa.token_count.describe()

count    5660.000000
mean      166.071731
std       143.826120
min         6.000000
25%        80.000000
50%       124.000000
75%       200.000000
max      2306.000000
Name: token_count, dtype: float64

In [8]:
# Every row for one example question, limited to the inspection columns.
example_question = "Is it still possible to access medical appointments and medications through my doctor"
df_qa.loc[df_qa['question'].eq(example_question), qa_relevant_columns]

Unnamed: 0,question,answer,rating,explanation_1,explanation_2,human_disagreement
30,Is it still possible to access medical appoint...,For more information for patients on accessing...,"['Acceptable', 'Acceptable']",Doesn't say whether you can set up medical app...,"This answer, while related to the question, gi...",0
31,Is it still possible to access medical appoint...,If you are struggling to breathe or seriously ...,"['Excellent', 'Bad']",Explains the different ways doctors may handle...,This answer relates to emergency situations an...,3
32,Is it still possible to access medical appoint...,You can access bulk-billed appointments with y...,"['Excellent', 'Acceptable']",Gives information on how patients can access a...,This answer relates to accessing medical appoi...,1


In [9]:
# Display the QA frame, including the columns added above
# (token_count, human_disagreement, question_count, question_disagreement).
df_qa

Unnamed: 0.1,Unnamed: 0,question,passage,feedback,rating,domain,review_1,explanation_1,review_2,explanation_2,score_1,score_2,answer,input_id,token_count,human_disagreement,question_count,question_disagreement
0,0,How do I get help finding a job?,"{'passage_id': 140, 'reference': {'page_title'...",['Has a link to detailed information about gov...,"['Excellent', 'Could be Improved']",Australia,Excellent,Has a link to detailed information about gover...,Could be Improved,"This answer provides a link for job searches, ...",4,2,If you are a current job seeker or participant...,eff9e000675931f5,153,2,3,4
1,1,How do I get help finding a job?,"{'passage_id': 139, 'reference': {'page_title'...","['A link to a job search website is included, ...","['Excellent', 'Excellent']",Australia,Excellent,"A link to a job search website is included, as...",Excellent,"Includes a link to a Jobs Hub page, which is b...",4,4,In this rapidly changing jobs market the Austr...,c8be913323f10444,88,0,3,4
2,2,How do I get help finding a job?,"{'passage_id': 126, 'reference': {'page_title'...",['Talks about tax credits for businesses that ...,"['Bad', 'Acceptable']",Australia,Bad,Talks about tax credits for businesses that hi...,Acceptable,"This answer discusses the Employment Fund, whi...",1,3,To further assist job seekers to prepare for a...,31effc925bc04105,103,2,3,4
3,3,If I am in Australia on a worker holiday marke...,"{'passage_id': 581, 'reference': {'page_title'...","[""Answer is about Working Holiday Makers, but ...","['Could be Improved', 'Acceptable']",Australia,Could be Improved,"Answer is about Working Holiday Makers, but do...",Acceptable,Answer is rather cut and dry but is also a lit...,2,3,No. Existing arrangements for specified work w...,610d0764ed04054d,180,1,3,2
4,4,If I am in Australia on a worker holiday marke...,"{'passage_id': 577, 'reference': {'page_title'...","[""Discusses pandemic visas. Doesn't mention th...","['Bad', 'Could be Improved']",Australia,Bad,Discusses pandemic visas. Doesn't mention the ...,Could be Improved,This answer is very vague and does not answer ...,1,2,The COVID-19 Pandemic event visa can only be g...,033e9fcef5d75297,39,1,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5655,5655,Can you explain to me what self-care is,"{'passage_id': 159, 'reference': {'page_title'...",['This answer explains what self care is direc...,"['Excellent', 'Excellent']",WHO,Excellent,This answer explains what self care is directly.,Excellent,This is a direct answer to the question. This ...,4,4,WHO’s definition of self care is the ability o...,87acd0f1a053ee69,44,0,3,6
5656,5656,Can you explain to me what self-care is,"{'passage_id': 158, 'reference': {'page_title'...","[""This answer talks about what self care is an...","['Excellent', 'Bad']",WHO,Excellent,This answer talks about what self care is and ...,Bad,This does not answer the question. This answer...,4,1,With the major disruptions to the normal funct...,77641257547b9e1a,94,3,3,6
5657,5657,Is it safe for me to manually replace my IUD a...,"{'passage_id': 70, 'reference': {'page_title':...",['Does not address whether or not replacing an...,"['Bad', 'Bad']",WHO,Bad,Does not address whether or not replacing an I...,Bad,The answer is irrelevant to the question. This...,1,1,"If you do not want to become pregnant, you sho...",16b4ea5c8b2b6721,84,0,3,0
5658,5658,Is it safe for me to manually replace my IUD a...,"{'passage_id': 103, 'reference': {'page_title'...",['Does not answer whether or not IUD can be sa...,"['Bad', 'Bad']",WHO,Bad,Does not answer whether or not IUD can be safe...,Bad,This answer is irrelevant to the question. Thi...,1,1,No. Disposable medical face masks are intended...,f2610cfa76f5bbde,81,0,3,0


# Barter Deals

In [10]:
# Summary statistics of the per-deal token counts.
df_deals.token_count.describe()

count    5111.000000
mean       79.271180
std        73.645539
min         0.000000
25%        32.500000
50%        59.000000
75%       105.000000
max       886.000000
Name: token_count, dtype: float64

## Company popularity analysis

Findings:
- Some partners that post multiple successful deals are active users of the platform, and obtain many applications
    - There is likely a relationship between "platform activity" and "deal success". This leads to simultaneity:
        - We predict the number of applicants from marketing copy quality. However, companies that experience success on the platform will post more deals of higher quality (since their success motivates them to do so).
            - As a result, high-application deals will automatically have better copy quality, because the locations were better to begin with.
    - We address this with the Mundlak decomposition, which accounts for the unobserved location effect and models the within-company effect.
- Some partners post deals in multiple categories, which we can subdivide by using location_id

In [11]:
# Total applications per partner, sorted descending; keep the first 20.
a = (
    df_deals
    .groupby('partner_id')['applicants_applications_count']
    .sum()
    .sort_values(ascending=False)
)[:20]

In [12]:
# Deals posted by the partners selected in `a`.
# NOTE: `a` is built with `[:20]`, so despite the `top_10` name this frame
# covers the 20 partners with the most applications.
# `.copy()` makes the filtered frame independent of `df_deals`, so adding a
# column below is well-defined and no longer raises SettingWithCopyWarning.
top_10_companies = df_deals[df_deals['partner_id'].isin(a.index)].copy()

# Total applications per partner, broadcast onto each of that partner's deals.
top_10_companies['company_total_apps'] = (
    top_10_companies
    .groupby('partner_id')['applicants_applications_count']
    .transform("sum")
)
top_10_companies

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  top_10_companies['company_total_apps'] = top_10_companies.groupby('partner_id')['applicants_applications_count'].transform("sum")


Unnamed: 0,applicants_applications_count,content_types,deal_id,main_image,min_social_media_followers,deal_tags,live_since,created_at,updated_at,deleted_at,...,gender,featured_image,company_id,test_nr_of_apps,update_at_date,diff,apps_after_7_days,apps_after_7_days_OLD,token_count,company_total_apps
0,39,"[{'id': 41, 'name': 'Activities', 'slug': 'per...",0196edc7-e6a8-00c8-6ff5-ea60fa3f2895,uploads/deals/0196edc7-5969-ffff-6700-7ae699b8...,2500,,2025-05-20 15:21:28.249567,2025-05-20 13:00:23.080212,2025-10-28 10:55:34.719187,NaT,...,,,0199f1d4-b501-015e-5c45-1bc8300b3949,39,2025-10-28,0,39,39.0,107,3706
2,23,"[{'id': 29, 'name': 'UGC', 'slug': 'Film-Strip'}]",019689e2-bfb5-00c8-6485-23352d64b0e5,uploads/deals/019689e2-c06c-ffff-baf0-c7e9decd...,5000,,2025-03-04 11:46:18.176627,2025-03-04 11:46:18.176627,2025-05-01 03:27:41.316646,NaT,...,,,,23,2025-05-01,0,13,13.0,63,1160
5,73,"[{'id': 31, 'name': 'Lifestyle', 'slug': 'Mart...",019689e2-efa8-00c8-0aff-9b643cb6337b,uploads/deals/019689e2-f073-ffff-29f7-5fc60421...,5000,,2025-03-05 12:10:51.501958,2025-03-05 12:10:51.501958,2025-05-01 03:27:53.621989,NaT,...,,,,73,2025-05-01,0,33,33.0,64,1160
12,0,"[{'id': 41, 'name': 'Activities', 'slug': 'per...",01985615-068a-00c8-502e-689ad4f46f4f,uploads/deals/01985615-0778-ffff-5f39-475ebacc...,1500,,NaT,2025-07-29 12:07:55.274571,2025-07-29 12:08:05.638372,NaT,...,,,,0,2025-07-29,0,0,,3,3706
52,0,"[{'id': 39, 'name': 'Experiences', 'slug': 'Te...",019689e4-3a57-00c8-7868-45557942d2d8,uploads/deals/019689e4-3af8-ffff-5d5b-d3e82ea9...,2500,,NaT,2023-10-17 16:28:45.927833,2025-05-01 03:29:18.311782,2024-05-31 08:26:35.053459+00:00,...,,,,0,2025-05-01,0,0,,32,3706
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4964,9,"[{'id': 35, 'name': 'Mom', 'slug': 'Baby'}]",01989848-a2ce-00c8-23e3-c6768bcb7246,uploads/deals/01989848-6fe4-ffff-12d8-4b5eeba6...,2500,,2025-08-11 08:39:14.081970,2025-08-11 08:39:13.872022,2025-08-11 08:39:14.114160,NaT,...,,,,9,2025-08-11,0,8,3.0,134,1473
4974,12,"[{'id': 29, 'name': 'UGC', 'slug': 'Film-Strip...",019689e2-d0fe-00c8-59e6-903960feec2b,uploads/deals/019689e2-d21d-ffff-6b5b-b1d9c169...,2500,,2025-04-07 11:09:44.777404,2025-04-07 11:09:44.777404,2025-10-08 07:50:50.593540,NaT,...,,,,12,2025-10-08,0,3,0.0,52,1473
5032,36,"[{'id': 29, 'name': 'UGC', 'slug': 'Film-Strip...",019702fb-c43e-00c8-952c-9fab143a893f,uploads/deals/019702fb-4778-ffff-63d6-1893bce7...,1500,,2025-05-24 15:49:03.852349,2025-05-24 15:49:03.678463,2025-05-24 15:49:03.872785,NaT,...,,,,36,2025-05-24,0,10,20.0,22,1473
5099,74,"[{'id': 39, 'name': 'Experiences', 'slug': 'Te...",01985afa-00dd-00c8-9469-b4e1d4090baf,uploads/deals/01985af9-a0f9-ffff-39de-179c9d4a...,1500,,2025-07-30 10:56:30.624179,2025-07-30 10:56:30.429868,2025-08-18 18:02:35.564690,NaT,...,,,,74,2025-08-18,0,53,53.0,164,2748


In [15]:
# All deals posted by the partner with partner_id 85.
df_deals.loc[df_deals['partner_id'].eq(85)]

Unnamed: 0,applicants_applications_count,content_types,deal_id,main_image,min_social_media_followers,deal_tags,live_since,created_at,updated_at,deleted_at,...,tags,gender,featured_image,company_id,test_nr_of_apps,update_at_date,diff,apps_after_7_days,apps_after_7_days_OLD,token_count
0,39,"[{'id': 41, 'name': 'Activities', 'slug': 'per...",0196edc7-e6a8-00c8-6ff5-ea60fa3f2895,uploads/deals/0196edc7-5969-ffff-6700-7ae699b8...,2500,,2025-05-20 15:21:28.249567,2025-05-20 13:00:23.080212,2025-10-28 10:55:34.719187,NaT,...,,,,0199f1d4-b501-015e-5c45-1bc8300b3949,39,2025-10-28,0,39,39.0,107
12,0,"[{'id': 41, 'name': 'Activities', 'slug': 'per...",01985615-068a-00c8-502e-689ad4f46f4f,uploads/deals/01985615-0778-ffff-5f39-475ebacc...,1500,,NaT,2025-07-29 12:07:55.274571,2025-07-29 12:08:05.638372,NaT,...,,,,,0,2025-07-29,0,0,,3
52,0,"[{'id': 39, 'name': 'Experiences', 'slug': 'Te...",019689e4-3a57-00c8-7868-45557942d2d8,uploads/deals/019689e4-3af8-ffff-5d5b-d3e82ea9...,2500,,NaT,2023-10-17 16:28:45.927833,2025-05-01 03:29:18.311782,2024-05-31 08:26:35.053459+00:00,...,,,,,0,2025-05-01,0,0,,32
130,27,"[{'id': 41, 'name': 'Activities', 'slug': 'per...",0197f38c-7b86-00c8-5f68-b04a6261f5fb,uploads/deals/0197f38e-0f5c-ffff-d415-284934d8...,2500,,2025-07-10 08:55:59.711606,2025-07-10 08:55:59.622417,2025-08-04 13:36:29.925546,NaT,...,,,,,27,2025-08-04,0,24,24.0,133
192,5,"[{'id': 41, 'name': 'Activities', 'slug': 'per...",0197110f-b551-00c8-bb33-cc3e6cb9e35d,uploads/deals/0197110e-b95f-ffff-d561-18611b4a...,2500,,2025-05-27 09:25:31.782749,2025-05-27 09:25:31.601943,2025-10-28 10:49:53.144061,NaT,...,,,,0199f1a1-0907-015e-1fc9-9934663110e1,5,2025-10-28,0,5,5.0,144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4325,43,"[{'id': 41, 'name': 'Activities', 'slug': 'per...",01987abe-ce4d-00c8-ce82-dafcfa794b38,uploads/deals/01987abe-cec7-ffff-5cef-0d4e8b68...,2500,,2025-08-05 14:59:55.828209,2025-08-05 14:59:41.773835,2025-10-28 10:55:37.013826,NaT,...,,,,0199f1d4-b501-015e-5c45-1bc8300b3949,43,2025-10-28,0,21,18.0,42
4345,109,"[{'id': 41, 'name': 'Activities', 'slug': 'per...",01982c75-c255-00c8-ea44-eae4c5f1b2c7,uploads/deals/01982c6f-e76d-ffff-8407-b576b39a...,2500,,2025-07-21 10:09:31.872188,2025-07-21 10:09:31.733940,2025-07-21 10:09:31.872261,NaT,...,,,,,109,2025-07-21,0,71,49.0,99
4412,6,"[{'id': 41, 'name': 'Activities', 'slug': 'per...",0198d0d3-612e-00c8-7f13-b352e4d24f27,uploads/deals/0198d0d3-61a2-ffff-f642-96a6c3c1...,2500,,2025-08-22 08:10:00.315465,2025-08-22 08:09:30.670778,2025-10-28 10:50:27.950474,NaT,...,,,,0199f1a5-e7d8-015e-73f9-509bb6bef028,6,2025-10-28,0,5,4.0,43
4674,18,"[{'id': 41, 'name': 'Activities', 'slug': 'per...",0197f874-bd80-00c8-d3c1-eb615d28429b,uploads/deals/0197f866-4cd4-ffff-919e-e4991862...,2500,,2025-07-11 07:48:09.918566,2025-07-11 07:48:09.728658,2025-10-28 11:03:58.435879,NaT,...,,,,0199f189-390d-015e-a393-817d2f4f3640,18,2025-10-28,0,16,14.0,131


# Comparison