In [125]:
import os
import torch
import gc
import logging

gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()        
    torch.cuda.ipc_collect()


try:
    %run setup_paths
except:
    %run notebooks/setup_paths
    

logging.basicConfig(
    level=logging.INFO,  # or DEBUG, WARNING, etc.
    format='%(asctime)s - %(levelname)s - %(message)s',
    stream=sys.stdout
)

logging.info(f"current dir: {os.getcwd()}")

2025-08-22 08:51:35,959 - INFO - current dir: c:\Projects\scmsim


In [126]:
# Execute the project's helper scripts; %run makes the names they define
# available in this notebook's namespace for the cells below.
# NOTE(review): presumably these provide read_cfg, PathProvider, MovieLensData
# and get_uidata_loader used in the next cell — confirm.
%run src/utils
%run src/loaders


In [127]:
## Configuration and User-Item data selection

In [128]:
# Read the configuration file and build the path provider from its
# 'results' and 'products' path entries.
cfg = read_cfg('configs/config.yaml')
paths = PathProvider(cfg['paths']['results'], cfg['paths']['products'])
# Wrap the 'ml-1m' user-item data loader; `uidata` and `paths` are reused by
# every estimation cell further down.
uidata = MovieLensData(get_uidata_loader(cfg, 'ml-1m'))


## Create IPW Estimates for a single model

- A few IPW variants on top of the MF30 model
- The results are saved to results/MovieLens1M/estimations/MoviesCausalGPT/MF30.IPW
- The model location is results/MovieLens1M/models/MF30.0 (version 0)

In [129]:
%run src/estimators
%run src/pipeline

model_name = 'MF30'
model = load_model(paths, uidata.name(), model_name)
ipw_params = [IPWParams(0, False), IPWParams(0, True), IPWParams(0.2, True), IPWParams(0, False,False), IPWParams(0, True,False)]
ipwe = MFIPWEstimator(model_name, model, ipw_params)    
create_estimations(paths, uidata, f'{model_name}.IPW', 'MoviesCausalGPT', ipwe, reset_ids=True)


2025-08-22 08:51:36,627 - INFO - loading model MF30 (version 0) at results\MovieLens1M\models\MF30.0\weights.pt
2025-08-22 08:51:36,758 - INFO - loading info
2025-08-22 08:51:36,807 - INFO - processing: 0:8384 / 8384


  return torch.load(model_path)


2025-08-22 08:51:36,876 - INFO - loading ratings
2025-08-22 08:51:45,217 - INFO - saving estimations at results\MovieLens1M\estimations\MoviesCausalGPT\MF30.IPW


Unnamed: 0,MF30.IPW.t,MF30.IPW.s.t,MF30.IPW.clp0.2.s.t,MF30.IPW,MF30.IPW.s,treatment_idx,resp_idx
0,0.015020,0.042058,0.008546,0.028184,0.055672,1254,1515
1,-0.021982,-0.019971,0.017242,0.018783,0.028116,166,1515
2,-0.221694,-0.095694,-0.059571,-0.050651,0.054039,1254,648
3,-1.152552,-0.448462,-0.120088,0.011353,0.011939,1197,648
4,-0.438400,-0.293443,-0.174653,-0.012410,-0.045047,1193,648
...,...,...,...,...,...,...,...
8379,-0.043582,-0.026719,-0.004838,-0.018208,0.029421,107,1395
8380,-0.045813,-0.034192,-0.001815,-0.028392,0.007306,76,1395
8381,-0.187699,-0.151900,-0.043671,-0.018897,-0.001099,736,1125
8382,-0.399466,-0.267101,-0.124562,-0.016525,0.188066,1480,527


- Let's do the same for additional models

In [130]:
%run src/estimators
%run src/pipeline

for model_name in ['MF30','MF40', 'MF50', 'MF60']:
    model = load_model(paths, uidata.name(), model_name)
    ipwe = MFIPWEstimator(model_name, model, ipw_params)    
    ## reset_ids mean we re-match the item-ids from the title name 
    create_estimations(paths, uidata, f'{model_name}.IPW', 'MoviesCausalGPT', ipwe, reset_ids=True)    
    create_estimations(paths, uidata, f'{model_name}.CosSim' , 'MoviesCausalGPT', CosineSimilarityEstimator(model_name, model), reset_ids=True)



2025-08-22 08:51:45,339 - INFO - loading model MF30 (version 0) at results\MovieLens1M\models\MF30.0\weights.pt
2025-08-22 08:51:45,394 - INFO - processing: 0:8384 / 8384


  return torch.load(model_path)


2025-08-22 08:51:48,234 - INFO - saving estimations at results\MovieLens1M\estimations\MoviesCausalGPT\MF30.IPW
2025-08-22 08:51:48,374 - INFO - processing: 0:8384 / 8384
2025-08-22 08:51:48,379 - INFO - saving estimations at results\MovieLens1M\estimations\MoviesCausalGPT\MF30.CosSim
2025-08-22 08:51:48,412 - INFO - loading model MF40 (version 0) at results\MovieLens1M\models\MF40.0\weights.pt
2025-08-22 08:51:48,595 - INFO - processing: 0:8384 / 8384


  return torch.load(model_path)


2025-08-22 08:51:51,458 - INFO - saving estimations at results\MovieLens1M\estimations\MoviesCausalGPT\MF40.IPW
2025-08-22 08:51:51,580 - INFO - processing: 0:8384 / 8384
2025-08-22 08:51:51,835 - INFO - saving estimations at results\MovieLens1M\estimations\MoviesCausalGPT\MF40.CosSim
2025-08-22 08:51:51,861 - INFO - loading model MF50 (version 0) at results\MovieLens1M\models\MF50.0\weights.pt
2025-08-22 08:51:51,924 - INFO - processing: 0:8384 / 8384


  return torch.load(model_path)


2025-08-22 08:51:54,563 - INFO - saving estimations at results\MovieLens1M\estimations\MoviesCausalGPT\MF50.IPW
2025-08-22 08:51:54,681 - INFO - processing: 0:8384 / 8384
2025-08-22 08:51:54,900 - INFO - saving estimations at results\MovieLens1M\estimations\MoviesCausalGPT\MF50.CosSim
2025-08-22 08:51:54,926 - INFO - loading model MF60 (version 0) at results\MovieLens1M\models\MF60.0\weights.pt
2025-08-22 08:51:54,987 - INFO - processing: 0:8384 / 8384


  return torch.load(model_path)


2025-08-22 08:51:57,653 - INFO - saving estimations at results\MovieLens1M\estimations\MoviesCausalGPT\MF60.IPW
2025-08-22 08:51:57,789 - INFO - processing: 0:8384 / 8384
2025-08-22 08:51:57,815 - INFO - saving estimations at results\MovieLens1M\estimations\MoviesCausalGPT\MF60.CosSim


## Basic Estimations


In [131]:
# Baseline estimates that need no trained model; the output table shows the
# CORR, LIFT and SATE columns, stored under the name 'Basic' in the
# 'MoviesCausalGPT' group.
basee = BasicEstimator()
create_estimations(paths, uidata, 'Basic' , 'MoviesCausalGPT', basee, reset_ids=True)


2025-08-22 08:51:57,920 - INFO - processing: 0:8384 / 8384
2025-08-22 08:51:59,201 - INFO - saving estimations at results\MovieLens1M\estimations\MoviesCausalGPT\Basic


Unnamed: 0,CORR,LIFT,SATE,treatment_idx,resp_idx
0,-0.065683,2.136280,0.036812,1254,1515
1,0.215599,7.658727,0.201577,166,1515
2,-0.079580,1.493124,0.134777,1254,648
3,0.334042,1.549424,0.225409,1197,648
4,0.151793,1.238234,0.084307,1193,648
...,...,...,...,...,...
8379,-0.008285,2.559322,0.079530,107,1395
8380,0.004858,2.760617,0.088601,76,1395
8381,-0.081011,2.190710,0.092987,736,1125
8382,-0.106751,1.661950,0.264092,1480,527


## Show all pair estimations

In [132]:
%run src/utils
load_all_estimations(paths, 'MovieLens1M', 'MoviesCausalGPT')

Unnamed: 0,CORR,LIFT,SATE,treatment_idx,resp_idx,MF30.CosSim,MF30.IPW.t,MF30.IPW.s.t,MF30.IPW.clp0.2.s.t,MF30.IPW,...,MF50.IPW.s.t,MF50.IPW.clp0.2.s.t,MF50.IPW,MF50.IPW.s,MF60.CosSim,MF60.IPW.t,MF60.IPW.s.t,MF60.IPW.clp0.2.s.t,MF60.IPW,MF60.IPW.s
0,-0.065683,2.136280,0.036812,1254,1515,-0.169277,0.015020,0.042058,0.008546,0.028184,...,0.111575,0.009561,0.057367,0.126374,-0.160851,0.036162,0.122169,0.010515,0.045310,0.132845
1,0.215599,7.658727,0.201577,166,1515,-0.040867,-0.021982,-0.019971,0.017242,0.018783,...,-0.018613,0.007725,-0.011006,0.028502,0.085878,-0.029931,-0.019434,0.006967,-0.013626,0.042138
2,-0.079580,1.493124,0.134777,1254,648,0.088290,-0.221694,-0.095694,-0.059571,-0.050651,...,-0.110356,-0.060794,-0.072534,0.063354,0.091324,-0.254248,-0.120403,-0.051876,-0.109737,0.058494
3,0.334043,1.549424,0.225409,1197,648,0.064547,-1.152552,-0.448462,-0.120088,0.011353,...,-0.561666,-0.112525,0.019081,0.061206,0.092403,-1.781494,-0.504733,-0.107773,0.010673,0.061051
4,0.151793,1.238234,0.084307,1193,648,-0.237523,-0.438400,-0.293443,-0.174653,-0.012410,...,-0.254083,-0.148387,0.018515,-0.005871,-0.025139,-0.464211,-0.294169,-0.171591,-0.046326,-0.038307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9909,-0.008285,2.559322,0.079530,107,1395,-0.066038,-0.043582,-0.026719,-0.004838,-0.018208,...,0.009851,0.003271,-0.016750,0.059263,0.032057,-0.033582,0.023486,-0.003230,-0.016479,0.058875
9910,0.004858,2.760617,0.088601,76,1395,0.036705,-0.045813,-0.034192,-0.001815,-0.028392,...,-0.038567,-0.001003,-0.040241,-0.010641,-0.001178,-0.043730,-0.031256,0.001982,-0.034364,-0.011039
9911,-0.081011,2.190710,0.092986,736,1125,0.020021,-0.187699,-0.151900,-0.043671,-0.018897,...,-0.161794,-0.034604,-0.010390,0.027006,-0.061266,-0.284668,-0.206326,-0.035761,-0.012639,0.028029
9912,-0.106751,1.661950,0.264092,1480,527,0.175378,-0.399466,-0.267101,-0.124562,-0.016525,...,-0.209164,-0.121491,-0.047368,0.203732,-0.005993,-0.344995,-0.125904,-0.128250,-0.032739,0.250724


## Method Comparison

In [133]:

# Re-run the pipeline script so its latest definitions are in scope, then
# score the estimators; the resulting table has one row per estimator with
# the columns corr, corr_pos, zero_mse and spearman.
%run src/pipeline
get_causal_gpt_scores(paths, uidata)

Unnamed: 0,name,corr,corr_pos,zero_mse,spearman
0,CORR,0.101491,0.222163,0.035263,0.184856
1,LIFT,0.138465,0.224162,14.707763,
2,SATE,0.060729,0.230424,0.067099,
3,MF30.CosSim,0.224895,0.359665,0.055984,0.246913
4,MF30.IPW.t,-0.027104,-0.125568,0.083061,0.059718
5,MF30.IPW.s.t,-0.018621,-0.110938,0.015593,
6,MF30.IPW.clp0.2.s.t,0.029697,0.114183,0.007732,
7,MF30.IPW,0.026399,0.093257,0.002158,-0.01871
8,MF30.IPW.s,0.010117,0.161001,0.008588,
9,MF40.CosSim,0.183258,0.375879,0.042438,0.169194


In [134]:
# Read the 'MoviesCausalGPT' product CSV and enrich it using the dataset's
# item info.
# NOTE(review): `pd` is not imported in any visible cell; presumably it is
# brought in by one of the %run scripts — confirm, or import pandas explicitly.
group_path = paths.get_product_csv('MoviesCausalGPT')
pdf = pd.read_csv(group_path)
pdf = enrich_cause_indexes(pdf, uidata.info)


In [135]:
# Relative frequency of each 'causal_effect' label: count the occurrences,
# divide by the total, and show the result as a one-column table.
counts = pdf['causal_effect'].value_counts()
counts.div(counts.sum()).to_frame().sort_values(by="causal_effect")

Unnamed: 0_level_0,count
causal_effect,Unnamed: 1_level_1
-10,0.000239
-9,0.000835
-8,0.00167
-7,0.001908
-6,0.00167
-5,0.002266
-4,0.003101
-3,0.003578
-2,0.007872
-1,0.005487
