In [1]:
import os
import torch
import gc
import logging

gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()        
    torch.cuda.ipc_collect()


try:
    %run setup_paths
except:
    %run notebooks/setup_paths
    

logging.basicConfig(
    level=logging.INFO,  # or DEBUG, WARNING, etc.
    format='%(asctime)s - %(levelname)s - %(message)s',
    stream=sys.stdout
)

logging.info(f"current dir: {os.getcwd()}")

2025-08-18 02:44:04,141 - INFO - current dir: c:\Projects\scmsim


# General

uidata -> user-item-data

Data, Models, etc... are accessed by name. Models per data-name


# First Train Model

In [3]:
%run src/datasets
%run src/models
%run src/estimators
%run src/loaders


In [4]:

paths = PathProvider('results','products')
uidata = MovieLensData(MovieLens100KLoader('ml-100k'))

REFIT = True
model_name = 'MF40'

if REFIT or not model_exists(paths, uidata.name(), model_name):
    train_params = MFTrainParams(
        lr = 1e-2,
        gamma = 0.95,
        batch_size = 2**12,
        n_epochs  = 0, #5,
        shuffle = True)
        
    mft = MatrixFactorizationTrain(paths, 'MF40', n_factors=40, train_params=train_params)
    mft.fit(uidata)
                         

2025-08-18 02:44:27,341 - INFO - loading ratings
2025-08-18 02:44:27,525 - INFO - fitting model MF40; num_items=1682; num_users=943; params=MFTrainParams(lr=0.01, gamma=0.95, batch_size=4096, n_epochs=0, shuffle=True); device=cuda
2025-08-18 02:44:29,448 - INFO - saving model at: results\MovieLens100K\models\MF40.0


In [5]:
%run src/pipeline
%run src/models
%run src/utils


## Create Estimations

Estimations are created for a list (DataFrame) of item pairs treatment->response

Then they are saved under method-name

In [8]:

uidata = MovieLensData(MovieLens100KLoader('ml-100k'))
model_name = 'MF40'
group_name = 'MoviesCausalGPT'
method_name = f'{model_name}.IPW'

model = load_model(paths, uidata.name(), model_name)

ipw_params = [IPWParams(), IPWParams(0, True), IPWParams(0.2, True)]
ipwe = MFIPWEstimator(model_name, model, ipw_params)

create_estimations(paths, uidata, method_name, group_name, ipwe, reset_ids=True)


2025-08-18 02:44:44,022 - INFO - loading info
2025-08-18 02:44:44,080 - INFO - processing: 0:4096 / 9442
2025-08-18 02:44:44,115 - INFO - loading ratings


  return torch.load(dir_path / paths.model_filename())


2025-08-18 02:44:44,361 - INFO - processing: 4096:8192 / 9442
2025-08-18 02:44:44,582 - INFO - processing: 8192:9442 / 9442


Unnamed: 0,MF40.IPW,MF40.IPW.s,MF40.IPW.clp0.2.s,treatment_idx,resp_idx
0,-0.227996,-0.113840,-0.113840,519,678
1,-0.232238,-0.115922,-0.115922,34,678
2,-0.332980,-0.166259,-0.166259,519,405
3,0.110286,0.055004,0.055004,181,405
4,-0.224814,-0.112417,-0.112417,173,405
...,...,...,...,...,...
9437,-0.119830,-0.059934,-0.059934,756,242
9438,-0.040297,-0.020139,-0.020139,21,844
9439,-0.029692,-0.014844,-0.014844,118,163
9440,-0.296925,-0.148305,-0.148305,244,318


## Basic Estimations

In [9]:
%run src/pipeline
%run src/models
%run src/utils


basee = BasicEstimator()
create_estimations(paths, uidata, 'Basic' , group_name, basee, reset_ids=True)


2025-08-18 02:44:46,296 - INFO - processing: 0:4096 / 9442
2025-08-18 02:44:46,475 - INFO - processing: 4096:8192 / 9442
2025-08-18 02:44:46,587 - INFO - processing: 8192:9442 / 9442


Unnamed: 0,CORR,LIFT,SATE,treatment_idx,resp_idx
0,-0.066988,0.915011,-0.021567,519,678
1,-0.105345,1.230268,0.053877,34,678
2,-0.135126,1.233576,0.093105,519,405
3,0.502863,1.481480,0.379883,181,405
4,0.348364,1.489090,0.271804,173,405
...,...,...,...,...,...
9437,-0.077004,0.566707,-0.062203,756,242
9438,0.075903,2.940193,0.094864,21,844
9439,-0.036143,1.574232,0.081276,118,163
9440,-0.224930,1.120735,0.040200,244,318


# Show all estimations

In [10]:
%run src/utils
#dir(cfg)
load_all_estimations(paths, 'MovieLens100K', 'MoviesCausalGPT')

Unnamed: 0,CORR,LIFT,SATE,treatment_idx,resp_idx,MF40.IPW,MF40.IPW.s,MF40.IPW.clp0.2.s
0,-0.066988,0.915011,-0.021567,519,678,-0.227996,-0.113840,-0.113840
1,-0.105345,1.230268,0.053877,34,678,-0.232238,-0.115922,-0.115922
2,-0.135126,1.233576,0.093105,519,405,-0.332980,-0.166259,-0.166259
3,0.502863,1.481480,0.379883,181,405,0.110286,0.055004,0.055004
4,0.348364,1.489090,0.271804,173,405,-0.224814,-0.112417,-0.112417
...,...,...,...,...,...,...,...,...
9449,-0.077004,0.566707,-0.062203,756,242,-0.119830,-0.059934,-0.059934
9450,0.075903,2.940193,0.094864,21,844,-0.040297,-0.020139,-0.020139
9451,-0.036143,1.574232,0.081276,118,163,-0.029692,-0.014844,-0.014844
9452,-0.224930,1.120735,0.040200,244,318,-0.296925,-0.148305,-0.148305


## Comparison (metrics) with MoviesCausalGPT

In [11]:
%run src/pipeline
%run src/models
%run src/utils


In [12]:
%run src/pipeline
get_causal_gpt_scores(paths, uidata)

Unnamed: 0,name,corr,corr_pos,zero_mse
0,CORR,0.076206,0.176908,0.047855
1,LIFT,0.037209,0.07061,27.693341
2,SATE,0.095675,0.141985,0.032063
3,MF40.IPW,-0.01921,0.0324,0.023926
4,MF40.IPW.s,-0.019204,0.032402,0.005969
5,MF40.IPW.clp0.2.s,-0.019204,0.032402,0.005969


# Leftovers

In [95]:
print(model.Q.shape)
aaa = torch.rand(2,4) * 10
aaa.norm(dim=1, keepdim=True)
tidx = torch.arange(10)
ridx = torch.arange(10) + 20

Qt = model.Q[tidx] 

Qr = model.Q[ridx] 

cosim = (Qt * Qr).sum(dim=1) / (Qt.norm(dim=1) * Qr.norm(dim=1))
#dp = (Qt / Qt.norm(dim=1, keepdim=True)) * (Qr / Qr.norm(dim=1, keepdim=True))
#dp.sum(dim=1)

torch.Size([1682, 40])


tensor([ 0.0091, -0.1013, -0.1563,  0.0016,  0.2534, -0.2155, -0.0383,  0.0072,
        -0.1072, -0.3055], grad_fn=<DivBackward0>)

In [91]:
watch = uidata.get_watch_matrix()
timestamp = uidata.get_watch_matrix(timestamps=True)


In [115]:
Wr = watch[:,ridx] * 1.0
Wt = watch[:,tidx] * 1.0
Tr = timestamp[:,ridx]
Tt = timestamp[:,tidx]

with_time = False
if with_time:
    treatment_mask = (Wr > 0.5) & ( (Wr < 0.5) | (Tt < Tr))
else:
    treatment_mask = (Wt > 0.5)
    
control_mask = ~treatment_mask
(Wr * treatment_mask).sum(dim=0) / treatment_mask.sum(dim=0) - (Wr * control_mask).sum(dim=0) / control_mask.sum(dim=0)


tensor([0.1349, 0.5030, 0.1920, 0.2670, 0.2467, 0.2368, 0.1017, 0.4455, 0.0385,
        0.1304])

In [116]:
Wr

tensor([[1., 1., 1.,  ..., 1., 1., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 1., 0., 0.]])

In [117]:
## correlation
Mt = (Wt*1.0).mean(dim=1, keepdim=True)
Mr = (Wr*1.0).mean(dim=1, keepdim=True)

((Wt - Mt)*(Wr - Mr)).sum(dim=0) / torch.sqrt(((Wt-Mt)**2).sum(dim=0) * ((Wr-Mr)**2).sum(dim=0))

tensor([-0.1303, -0.0147, -0.0558, -0.0743, -0.2495,  0.3940, -0.2405,  0.2008,
        -0.1920,  0.4156])

In [119]:
(Wr * Wt).mean(dim=0) / (Wr.mean(dim=0) * Wt.mean(dim=0))

tensor([1.7882, 2.3753, 1.8998, 2.1263, 1.7215, 3.9747, 1.9836, 2.1686, 1.2173,
        4.0091])

In [112]:
#estaa