# Modeling GNNePCSAFT


## Starting point


In [None]:
import os, shutil

from kaggle_secrets import UserSecretsClient

# Kaggle secret label -> name of the environment variable it is exported as.
# Other cells of this notebook read some of these from the environment:
# GHTK (git clone URL), GCP_KEY (DVC credentials file) and
# USER_EMAIL / KAGGLE_USERNAME (git identity).
ENV_NAME_BY_SECRET = {
    "email": "USER_EMAIL",
    "gcp_key": "GCP_KEY",
    "ghtk": "GHTK",
    "kaggle": "KAGGLE_KEY",
    "username": "KAGGLE_USERNAME",
    "wandbkey": "WANDB_API_KEY",
}

user_secrets = UserSecretsClient()

# Fetch every secret first, then export them all in a single step, so a
# failing lookup leaves the environment untouched.
secret_env = {
    env_name: user_secrets.get_secret(label)
    for label, env_name in ENV_NAME_BY_SECRET.items()
}
os.environ.update(secret_env)

# Search /root/.local/bin before the rest of PATH
# (presumably where user-level pip installs put their scripts -- confirm).
os.environ["PATH"] = f"/root/.local/bin:{os.environ['PATH']}"

In [None]:
%%bash

# Upgrade pip itself and install ml_collections, with quiet output.
python -m pip install --quiet --upgrade pip ml_collections

# Disabled, kept for reference:
# pip install pipenv --user -q


In [None]:
%%bash

# for the cloud: empty the working directory, then clone the `dev` branch of
# the repository straight into it.

# Delete every entry of the current directory, hidden ones included.
# `-mindepth 1` excludes `.` itself, so unlike the `.*` glob this never hands
# `.` or `..` to the removal step, and names starting with `-` cannot be
# mistaken for command-line options.
find . -mindepth 1 -delete

# GHTK is the credential exported by the secrets cell; quoting keeps the whole
# URL a single argument whatever characters the value contains.
git clone --branch dev --single-branch -q "https://${GHTK}@github.com/wildsonbbl/gnnepcsaft.git" .

In [None]:
%%capture req_output

%%bash

# Python dependencies listed in the cloned repository's requirements.txt.
pip install -r requirements.txt

echo "*********  Finished pip installing requirements.txt  *********"

# torch-scatter installation is disabled; the log line below says so instead
# of reporting an install that never ran.
# pip install torch-scatter

echo "*********  Skipped pip installing torch-scatter  *********"

# install pcsaft: build PC-SAFT v1.5.0 from source, with the Eigen sources
# copied into its externals/eigen directory.

# --fail makes curl exit with an error on an HTTP error status instead of
# saving the server's error page under the .zip name.
curl --fail -O -L https://gitlab.com/libeigen/eigen/-/archive/master/eigen-master.zip
curl --fail -O -L https://github.com/zmeri/PC-SAFT/archive/refs/tags/v1.5.0.zip

# -o overwrites existing files without prompting, so the cell can be re-run.
unzip -q -o eigen-master.zip
unzip -q -o v1.5.0.zip

# Replace the np.float_ alias with np.float64 in the Cython source before
# building (presumably because the installed NumPy no longer provides
# np.float_ -- confirm).
sed -i "s/np.float_/np.float64/g" PC-SAFT-1.5.0/pcsaft.pyx
cp -rf eigen-master/. PC-SAFT-1.5.0/externals/eigen
pip install ./PC-SAFT-1.5.0

echo "*********  Finished pip installing pcsaft  *********"


In [None]:
# Save the captured install output to log.txt.
# Written from Python rather than through `!echo '...'`: the pip output may
# contain single quotes or other shell metacharacters, which would end the
# quoted string early and garble the log (or be executed by the shell).
# str(req_output) is the captured stdout; the trailing newline matches echo.
with open("log.txt", "w", encoding="utf-8") as log_file:
    log_file.write(f"{req_output}\n")

In [None]:
%%bash

# Write the GCP credentials held in the GCP_KEY environment variable to a file.
# - "$GCP_KEY" is quoted so the value is written verbatim; unquoted, the shell
#   would split it on whitespace and expand any glob characters it contains.
# - printf '%s\n' prints the value as-is, with no option or escape handling.
# - umask 077 in a subshell makes a newly created file readable by its owner
#   only, without changing the umask for the commands that follow.
(umask 077; printf '%s\n' "$GCP_KEY" > /kaggle/gcp_key.json)

# Point the DVC remote named `storage` at that credentials file (local config
# only), then download the tracked data.
dvc remote modify --local storage credentialpath '/kaggle/gcp_key.json'
dvc pull

In [None]:
%%bash

# Environment report: name/version of the installed pcsaft package,
# CPU model and core lines from lscpu, then the GPU status table.
pip show pcsaft | grep -E "Version|Name"
lscpu | grep -E "Model name|Core|core"
nvidia-smi

## Training


In [None]:
import os

from gnnepcsaft.configs.default import get_config

config = get_config()
model_name, checkpoint = config.model_name, config.checkpoint

# Training steps expressed in millions, with "." swapped for "_"
# (e.g. 1_500_000 steps -> "1_5").
model_suffix = f"{config.num_train_steps / 1e6}".replace(".", "_")

# Exported so the %%bash cell that pushes the checkpoints can build its
# commit message from them.
os.environ.update(MODEL_NAME=model_name, MODEL_SUFFIX=model_suffix)

In [None]:
# Disabled: when `checkpoint` is set, this would copy that checkpoint file
# over last_checkpoint.pth in gnnepcsaft/train/checkpoints before training.
# if checkpoint:

#     shutil.copy(f"gnnepcsaft/train/checkpoints/{checkpoint}", "gnnepcsaft/train/checkpoints/last_checkpoint.pth")

In [None]:
%%bash

# Launch the training module with the default config file, overriding the
# accelerator to GPU. One flag per line; order is the same as before.
# NOTE(review): the fractional --num_cpu_trainer / --num_gpus values look like
# per-trial resource shares for parallel runs -- confirm in gnnepcsaft.train.train.
python -m gnnepcsaft.train.train \
    --workdir=./gnnepcsaft \
    --config=gnnepcsaft/configs/default.py \
    --framework="lightning" \
    --tags="parallel" \
    --num_cpu=1.0 \
    --num_cpu_trainer=0.33 \
    --num_gpus=0.33 \
    --num_workers=3 \
    --verbose=2 \
    --config.accelerator=gpu


### Updating Kaggle dataset


In [None]:
%%bash

# Disabled Kaggle-dataset upload, kept for reference (notebook syntax, not bash):
# !kaggle datasets metadata -p /kaggle/working/gnnepcsaft/train/checkpoints wildsonbblima/gnnepcsaft
# shutil.copy("gnnepcsaft/train/checkpoints/last_checkpoint.pth", f"gnnepcsaft/train/checkpoints/{model_name}-{model_suffix}e6.pth")
# os.remove("gnnepcsaft/train/checkpoints/last_checkpoint.pth")
# !kaggle datasets version -p /kaggle/working/gnnepcsaft/train/checkpoints -m "{model_name} at {model_suffix}e6 steps"

# NOTE(review): the script does not use `set -e`, so a failing step (for
# example `dvc push`) does not stop the git commit/push that follows.

echo "*****  Push to data repo  *****"

# Track the checkpoints directory with DVC (updates checkpoints.dvc) and
# upload the data to the DVC remote.
dvc add gnnepcsaft/train/checkpoints
dvc push

# Git identity for the commit below. The values are quoted so an e-mail or
# user name containing spaces stays a single argument instead of being split
# into extra `git config` arguments.
git config --global user.email "$USER_EMAIL"
git config --global user.name "$KAGGLE_USERNAME"

echo "*****  Push to code repo  *****"

# Commit only the DVC pointer file; MODEL_NAME and MODEL_SUFFIX are exported
# by the training-config cell.
git pull
git commit gnnepcsaft/train/checkpoints.dvc -m "$MODEL_NAME at $MODEL_SUFFIX e6 steps"
git push

echo "*****  Finished  *****"

### Clear workspace


In [None]:
!rm -rf * .*