# Modeling GNNePCSAFT

## Starting point

In [None]:
import os, shutil
from kaggle_secrets import UserSecretsClient

# Fetch the notebook's credentials from Kaggle's secret store. The numbered
# names are kept because later cells reference some of them
# (secret_value_2, secret_value_4, secret_value_6).
user_secrets = UserSecretsClient()
(
    secret_value_0,
    secret_value_1,
    secret_value_2,
    secret_value_3,
    secret_value_4,
    secret_value_5,
    secret_value_6,
) = (
    user_secrets.get_secret(label)
    for label in (
        "awskeyid",
        "awssecretkey",
        "ghtk",
        "kaggle",
        "username",
        "wandbkey",
        "email",
    )
)

# Export the credentials that are consumed through environment variables.
os.environ.update(
    {
        "KAGGLE_USERNAME": secret_value_4,
        "KAGGLE_KEY": secret_value_3,
        "WANDB_API_KEY": secret_value_5,
        "AWS_ACCESS_KEY_ID": secret_value_0,
        "AWS_SECRET_ACCESS_KEY": secret_value_1,
    }
)

# Put root's per-user bin directory first on PATH (presumably so the
# `pip install --user` pipenv script is found by later cells).
os.environ["PATH"] = f"/root/.local/bin:{os.environ['PATH']}"

In [None]:
%%bash
# Bring pip itself up to date before anything else is installed.
python -m pip install --quiet --upgrade pip

# Install pipenv into the per-user site rather than the system one.
pip install --quiet --user pipenv


In [None]:
%%bash
# for the cloud 
rm -rf .* *
git clone --branch dev --single-branch -q https://{secret_value_2}@github.com/wildsonbbl/gnnepcsaft.git .

In [None]:
%%capture --no-stdout req_output
%%bash
# Abort on the first failing command so the completion message below is only
# printed when both installs actually succeeded (previously it was echoed
# unconditionally, even after a failed install).
set -euo pipefail

# Install wheel and ml_collections first, then everything in requirements.txt.
pipenv install wheel ml_collections -q
pipenv install -r requirements.txt -q
echo "Finished pip installing requirements.txt"

In [None]:
%%capture --no-stdout pcsaft_out
%%bash
# Abort on the first failing command so the completion message at the end is
# only printed when the download, unpack and install all succeeded.
set -euo pipefail

# install pcsaft
# Download the Eigen sources and the PC-SAFT v1.4.1 release. --fail makes curl
# exit non-zero on an HTTP error instead of saving the error page as a .zip.
# NOTE(review): Eigen is taken from the moving `master` branch, so this step is
# not reproducible; consider pinning a release archive.
curl --fail -O -L https://gitlab.com/libeigen/eigen/-/archive/master/eigen-master.zip
curl --fail -O -L https://github.com/zmeri/PC-SAFT/archive/refs/tags/v1.4.1.zip
unzip -q eigen-master.zip
unzip -q v1.4.1.zip
# Copy Eigen into PC-SAFT's externals/eigen directory before installing it.
cp -rf eigen-master/. PC-SAFT-1.4.1/externals/eigen
pipenv install ./PC-SAFT-1.4.1 -q
echo "Finished pip installing pcsaft"

In [None]:
%%bash

# Write the credentials exported by the first cell into a .env file in the
# working directory (presumably for tools that load .env, e.g. `pipenv run`
# -- confirm). Each value is quoted and emitted with printf so it is written
# verbatim; an unquoted `echo $VAR` would word-split and glob-expand it.
{
  printf 'AWS_ACCESS_KEY_ID=%s\n' "$AWS_ACCESS_KEY_ID"
  printf 'AWS_SECRET_ACCESS_KEY=%s\n' "$AWS_SECRET_ACCESS_KEY"
  printf 'WANDB_API_KEY=%s\n' "$WANDB_API_KEY"
} > .env

In [None]:
!pipenv run dvc pull -f -q

In [None]:
%%bash
# Show the name and version of the pcsaft package installed in the pipenv environment.
pipenv run pip show pcsaft | grep -E "Name|Version"

# Summarise the CPU: model name plus any line mentioning cores.
lscpu | grep -E "Model name|[Cc]ore"

# Report GPU status.
nvidia-smi

## Training

In [None]:
from gnnepcsaft.configs.default import get_config

# Load the default experiment config and keep the fields later cells refer to.
config = get_config()
dataset, model_name, checkpoint = (
    config.dataset,
    config.model_name,
    config.checkpoint,
)
# Training steps expressed in millions, with "_" replacing the decimal point
# (e.g. 500000 steps -> "0_5"); referenced by the checkpoint push cell.
model_suffix = "_".join(str(config.num_train_steps / 1e6).split("."))
# Export the dataset name so the `%%bash` training cell can read $dataset.
os.environ["dataset"] = dataset

In [None]:
# Disabled: when the config names a checkpoint, this would copy that file over
# last_checkpoint.pth (presumably so training resumes from it -- TODO confirm).
# if checkpoint:
#     shutil.copy(f"gnnepcsaft/train/checkpoints/{checkpoint}", "gnnepcsaft/train/checkpoints/last_checkpoint.pth")

In [None]:
%%bash
# Launch training through the project's CLI inside the pipenv environment.
# $dataset is exported by the config cell; it is quoted so the value always
# reaches the CLI as a single argument, without word splitting or globbing.
pipenv run python -m gnnepcsaft.train.train \
  --workdir=./gnnepcsaft \
  --config=gnnepcsaft/configs/default.py \
  --dataset="$dataset" \
  --lightning=True \
  --config.model=PNAL

### Updating Kaggle dataset

In [None]:
%%bash -s "$secret_value_6" "$secret_value_4" "$model_name" "$model_suffix"
# IPython substitutes Python variables only on the magic line above, never in
# the body of a `%%bash` cell. The `{name}` placeholders that used to sit in
# the body reached git literally, so the values are now handed to bash as
# positional parameters instead.
git_email="$1"
git_user="$2"
model_name="$3"
model_suffix="$4"

# Legacy Kaggle-dataset upload, kept disabled:
# !kaggle datasets metadata -p /kaggle/working/gnnepcsaft/train/checkpoints wildsonbblima/gnnepcsaft
# shutil.copy("gnnepcsaft/train/checkpoints/last_checkpoint.pth", f"gnnepcsaft/train/checkpoints/{model_name}-{model_suffix}e6.pth")
# os.remove("gnnepcsaft/train/checkpoints/last_checkpoint.pth")
# !kaggle datasets version -p /kaggle/working/gnnepcsaft/train/checkpoints -m "{model_name} at {model_suffix}e6 steps"

# Track the checkpoints directory with DVC and upload it to the DVC remote.
echo "*****Push to data repo*****"
pipenv run dvc add gnnepcsaft/train/checkpoints
pipenv run dvc push

# Identity used for the commit below.
git config --global user.email "$git_email"
git config --global user.name "$git_user"

# Commit only the updated DVC pointer file and push it to the code repository.
echo "*****Push to code repo*****"
git pull
git commit gnnepcsaft/train/checkpoints.dvc -m "$model_name at ${model_suffix}e6 steps"
git push

echo "*****Finished*****"

### Clear workspace

In [None]:
# Destructive: recursively delete everything in the current working directory,
# hidden entries included.
!rm -rf * .*