In [1]:
from dotenv import dotenv_values
import os

# Environment files to load, in order. Each value is written straight into
# os.environ, so a key repeated in a later file overrides the earlier one.
envs = ["secret.env", "predict.env"]

for fenv in envs:
    file = os.path.join("env", fenv)
    config = dotenv_values(file)  # load sensitive variables
    print(config.keys())  # key names only, so secret values never reach the output
    for c, v in config.items():
        # dotenv_values maps a key declared without "=value" to None, and
        # os.environ only accepts strings, so such entries are skipped
        # instead of crashing the cell with a TypeError.
        if v is None:
            continue
        os.environ[c] = v

odict_keys(['WANDB_API_KEY'])
odict_keys(['WANDB_NAME', 'WANDB_NOTES', 'WANDB_NOTEBOOK_NAME'])


In [9]:
import torch
from src.data import HousePricingDataModule

import os
import wandb
import torch
from lightning import Trainer
from pytorch_lightning.loggers import WandbLogger
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from src.model import NeuralNetwork
from src.data import HousePricingDataModule
from src.util import fake_name


# Preparing data to be used
data_module = HousePricingDataModule()
data_module.prepare_data()

# Requesting to data module the number of different features that are in the data
in_features = data_module.in_features()

# Name shared by the W&B run and by the model artifact whose checkpoint is loaded below
best_model = "Nicole_YIL69KUL-17rqq9jc"
project_name = os.environ["WANDB_NAME"]

accelerator = "gpu" if torch.cuda.is_available() else "cpu"

# Setting up the configuration recorded with this prediction run
config = {
    "accelerator": accelerator,
    "used_model": best_model,
    "in_features": in_features
}

# NOTE(review): this notebook only loads a checkpoint and predicts, yet the run
# is tagged job_type="training" — confirm whether that label is intended.
run = wandb.init(
    job_type="training",
    name=best_model,
    project=project_name,
    config=config,
)

# Declare the model artifact as an input of this run and download its files locally
artifact = run.use_artifact(f"deepsat/House Pricing/{best_model}:latest", type='model')
artifact_dir = artifact.download()

# Defines the trainer instance used to run prediction; the settings are read
# back from the run handle rather than from the global wandb.config proxy
trainer = Trainer(
    accelerator=run.config["accelerator"],
)

# Restoring the model from the downloaded checkpoint
model = NeuralNetwork.load_from_checkpoint(
    checkpoint_path=os.path.join(artifact_dir, "model.ckpt"),
    input_size=run.config["in_features"]
)

[INFO]: Preprocessing training dataframe...
[INFO]: Dropping columns with full of NA or Identifiers. Current dataframe shape: (1460, 81)
[INFO]: Dropped columns with full of NA or Identifiers. Current dataframe shape: (1460, 74)
[INFO]: Dropping categories types with few ocurrence. Current dataframe shape: (1460, 74)
[INFO]: Few ocurrences removed. Current dataframe shape: (1443, 74)
[INFO]: Dropping columns which contains just one type of category. Current dataframe shape: (1443, 74)
[INFO]: Columns with just one type of categroy dropped. Current dataframe shape: (1443, 73)
[INFO]: Preprocessing predict dataframe...
[INFO]: Dropping columns with full of NA or Identifiers. Current dataframe shape: (1459, 80)
[INFO]: Dropped columns with full of NA or Identifiers. Current dataframe shape: (1459, 73)


[34m[1mwandb[0m:   1 of 1 files downloaded.  
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


[INFO]: Input size: 244


In [12]:
# Display the restored network's repr to inspect its layer stack
model

NeuralNetwork(
  (net): Sequential(
    (0): Linear(in_features=244, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=128, bias=True)
    (3): ReLU()
    (4): Linear(in_features=128, out_features=128, bias=True)
    (5): ReLU()
    (6): Linear(in_features=128, out_features=64, bias=True)
    (7): ReLU()
    (8): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [14]:
# Run prediction with the restored model on the data supplied by the data module
preds = trainer.predict(
    model=model,
    datamodule=data_module,
)

[INFO]: Preprocessing training dataframe...
[INFO]: Dropping columns with full of NA or Identifiers. Current dataframe shape: (1460, 81)
[INFO]: Dropped columns with full of NA or Identifiers. Current dataframe shape: (1460, 74)
[INFO]: Dropping categories types with few ocurrence. Current dataframe shape: (1460, 74)
[INFO]: Few ocurrences removed. Current dataframe shape: (1443, 74)
[INFO]: Dropping columns which contains just one type of category. Current dataframe shape: (1443, 74)
[INFO]: Columns with just one type of categroy dropped. Current dataframe shape: (1443, 73)
[INFO]: Preprocessing predict dataframe...
[INFO]: Dropping columns with full of NA or Identifiers. Current dataframe shape: (1459, 80)
[INFO]: Dropped columns with full of NA or Identifiers. Current dataframe shape: (1459, 73)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/root/miniconda3/envs/pricehousing/lib/python3.9/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


[INFO]: Setting up predict dataset/s
[INFO]: Predict dataloader size: 12
Predicting DataLoader 0: 100%|██████████| 12/12 [00:00<00:00, 142.35it/s]


In [15]:
# Inspect the raw result of trainer.predict; the recorded output is a list of
# tensors holding one predicted value per row (presumably one tensor per batch — TODO confirm)
preds

[tensor([[117800.6484],
         [143995.1719],
         [185659.3750],
         [196890.5938],
         [184152.2344],
         [174317.0469],
         [167866.8281],
         [166938.2500],
         [198901.4219],
         [132654.9688],
         [149630.6250],
         [106291.5078],
         [102372.6016],
         [138025.0000],
         [101455.5078],
         [378144.6562],
         [234458.0781],
         [309181.5000],
         [329270.2188],
         [492475.3750],
         [318222.5312],
         [206395.5469],
         [174274.1406],
         [182183.4531],
         [188698.3594],
         [198766.8906],
         [366488.9062],
         [215800.1562],
         [203336.1250],
         [262104.7031],
         [202339.5156],
         [ 90801.7578],
         [201140.4062],
         [336541.1875],
         [293990.8438],
         [250998.1406],
         [172684.5469],
         [196927.5156],
         [183139.7812],
         [179069.4375],
         [204432.0781],
         [156934

In [16]:
# Finish the W&B run that was started with wandb.init in the setup cell
wandb.finish()