In [251]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import cv2
import torch
from torchvision.transforms import transforms
import numpy as np
import torchvision
import pandas as pd
from sklearn import preprocessing
class DonedealAttributeDataset(Dataset):
    def __init__(self, filename, \
        transform=transforms.Compose([transforms.ToTensor()]), label_mapping={}):
        self.transform = transform
        self.label_mapping = label_mapping
        self.filename = filename
        self.df = pd.read_csv(filename)
        self.df["Mileage"] = self.df["Mileage"] / 10000
        self.df["Year"] = self.df["Year"] - 2000
        self.df["Power"] = self.df["Power"] /100
        self.df["Price"] = self.df["Price"] /1000

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        
        # X = self.df.loc[idx, ["Mileage", "Year", "EngSize", "Power"]].to_numpy()
        X = self.df.loc[idx, ["Mileage", "Year"]].to_numpy()
        y = self.df.loc[idx, "Price"]

        X = torch.tensor(X).float()
        y = torch.tensor(y).float()
        
        return X, y



transform = transforms.Compose([transforms.ToTensor()])
     
trainset = DonedealAttributeDataset(filename='./DoneDealCars4Regression.csv', transform=transform)
# testset = DonedealAttributeDataset(filename='./DoneDealCars4Regression.csv', transform=transform)
# batch_size = trainset.df.size
trainloader = DataLoader(trainset, batch_size=int(trainset.df.size), shuffle=True)
# testloader = DataLoader(testset, batch_size=batch_size, shuffle=True)

In [252]:
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.block = nn.Sequential(
            # nn.LazyLinear(20, bias=True),
            # nn.Sigmoid(),
            nn.LazyLinear(1, bias=True),
        )
    def forward(self, x):
        return self.block(x)
        
net = Net().to("cuda")



Lazy modules are a new feature under heavy development so changes to the API or functionality can happen at any moment.



In [253]:
import torch.optim as optim

criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.0001)

In [254]:
for epoch in range(100):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        Xs, ys = data[0].to("cuda"), data[1].to("cuda")
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        ys_hat = net(Xs)
        loss = criterion(ys_hat, ys.reshape(-1, 1))
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        print("running_loss:", running_loss)

print('Finished Training')


running_loss: 409.741943359375
running_loss: 357.9724426269531
running_loss: 314.45733642578125
running_loss: 277.541259765625
running_loss: 246.36407470703125
running_loss: 220.21885681152344
running_loss: 198.14022827148438
running_loss: 179.2808074951172
running_loss: 163.5158233642578
running_loss: 149.97991943359375
running_loss: 138.6575927734375
running_loss: 128.87692260742188
running_loss: 120.67411804199219
running_loss: 113.72225952148438
running_loss: 107.84961700439453
running_loss: 102.7208480834961
running_loss: 98.24919891357422
running_loss: 94.51969909667969
running_loss: 91.27244567871094
running_loss: 88.59869384765625
running_loss: 86.13754272460938
running_loss: 84.00078582763672
running_loss: 82.14950561523438
running_loss: 80.5466537475586
running_loss: 79.16197967529297
running_loss: 77.86334991455078
running_loss: 76.74259185791016
running_loss: 75.77764129638672
running_loss: 74.8602294921875
running_loss: 74.07169342041016
running_loss: 73.31584167480469
run

In [255]:
loss

tensor(56.8134, device='cuda:0', grad_fn=<MseLossBackward>)

In [262]:
dataiter = iter(trainloader)
Xs, ys = next(dataiter)
ys_hat = net(Xs.to("cuda"))
import plotly.graph_objects as go

fig = go.Figure()
ys_hat_np = ys_hat.detach().to("cpu").numpy().flatten()
ys_np = ys.numpy().flatten()
fig.add_trace(go.Line(x = np.arange(len(ys_hat_np)), y = ys_hat_np, name="Prediction"))
fig.add_trace(go.Line(x = np.arange(len(ys_np)), y = ys_np, name="Actual"))



plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [257]:
from plotly import express as px

In [258]:
df1 = trainset.df.copy()
df1["Price"] = net(torch.tensor(df1[["Mileage", "Year", "EngSize", "Power"]].to_numpy()).float().to("cuda")).detach().to("cpu").numpy().flatten()
df1["Tag"] = "Predict"

In [259]:
df2 = trainset.df.copy()
df2["Tag"] = "Actual"
dft = pd.concat((df1, df2), axis=0)

In [260]:
fig = px.scatter_3d(dft, x='Year', y='Mileage', z='Price', color='Tag', width=800, height=400)
fig.update_layout(
    scene = dict(
        xaxis = dict(range=[0,22],),
        yaxis = dict(range=[0,40],),
        zaxis = dict(range=[0,140],),
    )
)
fig.update_traces(marker=dict(size=3,
                              line=dict(width=2,
                                        color='DarkSlateGrey')),selector=dict(mode='markers'))
fig.layout.scene.aspectratio = {'x':1, 'y':1, 'z':1}

fig.show()

In [261]:
fig = px.scatter_3d(trainset.df, x='Year', y='Mileage', z='Price')
fig.show()