In [None]:
import sys
sys.path.append('../')
import torch
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import pandas as pd
from io import BytesIO
from torch.utils.data import DataLoader
from tqdm import tqdm
from utils import Dataset, test_transform

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# map_location remaps the stored tensors onto `device` while loading, so a
# checkpoint that was saved from a CUDA session still opens on a CPU-only box
# (without it torch.load tries to restore tensors to their original device).
# NOTE(review): this file holds a whole pickled model object, and unpickling
# can execute arbitrary code -- only load checkpoints from a trusted source.
# NOTE(review): PyTorch >= 2.6 defaults torch.load to weights_only=True, which
# rejects fully pickled models; pass weights_only=False on those versions.
model = torch.load('../m-EfficientNetV2-S.pth', map_location=device).eval().to(device)

df = pd.read_parquet('../train.pqt')
dataset = Dataset(df, transforms=test_transform)
# shuffle=False keeps batches in dataset order; a later cell assigns the
# per-sample results back onto `df` by position.
# NOTE(review): presumably Dataset yields rows in the same order as `df` -- confirm in utils.
dataloader = DataLoader(dataset, batch_size=8, shuffle=False)

In [None]:
# Collect the signed per-sample error (target minus model output) over the
# whole dataloader.
#
# Each batch's errors are appended to a list and joined once after the loop.
# Calling np.concatenate inside the loop re-copies everything gathered so far
# on every iteration, which is quadratic in the number of batches.
# The list is seeded with an empty float32 array so an empty dataloader still
# yields an empty float32 result and dtype promotion matches a running concat.
diff_chunks = [np.array([], dtype=np.float32)]
with torch.no_grad():  # inference only: skip autograd bookkeeping
    for x, y, _ in tqdm(dataloader):
        x = x.to(device)
        y = y.to(device)

        result = model(x)
        # NOTE(review): assumes y and result have matching shapes; a (B,) target
        # against a (B, 1) output would broadcast to (B, B) -- TODO confirm.
        diff_chunks.append((y - result).flatten().cpu().numpy())

diffs = np.concatenate(diff_chunks)

In [None]:
# Attach per-sample error columns to `df`, then order rows by absolute error.
#
# NOTE(review): this cell rebinds `df` to a re-sorted frame, so it is not safe
# to re-run on its own: a second run would attach `diffs` (still in dataloader
# order) to rows that are already in l1_loss order. Re-load `df` first.
df['diff'] = diffs  # signed error: target minus model output
df['l1_loss'] = df['diff'].abs()
# diff = target - output, hence output = target - diff. Adding the two instead
# would give 2 * rate - output rather than the model's prediction.
# NOTE(review): assumes the target yielded by the dataset is df['rate'] --
# confirm against utils.Dataset.
df['predict'] = df['rate'].to_numpy() - diffs
# Bucket `rate` into 0.01-wide bins labelled by each bin's right edge.
# include_lowest=True keeps rate == 0.0 in the first bin; with the default
# right-closed bins it would get a NaN label and silently fall out of any
# groupby on 'range'.
df['range'] = pd.cut(
    df['rate'],
    bins=np.arange(0.0, 1.001, 0.01),
    include_lowest=True,
).apply(lambda interval: interval.right)
df = df.sort_values('l1_loss').reset_index(drop=True)

In [None]:
# Inspect a single sample by position in `df`: print its signed error and
# rate, then display the image decoded from the raw bytes in the 'img' column.
# A negative position counts back from the last row.
i = -3000
diff, rate, img_bytes = (df[col].iloc[i] for col in ('diff', 'rate', 'img'))
img = Image.open(BytesIO(img_bytes))
print(diff, rate)
img

In [None]:
# Histogram of the values stored in the 'predict' column.
df.hist(column='predict')

In [None]:
# Bar chart of mean absolute error per 'range' bin; each bar is positioned
# and labelled by the mean rate of the samples in that bin.
(
    df[['rate', 'l1_loss', 'range']]
    .groupby('range', as_index=False)
    .mean()
    .plot.bar(x='rate', y='l1_loss', rot=0)
)

In [None]:
# Mean rate and mean signed error per 'range' bin, ordered by mean rate so the
# line plot runs left to right.
ranges = (
    df[['rate', 'diff', 'range']]
    .groupby('range', as_index=False)
    .mean()
    .sort_values('rate')
    .reset_index(drop=True)
)
plt.plot(ranges['rate'], ranges['diff'])