In [1]:
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from predictor import ThroughputPredictor

In [2]:
def load_dataset(file_path):
  """Read the measurement CSV and derive the model's feature table and targets.

  Rows containing any missing value are discarded, then the remaining rows are
  ordered by ``TestID`` and, within a test, by ``ElapsedTime`` (the original
  row index is kept, it is not reset).

  Derived columns:
    * ``AverageThroughput`` -- ``BytesAcked / ElapsedTime * 8``.
    * ``DeltaTime``, ``DeltaBytesSent``, ``DeltaBytesAcked``,
      ``DeltaBytesRetrans`` -- difference between a row and the previous row
      of the same ``TestID``; the first row of every test is compared against 0,
      so its delta equals the raw value.

  Args:
    file_path: Path (or anything ``pd.read_csv`` accepts) of the CSV file.

  Returns:
    A ``(features, labels)`` tuple: ``features`` is the DataFrame without the
    ``TestID`` and ``FinalSpeed`` columns, ``labels`` is the ``FinalSpeed``
    Series sharing the same index.
  """
  samples = pd.read_csv(file_path).dropna().sort_values(by=['TestID', 'ElapsedTime'])

  samples['AverageThroughput'] = samples['BytesAcked'] / samples['ElapsedTime'] * 8

  # (new column, cumulative source column) pairs, in the order they are appended.
  delta_columns = (
    ('DeltaTime', 'ElapsedTime'),
    ('DeltaBytesSent', 'BytesSent'),
    ('DeltaBytesAcked', 'BytesAcked'),
    ('DeltaBytesRetrans', 'BytesRetrans'),
  )
  for delta_name, source_name in delta_columns:
    # Previous value inside the same test; 0 stands in for "no previous row".
    previous = samples.groupby('TestID')[source_name].shift(1).fillna(0)
    samples[delta_name] = samples[source_name] - previous

  targets = samples.pop('FinalSpeed')
  feature_table = samples.drop(columns=['TestID'])

  return feature_table, targets

In [3]:
# Build the feature table and the FinalSpeed targets from the CSV that sits
# next to the notebook.
features, labels = load_dataset('./dataset.csv')
# Bare last expression: the notebook renders the feature table below the cell.
features

Unnamed: 0,ElapsedTime,BytesSent,BytesAcked,BytesRetrans,RTT,RTTVar,RWndLimited,SndBufLimited,MinRTT,AverageThroughput,DeltaTime,DeltaBytesSent,DeltaBytesAcked,DeltaBytesRetrans
0,604312,475115,231219,0,147065,20593,0,0,100210,3.060922,604312.0,475115.0,231219.0,0.0
1,1137446,1192623,840959,0,294836,6386,0,0,100210,5.914718,533134.0,717508.0,609740.0,0.0
2,2811147,2190895,1632203,26942,549209,5117,0,0,100210,4.644945,1673701.0,998272.0,791244.0,26942.0
3,4441521,2987811,1928565,324722,373350,23212,329000,0,100210,3.473702,1630374.0,796916.0,296362.0,297780.0
4,6096471,3791817,3182077,411220,534825,1683,422000,0,100210,4.175631,1654950.0,804006.0,1253512.0,86498.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19088,8684411,1277171122,1180949676,94916886,2228,591,3952000,0,161,1087.880043,520165.0,100535452.0,95076342.0,7972790.0
19089,9233330,1388421730,1283556156,103943874,2797,346,4146000,0,161,1112.106818,548919.0,111250608.0,102606480.0,9026988.0
19090,9602935,1436655000,1324500906,108148244,2391,1217,4374000,0,161,1103.413409,369605.0,48233270.0,40944750.0,4204370.0
19091,9805854,1449789452,1340189176,108932398,2298,171,4534000,0,161,1093.378854,202919.0,13134452.0,15688270.0,784154.0


In [4]:
# Convert the feature table / targets to NumPy arrays for scikit-learn.
X, y = features.values, labels.values

# Fixed seed: without `random_state` the shuffled 70/30 split (and therefore
# every loss / prediction below) changes on each Restart Kernel -> Run All.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
  X, y, test_size=0.3, shuffle=True, random_state=SPLIT_SEED
)
# NOTE(review): rows are shuffled individually, so samples that belong to the
# same TestID can land in both partitions. If FinalSpeed is constant per test
# this leaks the target into the test set -- confirm, and consider a grouped
# split (TestID is dropped inside load_dataset, so it is not available here).

# Fit the scaler on the training partition only, then reuse its statistics for
# the test partition so no test information enters the transform.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Seed PyTorch before the model is constructed so the initial layer weights
# (and any torch-based shuffling inside `fit` -- presumably; confirm in
# predictor.py) are repeatable across Restart Kernel -> Run All.
# Bit-exact reproducibility on CUDA is not guaranteed by the seed alone.
torch.manual_seed(42)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# One input unit per feature column produced by load_dataset.
regression = ThroughputPredictor(num_features=len(features.columns))
regression.to(device)

# Train on the standardised training partition; as the last expression of the
# cell, the value returned by `fit` is displayed below the epoch log.
regression.fit(X_train, y_train)

Epoch 1/100, Loss: 6283.4456922272175
Epoch 2/100, Loss: 1434.3717184035402
Epoch 3/100, Loss: 1539.8843053405365
Epoch 4/100, Loss: 1233.2836466634103
Epoch 5/100, Loss: 1240.8263385412795
Epoch 6/100, Loss: 1110.3591118021445
Epoch 7/100, Loss: 1131.8016613790294
Epoch 8/100, Loss: 1084.7254084268825
Epoch 9/100, Loss: 1044.6903777551709
Epoch 10/100, Loss: 1065.4166699017635
Epoch 11/100, Loss: 969.2981259168336
Epoch 12/100, Loss: 987.3851439189968
Epoch 13/100, Loss: 864.7908297845621
Epoch 14/100, Loss: 1006.9836499276058
Epoch 15/100, Loss: 862.85615520329
Epoch 16/100, Loss: 918.8027803835116
Epoch 17/100, Loss: 818.0974374814182
Epoch 18/100, Loss: 880.2740645656745
Epoch 19/100, Loss: 875.3864975108199
Epoch 20/100, Loss: 887.0668367158853
Epoch 21/100, Loss: 810.8531830119745
Epoch 22/100, Loss: 885.3652978900232
Epoch 23/100, Loss: 773.716761403677
Epoch 24/100, Loss: 815.8723360994501
Epoch 25/100, Loss: 765.847530775093
Epoch 26/100, Loss: 756.826647391017
Epoch 27/100, L

ThroughputPredictor(
  (stack): Sequential(
    (0): Linear(in_features=14, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [6]:
# Predict on the standardised held-out rows; the bare expression displays the
# returned float32 array.
# NOTE(review): in the recorded output every displayed row but the first has
# the same value (6.9114766) -- the network may have collapsed to a constant.
# Check target shape/scaling and the loss inside ThroughputPredictor.
regression.predict(X_test)

array([1120.1779   ,    6.9114766,    6.9114766, ...,    6.9114766,
          6.9114766,    6.9114766], shape=(5728,), dtype=float32)