In [49]:
import pandas as pd
import numpy as np
import requests
from io import StringIO
import seaborn as sns
from sklearn.model_selection import train_test_split

In [50]:
def async_read_csv(url):
    response = requests.get(url, stream=True)
    response.raise_for_status() # will raise error if request is not successful
    return pd.read_csv(StringIO(response.text))
    
# Read the streamed content
df = async_read_csv("https://raw.githubusercontent.com/uy-seng/cs4375/main/assignment-1/scripts/convert_to_csv/abalone.csv")
df.head(), df.shape

(  sex  length  diameter  height  whole_weight  shucked_weight  viscera_weight  \
 0   M   0.455     0.365   0.095        0.5140          0.2245          0.1010   
 1   M   0.350     0.265   0.090        0.2255          0.0995          0.0485   
 2   F   0.530     0.420   0.135        0.6770          0.2565          0.1415   
 3   M   0.440     0.365   0.125        0.5160          0.2155          0.1140   
 4   I   0.330     0.255   0.080        0.2050          0.0895          0.0395   
 
    shell_weight  rings  
 0         0.150     15  
 1         0.070      7  
 2         0.210      9  
 3         0.155     10  
 4         0.055      7  ,
 (4177, 9))

In [51]:
df.columns

Index(['sex', 'length', 'diameter', 'height', 'whole_weight', 'shucked_weight',
       'viscera_weight', 'shell_weight', 'rings'],
      dtype='object')

In [52]:
df = df.replace({"M":1, "F": 2, "I": 3})

  df = df.replace({"M":1, "F": 2, "I": 3})


In [53]:
df.corr()["rings"]

sex              -0.351822
length            0.556720
diameter          0.574660
height            0.557467
whole_weight      0.540390
shucked_weight    0.420884
viscera_weight    0.503819
shell_weight      0.627574
rings             1.000000
Name: rings, dtype: float64

In [54]:
features = ['sex', 'length', 'diameter', 'height', 'whole_weight', 'shucked_weight',
       'viscera_weight', 'shell_weight']

x = df[features]
y = df["rings"]

In [55]:
x_train, x_test, y_train, y_test =  train_test_split(x, y, test_size=0.2)

In [56]:
from sklearn.linear_model import SGDRegressor

sgd = SGDRegressor(max_iter=1000, tol=1e-3)

In [57]:
sgd.fit(x_train, y_train)

In [58]:
sgd.score(x_test, y_test)

0.5396893813396818

In [59]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
y_pred = sgd.predict(x_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
ev = explained_variance_score(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [60]:
mse, mae, ev, r2

(4.636813973101339, 1.577724096977246, 0.5400531692831272, 0.5396893813396818)