In [19]:
# importing necessary libraries
import pandas as pd
import numpy as np
import boto3
from sklearn.model_selection import train_test_split
import sagemaker
from sagemaker import Session
import io
import sagemaker.amazon.common as smac
import os
from sagemaker.amazon.amazon_estimator import get_image_uri

In [20]:
#read the csv file
df=pd.read_csv("student_scores.csv")

In [21]:
#head of the dataframe
df.head()

Unnamed: 0,Hours,Scores
0,2.5,21
1,5.1,47
2,3.2,27
3,8.5,75
4,3.5,30


In [22]:
#shape of the data
df.shape

(25, 2)

In [23]:
#separate x and y
x=df[["Hours"]]
y=df[["Scores"]]

In [24]:
#checking data types
x.dtypes
x=x.astype("float32")
y=y.astype("float32")

In [25]:
y.dtypes

Scores    float32
dtype: object

In [26]:
#split the data
X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

In [27]:
#reset index
X_train=X_train.reset_index(drop=True)
y_train=y_train.reset_index(drop=True)
X_test=X_test.reset_index(drop=True)
y_test=y_test.reset_index(drop=True)

In [28]:
#we need to take label column as vector
y_train=y_train.iloc[:,0]

In [29]:
y_test=y_test.iloc[:,0]

In [30]:
#lets create sagemaker session
sagemaker_session=sagemaker.Session()
#define the bucket name
bucket_name="bappy-sagemaker"
#define the prefix
prefix="linear-learner"
#get the execution role
role=sagemaker.get_execution_role()

In [31]:
#convert X_train into numpy aray
X_train=np.array(X_train)

In [32]:
#create the buffer
buf=io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,X_train,y_train)
buf.seek(0)

0

In [33]:
#define the name of the file
key="student-data"

#code to upload in s3
boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'train',key)).upload_fileobj(buf)

#path of our data
s3_train_data=f"s3://{bucket_name}/{prefix}/train/{key}"

print("Data uploaded",s3_train_data)

Data uploaded s3://bappy-sagemaker/linear-learner/train/student-data


In [34]:
#convert X_train into numpy aray
X_test=np.array(X_test)
#create the buffer
buf=io.BytesIO()
smac.write_numpy_to_dense_tensor(buf,X_test,y_test)
buf.seek(0)
#define the name of the file
key="student-data-test"

#code to upload in s3
boto3.resource('s3').Bucket(bucket_name).Object(os.path.join(prefix,'test',key)).upload_fileobj(buf)

#path of our data
s3_train_data=f"s3://{bucket_name}/{prefix}/test/{key}"

print("Data uploaded",s3_train_data)

Data uploaded s3://bappy-sagemaker/linear-learner/test/student-data-test


In [35]:
#output location
output_location=f"s3://{bucket_name}/{prefix}/output"

In [36]:
output_location

's3://bappy-sagemaker/linear-learner/output'

In [37]:
#bring the container
container=sagemaker.image_uris.retrieve("linear-learner",boto3.Session().region_name)

In [38]:
#define the estimator
linear=sagemaker.estimator.Estimator(container,role,instance_count=1,instance_type="ml.c4.xlarge",output_path=output_location,sagemaker_session=sagemaker_session)

In [39]:
#setting up the hyperparameters
linear.set_hyperparameters(feature_dim=1,predictor_type="regressor",mini_batch_size=4,epochs=6,num_models=32,loss="absolute_loss")

In [40]:
#fit the model
linear.fit({"train":s3_train_data})

2025-03-18 06:31:12 Starting - Starting the training job......
..25-03-18 06:32:16 Downloading - Downloading input data.
...........06:32:41 Downloading - Downloading the training image.
.[34mDocker entrypoint called with argument(s): train[0mpleted. Training in progress..
[34mRunning default environment configuration script[0m
[34m[03/18/2025 06:34:57 INFO 140333394597696] Reading default configuration from /opt/amazon/lib/python3.8/site-packages/algorithm/resources/default-input.json: {'mini_batch_size': '1000', 'epochs': '15', 'feature_dim': 'auto', 'use_bias': 'true', 'binary_classifier_model_selection_criteria': 'accuracy', 'f_beta': '1.0', 'target_recall': '0.8', 'target_precision': '0.8', 'num_models': 'auto', 'num_calibration_samples': '10000000', 'init_method': 'uniform', 'init_scale': '0.07', 'init_sigma': '0.01', 'init_bias': '0.0', 'optimizer': 'auto', 'loss': 'auto', 'margin': '1.0', 'quantile': '0.5', 'loss_insensitivity': '0.01', 'huber_delta': '1.0', 'num_classes':

In [41]:
#deploy the model
linear_regresor=linear.deploy(initial_instance_count=1,instance_type="ml.m4.xlarge")

----------!

In [42]:
linear_regresor.serializer=sagemaker.serializers.CSVSerializer()
linear_regresor.deserializer=sagemaker.deserializers.JSONDeserializer()

In [43]:
#prediction
results=linear_regresor.predict(X_test)

In [44]:
results

{'predictions': [{'score': 72.52403259277344},
  {'score': 49.063316345214844},
  {'score': 42.360252380371094},
  {'score': 47.05239486694336},
  {'score': 51.0742301940918}]}

In [45]:
predictions=np.array([i["score"] for i in results["predictions"]])

In [46]:
predictions

array([72.52403259, 49.06331635, 42.36025238, 47.05239487, 51.07423019])