# Hate Speech Detection Project

In [22]:
import os

import numpy as np
import pandas as pd
import sagemaker

In [24]:
# Load the tweet dataset; the file uses ';' as the field separator.
tweets_amlo = pd.read_csv("data/tweets_amlo.txt", sep=";")

In [169]:
# Summarize column names, dtypes and non-null counts of the loaded frame.
tweets_amlo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13586 entries, 0 to 13585
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   user             13586 non-null  object 
 1   date_time        13586 non-null  object 
 2   likes            13586 non-null  int64  
 3   tweet            13586 non-null  object 
 4   tokenized_tweet  13586 non-null  object 
 5   offensive        13586 non-null  int64  
 6   sentiment        13586 non-null  float64
 7   hate_speech      13586 non-null  int64  
dtypes: float64(1), int64(3), object(4)
memory usage: 849.2+ KB


In [106]:
# Class balance of the target column. In the recorded run roughly 15.5% of the
# rows are labelled 1, so accuracy alone is a weak metric for this problem.
tweets_amlo["hate_speech"].value_counts()

0    11477
1     2109
Name: hate_speech, dtype: int64

In [108]:
# Keep only the two columns used as model features plus the target label.
data_input = tweets_amlo[['offensive', 'sentiment', 'hate_speech']]

In [134]:
# Features are the two numeric columns; the target is the hate_speech label.
x = data_input.loc[:, ['offensive', 'sentiment']]
y = data_input['hate_speech']

In [135]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=42,
)

In [136]:
# Sanity check: shapes of the train/test features followed by the train/test labels.
print(*(split.shape for split in (x_train, x_test, y_train, y_test)))

(10868, 2) (2718, 2) (10868,) (2718,)


In [137]:
def make_csv(x, y, filename, data_dir):
    '''Merge features and labels and write them to one header-less CSV file.

    The label is written as the first column, followed by the feature columns.
    Rows are paired by position, so ``x`` and ``y`` do not need to share an
    index (e.g. ``x`` may be a NumPy array while ``y`` is a shuffled Series).
    Rows that contain a missing value are dropped before writing.

    :param x: Data features (anything ``pd.DataFrame`` accepts)
    :param y: Data labels (anything ``pd.Series`` accepts)
    :param filename: Name of csv file, ex. 'train.csv'
    :param data_dir: The directory where files will be saved (created if missing)
    '''
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
    os.makedirs(data_dir, exist_ok=True)

    # Discard the incoming indices: pd.concat(axis=1) aligns on index labels, and
    # mismatched labels would produce NaN-padded rows that dropna() silently removes.
    x_df = pd.DataFrame(x).reset_index(drop=True)
    y_df = pd.Series(y).reset_index(drop=True)

    # Label first, then features; no header and no index column in the file.
    csv_file = pd.concat([y_df, x_df], axis=1).dropna()
    csv_file.to_csv(os.path.join(data_dir, filename), header=False, index=False)

    print('Path created: '+str(data_dir)+'/'+str(filename))

In [138]:
# Write the train and test splits as label-first CSV files into hate_speech_data/.
make_csv(x_train, y_train, filename='train.csv', data_dir='hate_speech_data')
make_csv(x_test, y_test, filename='test.csv', data_dir='hate_speech_data')

Path created: hate_speech_data/train.csv
Path created: hate_speech_data/test.csv


In [139]:
# session and role
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

# create an S3 bucket
bucket = sagemaker_session.default_bucket()

In [140]:
# local directory that holds the train.csv / test.csv files written by make_csv
data_dir = 'hate_speech_data'

# S3 key prefix under which this project's files are stored
prefix = 'hate_speech_detection'

# upload all data to S3 (the whole directory, i.e. both train.csv and test.csv)
input_data = sagemaker_session.upload_data(path=data_dir, bucket=bucket, key_prefix=prefix)
print(input_data)

s3://sagemaker-us-east-1-991156428171/hate_speech_detection


In [145]:
from sagemaker.pytorch import PyTorch

# S3 location where the training job stores its model artifacts.
# (The scheme must be 's3://'; the original 's3//' was not a valid S3 URI.)
output_path = 's3://{}/{}'.format(bucket, prefix)

# PyTorch training job definition: runs source/train.py on a single CPU instance.
estimator = PyTorch(entry_point = 'train.py',
                    source_dir = 'source',
                    role = role,
                    framework_version = '1.0',
                    train_instance_count = 1,
                    train_instance_type = 'ml.c4.xlarge',
                    # Previously computed but never passed, so artifacts went to the default location.
                    output_path = output_path,
                    sagemaker_session = sagemaker_session,
                    # Handed to train.py as hyperparameters.
                    hyperparameters = {
                        'input_features':2,   # the two feature columns: offensive, sentiment
                        'hidden_dim':20,
                        'output_dim':1,
                        'epochs':3000
                    })

In [146]:
%%time

# Launch the training job. The 'train' channel points at the whole uploaded prefix,
# which also contains test.csv; presumably train.py reads only train.csv — TODO confirm.
estimator.fit({'train':input_data})

'create_image_uri' will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.
's3_input' class will be renamed to 'TrainingInput' in SageMaker Python SDK v2.
'create_image_uri' will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


2021-02-28 09:22:12 Starting - Starting the training job...
2021-02-28 09:22:15 Starting - Launching requested ML instances.........
2021-02-28 09:23:47 Starting - Preparing the instances for training...
2021-02-28 09:24:39 Downloading - Downloading input data...
2021-02-28 09:25:07 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-02-28 09:25:08,678 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-02-28 09:25:08,681 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-02-28 09:25:08,693 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-02-28 09:25:09,326 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-02-28 09:25:09,597 sagemaker-containers INFO    

[34mEpoch: 41, Loss: 0.049686706912648426[0m
[34mEpoch: 42, Loss: 0.0485274362559226[0m
[34mEpoch: 43, Loss: 0.04742681197793328[0m
[34mEpoch: 44, Loss: 0.046380797492677296[0m
[34mEpoch: 45, Loss: 0.045385627330725944[0m
[34mEpoch: 46, Loss: 0.044437634431649005[0m
[34mEpoch: 47, Loss: 0.04353361933615074[0m
[34mEpoch: 48, Loss: 0.04267072431227638[0m
[34mEpoch: 49, Loss: 0.04184624478723956[0m
[34mEpoch: 50, Loss: 0.041058589689447825[0m
[34mEpoch: 51, Loss: 0.04030512847322284[0m
[34mEpoch: 52, Loss: 0.03958365578893731[0m
[34mEpoch: 53, Loss: 0.0388920453943691[0m
[34mEpoch: 54, Loss: 0.03822835812087266[0m
[34mEpoch: 55, Loss: 0.03759107235863259[0m
[34mEpoch: 56, Loss: 0.036978564790366986[0m
[34mEpoch: 57, Loss: 0.03638952578833401[0m
[34mEpoch: 58, Loss: 0.035822736811218524[0m
[34mEpoch: 59, Loss: 0.035277007301536635[0m
[34mEpoch: 60, Loss: 0.034751270646562464[0m
[34mEpoch: 61, Loss: 0.03424430784060974[0m
[34mEpoch: 62, Loss: 0.033

[34mEpoch: 220, Loss: 0.012777324063632273[0m
[34mEpoch: 221, Loss: 0.012737183685175376[0m
[34mEpoch: 222, Loss: 0.01269736726469913[0m
[34mEpoch: 223, Loss: 0.012657859348213495[0m
[34mEpoch: 224, Loss: 0.012618664847206348[0m
[34mEpoch: 225, Loss: 0.012579779259002818[0m
[34mEpoch: 226, Loss: 0.012541204950711132[0m
[34mEpoch: 227, Loss: 0.012502935289704939[0m
[34mEpoch: 228, Loss: 0.012464957629978396[0m
[34mEpoch: 229, Loss: 0.012427278822205267[0m
[34mEpoch: 230, Loss: 0.012389895723262604[0m
[34mEpoch: 231, Loss: 0.012352800938601409[0m
[34mEpoch: 232, Loss: 0.012315983342347929[0m
[34mEpoch: 233, Loss: 0.012279452323897246[0m
[34mEpoch: 234, Loss: 0.012243195979204665[0m
[34mEpoch: 235, Loss: 0.01220721099748621[0m
[34mEpoch: 236, Loss: 0.012171491902217248[0m
[34mEpoch: 237, Loss: 0.012136044744338699[0m
[34mEpoch: 238, Loss: 0.012100868037233274[0m
[34mEpoch: 239, Loss: 0.012065946588356182[0m
[34mEpoch: 240, Loss: 0.01203127236194315

[34mEpoch: 407, Loss: 0.008408641083437493[0m
[34mEpoch: 408, Loss: 0.00839484025604152[0m
[34mEpoch: 409, Loss: 0.008381092018828555[0m
[34mEpoch: 410, Loss: 0.00836740103477725[0m
[34mEpoch: 411, Loss: 0.00835376452131767[0m
[34mEpoch: 412, Loss: 0.008340185431997165[0m
[34mEpoch: 413, Loss: 0.00832665993407641[0m
[34mEpoch: 414, Loss: 0.008313193451410454[0m
[34mEpoch: 415, Loss: 0.008299783526985646[0m
[34mEpoch: 416, Loss: 0.008286426254189431[0m
[34mEpoch: 417, Loss: 0.008273119729129669[0m
[34mEpoch: 418, Loss: 0.008259866752063938[0m
[34mEpoch: 419, Loss: 0.008246669838611357[0m
[34mEpoch: 420, Loss: 0.008233528309516258[0m
[34mEpoch: 421, Loss: 0.008220439517219513[0m
[34mEpoch: 422, Loss: 0.008207399860650418[0m
[34mEpoch: 423, Loss: 0.008194417755807634[0m
[34mEpoch: 424, Loss: 0.008181491448906406[0m
[34mEpoch: 425, Loss: 0.008168618061855764[0m
[34mEpoch: 426, Loss: 0.008155788698848074[0m
[34mEpoch: 427, Loss: 0.008143013710401487

[34mEpoch: 578, Loss: 0.006657994860352163[0m
[34mEpoch: 579, Loss: 0.006650387734146428[0m
[34mEpoch: 580, Loss: 0.006642802449922963[0m
[34mEpoch: 581, Loss: 0.006635237754186029[0m
[34mEpoch: 582, Loss: 0.006627696456454344[0m
[34mEpoch: 583, Loss: 0.006620172637009537[0m
[34mEpoch: 584, Loss: 0.0066126678688911[0m
[34mEpoch: 585, Loss: 0.006605184208218031[0m
[34mEpoch: 586, Loss: 0.00659772214654806[0m
[34mEpoch: 587, Loss: 0.0065902844901146946[0m
[34mEpoch: 588, Loss: 0.006582867700624974[0m
[34mEpoch: 589, Loss: 0.006575470937122732[0m
[34mEpoch: 590, Loss: 0.006568093067155055[0m
[34mEpoch: 591, Loss: 0.006560735549034736[0m
[34mEpoch: 592, Loss: 0.006553404106108409[0m
[34mEpoch: 593, Loss: 0.006546094516867961[0m
[34mEpoch: 594, Loss: 0.006538806830150844[0m
[34mEpoch: 595, Loss: 0.006531545559604072[0m
[34mEpoch: 596, Loss: 0.006524305399777182[0m
[34mEpoch: 597, Loss: 0.006517085287456436[0m
[34mEpoch: 598, Loss: 0.00650988948956162

[34mEpoch: 757, Loss: 0.005574053380830391[0m
[34mEpoch: 758, Loss: 0.005569231868315932[0m
[34mEpoch: 759, Loss: 0.0055644259251964[0m
[34mEpoch: 760, Loss: 0.0055596293573448845[0m
[34mEpoch: 761, Loss: 0.005554840519764771[0m
[34mEpoch: 762, Loss: 0.005550062007323229[0m
[34mEpoch: 763, Loss: 0.005545297014864924[0m
[34mEpoch: 764, Loss: 0.0055405409765625436[0m
[34mEpoch: 765, Loss: 0.005535795570844174[0m
[34mEpoch: 766, Loss: 0.005531063427604785[0m
[34mEpoch: 767, Loss: 0.005526339083040811[0m
[34mEpoch: 768, Loss: 0.005521631191740269[0m
[34mEpoch: 769, Loss: 0.005516933992915861[0m
[34mEpoch: 770, Loss: 0.005512244855977534[0m
[34mEpoch: 771, Loss: 0.005507568507064019[0m
[34mEpoch: 772, Loss: 0.005502895648466977[0m
[34mEpoch: 773, Loss: 0.005498238539763746[0m
[34mEpoch: 774, Loss: 0.005493596353144401[0m
[34mEpoch: 775, Loss: 0.0054889622566450256[0m
[34mEpoch: 776, Loss: 0.0054843344564412325[0m
[34mEpoch: 777, Loss: 0.0054797172350

[34mEpoch: 928, Loss: 0.004880671182875308[0m
[34mEpoch: 929, Loss: 0.004877255020094598[0m
[34mEpoch: 930, Loss: 0.004873847296939268[0m
[34mEpoch: 931, Loss: 0.004870446497263129[0m
[34mEpoch: 932, Loss: 0.004867055092295311[0m
[34mEpoch: 933, Loss: 0.004863667791636288[0m
[34mEpoch: 934, Loss: 0.004860285183764887[0m
[34mEpoch: 935, Loss: 0.004856912040173302[0m
[34mEpoch: 936, Loss: 0.004853543566805297[0m
[34mEpoch: 937, Loss: 0.00485017659458115[0m
[34mEpoch: 938, Loss: 0.004846815941927921[0m
[34mEpoch: 939, Loss: 0.004843458333441549[0m
[34mEpoch: 940, Loss: 0.004840110649827213[0m
[34mEpoch: 941, Loss: 0.0048367696913126255[0m
[34mEpoch: 942, Loss: 0.0048334358893989[0m
[34mEpoch: 943, Loss: 0.004830101847213502[0m
[34mEpoch: 944, Loss: 0.004826779784427259[0m
[34mEpoch: 945, Loss: 0.004823465319832698[0m
[34mEpoch: 946, Loss: 0.004820155318172709[0m
[34mEpoch: 947, Loss: 0.004816851179257556[0m
[34mEpoch: 948, Loss: 0.00481355754598840

[34mEpoch: 1100, Loss: 0.004372046524810635[0m
[34mEpoch: 1101, Loss: 0.004369488615652286[0m
[34mEpoch: 1102, Loss: 0.004366933458236909[0m
[34mEpoch: 1103, Loss: 0.004364378676258809[0m
[34mEpoch: 1104, Loss: 0.0043618335946844795[0m
[34mEpoch: 1105, Loss: 0.004359290792830835[0m
[34mEpoch: 1106, Loss: 0.004356754509906496[0m
[34mEpoch: 1107, Loss: 0.004354217613980454[0m
[34mEpoch: 1108, Loss: 0.00435168854871628[0m
[34mEpoch: 1109, Loss: 0.004349163434865094[0m
[34mEpoch: 1110, Loss: 0.004346640011440637[0m
[34mEpoch: 1111, Loss: 0.004344120565700531[0m
[34mEpoch: 1112, Loss: 0.004341606239984599[0m
[34mEpoch: 1113, Loss: 0.004339092153855187[0m
[34mEpoch: 1114, Loss: 0.004336583892172881[0m
[34mEpoch: 1115, Loss: 0.004334079217580507[0m
[34mEpoch: 1116, Loss: 0.004331580390497127[0m
[34mEpoch: 1117, Loss: 0.004329083375296763[0m
[34mEpoch: 1118, Loss: 0.00432659416597332[0m
[34mEpoch: 1119, Loss: 0.0043241005345293826[0m
[34mEpoch: 1120, Lo

[34mEpoch: 1271, Loss: 0.003984511293623603[0m
[34mEpoch: 1272, Loss: 0.003982509304778557[0m
[34mEpoch: 1273, Loss: 0.003980509780426871[0m
[34mEpoch: 1274, Loss: 0.003978511750067588[0m
[34mEpoch: 1275, Loss: 0.003976517351700547[0m
[34mEpoch: 1276, Loss: 0.003974527246994947[0m
[34mEpoch: 1277, Loss: 0.003972534007725378[0m
[34mEpoch: 1278, Loss: 0.003970546947379054[0m
[34mEpoch: 1279, Loss: 0.003968561322336097[0m
[34mEpoch: 1280, Loss: 0.003966579293882736[0m
[34mEpoch: 1281, Loss: 0.003964600542870613[0m
[34mEpoch: 1282, Loss: 0.003962624458029076[0m
[34mEpoch: 1283, Loss: 0.003960649905147799[0m
[34mEpoch: 1284, Loss: 0.0039586791767005644[0m
[34mEpoch: 1285, Loss: 0.003956711551776866[0m
[34mEpoch: 1286, Loss: 0.003954738860256771[0m
[34mEpoch: 1287, Loss: 0.003952775680290666[0m
[34mEpoch: 1288, Loss: 0.003950812850941107[0m
[34mEpoch: 1289, Loss: 0.003948856991440736[0m
[34mEpoch: 1290, Loss: 0.003946901451720949[0m
[34mEpoch: 1291, L

[34mEpoch: 1450, Loss: 0.0036636544063704847[0m
[34mEpoch: 1451, Loss: 0.003662051227540887[0m
[34mEpoch: 1452, Loss: 0.003660446895847739[0m
[34mEpoch: 1453, Loss: 0.0036588457304904355[0m
[34mEpoch: 1454, Loss: 0.0036572503449360623[0m
[34mEpoch: 1455, Loss: 0.0036556530079010258[0m
[34mEpoch: 1456, Loss: 0.003654056977827486[0m
[34mEpoch: 1457, Loss: 0.003652462088099125[0m
[34mEpoch: 1458, Loss: 0.003650872386104319[0m
[34mEpoch: 1459, Loss: 0.0036492889406311217[0m
[34mEpoch: 1460, Loss: 0.0036477017674755936[0m
[34mEpoch: 1461, Loss: 0.003646114884572472[0m
[34mEpoch: 1462, Loss: 0.0036445337842376654[0m
[34mEpoch: 1463, Loss: 0.003642955140471547[0m
[34mEpoch: 1464, Loss: 0.0036413724713190458[0m
[34mEpoch: 1465, Loss: 0.00363979343229845[0m
[34mEpoch: 1466, Loss: 0.0036382095942446[0m
[34mEpoch: 1467, Loss: 0.003636634897616348[0m
[34mEpoch: 1468, Loss: 0.00363506084238213[0m
[34mEpoch: 1469, Loss: 0.003633491016156526[0m
[34mEpoch: 1470

[34mEpoch: 1621, Loss: 0.003414249796391438[0m
[34mEpoch: 1622, Loss: 0.0034129250894815322[0m
[34mEpoch: 1623, Loss: 0.003411600151188358[0m
[34mEpoch: 1624, Loss: 0.0034102783706913475[0m
[34mEpoch: 1625, Loss: 0.0034089531177185555[0m
[34mEpoch: 1626, Loss: 0.0034076288693005337[0m
[34mEpoch: 1627, Loss: 0.00340630785881042[0m
[34mEpoch: 1628, Loss: 0.0034049888354873805[0m
[34mEpoch: 1629, Loss: 0.003403673665409861[0m
[34mEpoch: 1630, Loss: 0.0034023605593809392[0m
[34mEpoch: 1631, Loss: 0.0034010435890262478[0m
[34mEpoch: 1632, Loss: 0.0033997302028784897[0m
[34mEpoch: 1633, Loss: 0.0033984163627344705[0m
[34mEpoch: 1634, Loss: 0.003397104928730543[0m
[34mEpoch: 1635, Loss: 0.003395792449169682[0m
[34mEpoch: 1636, Loss: 0.0033944849430534137[0m
[34mEpoch: 1637, Loss: 0.0033931776306545987[0m
[34mEpoch: 1638, Loss: 0.00339187286306322[0m
[34mEpoch: 1639, Loss: 0.0033905670744878193[0m
[34mEpoch: 1640, Loss: 0.003389262475179348[0m
[34mEpoch

[34mEpoch: 1801, Loss: 0.0031957111569789125[0m
[34mEpoch: 1802, Loss: 0.00319460156155735[0m
[34mEpoch: 1803, Loss: 0.003193488930482489[0m
[34mEpoch: 1804, Loss: 0.0031923790090368736[0m
[34mEpoch: 1805, Loss: 0.003191266462878942[0m
[34mEpoch: 1806, Loss: 0.0031901547117079918[0m
[34mEpoch: 1807, Loss: 0.0031890497619010455[0m
[34mEpoch: 1808, Loss: 0.0031879458924300723[0m
[34mEpoch: 1809, Loss: 0.003186841974391718[0m
[34mEpoch: 1810, Loss: 0.0031857351065429115[0m
[34mEpoch: 1811, Loss: 0.003184631956842214[0m
[34mEpoch: 1812, Loss: 0.0031835307385388698[0m
[34mEpoch: 1813, Loss: 0.00318243076302581[0m
[34mEpoch: 1814, Loss: 0.0031813320909110086[0m
[34mEpoch: 1815, Loss: 0.0031802340640690917[0m
[34mEpoch: 1816, Loss: 0.003179141771566028[0m
[34mEpoch: 1817, Loss: 0.003178045958713484[0m
[34mEpoch: 1818, Loss: 0.0031769500770885513[0m
[34mEpoch: 1819, Loss: 0.0031758573380762357[0m
[34mEpoch: 1820, Loss: 0.0031747680441194777[0m
[34mEpoch

[34mEpoch: 1972, Loss: 0.003019800153370885[0m
[34mEpoch: 1973, Loss: 0.003018849431185355[0m
[34mEpoch: 1974, Loss: 0.00301789402516101[0m
[34mEpoch: 1975, Loss: 0.0030169449667373986[0m
[34mEpoch: 1976, Loss: 0.0030159937367903555[0m
[34mEpoch: 1977, Loss: 0.003015044674224412[0m
[34mEpoch: 1978, Loss: 0.003014093229940695[0m
[34mEpoch: 1979, Loss: 0.0030131450290578734[0m
[34mEpoch: 1980, Loss: 0.003012198324883475[0m
[34mEpoch: 1981, Loss: 0.0030112508884181507[0m
[34mEpoch: 1982, Loss: 0.0030103036280540387[0m
[34mEpoch: 1983, Loss: 0.0030093605585145167[0m
[34mEpoch: 1984, Loss: 0.003008415699384098[0m
[34mEpoch: 1985, Loss: 0.0030074720229119924[0m
[34mEpoch: 1986, Loss: 0.003006525000450335[0m
[34mEpoch: 1987, Loss: 0.0030055820592956883[0m
[34mEpoch: 1988, Loss: 0.003004640303747617[0m
[34mEpoch: 1989, Loss: 0.0030037003357224635[0m
[34mEpoch: 1990, Loss: 0.0030027600925871963[0m
[34mEpoch: 1991, Loss: 0.0030018198894006553[0m
[34mEpoch

[34mEpoch: 2143, Loss: 0.0028675995166192584[0m
[34mEpoch: 2144, Loss: 0.0028667661344525513[0m
[34mEpoch: 2145, Loss: 0.002865934163407965[0m
[34mEpoch: 2146, Loss: 0.002865106955109908[0m
[34mEpoch: 2147, Loss: 0.002864279807507313[0m
[34mEpoch: 2148, Loss: 0.0028634557710319484[0m
[34mEpoch: 2149, Loss: 0.0028626315082630276[0m
[34mEpoch: 2150, Loss: 0.002861806795314332[0m
[34mEpoch: 2151, Loss: 0.002860982392690757[0m
[34mEpoch: 2152, Loss: 0.0028601602751760374[0m
[34mEpoch: 2153, Loss: 0.0028593371863423777[0m
[34mEpoch: 2154, Loss: 0.0028585149323936084[0m
[34mEpoch: 2155, Loss: 0.002857696060862329[0m
[34mEpoch: 2156, Loss: 0.0028568810707698925[0m
[34mEpoch: 2157, Loss: 0.002856061537660181[0m
[34mEpoch: 2158, Loss: 0.0028552419541679106[0m
[34mEpoch: 2159, Loss: 0.0028544276824397263[0m
[34mEpoch: 2160, Loss: 0.002853614990356354[0m
[34mEpoch: 2161, Loss: 0.0028527991168110192[0m
[34mEpoch: 2162, Loss: 0.002851984490187143[0m
[34mEpoc

[34mEpoch: 2322, Loss: 0.002729103711619208[0m
[34mEpoch: 2323, Loss: 0.002728386249990224[0m
[34mEpoch: 2324, Loss: 0.002727666691815335[0m
[34mEpoch: 2325, Loss: 0.0027269450302011295[0m
[34mEpoch: 2326, Loss: 0.002726222237840686[0m
[34mEpoch: 2327, Loss: 0.002725504018434951[0m
[34mEpoch: 2328, Loss: 0.0027247842742535314[0m
[34mEpoch: 2329, Loss: 0.002724066044210619[0m
[34mEpoch: 2330, Loss: 0.00272334395341892[0m
[34mEpoch: 2331, Loss: 0.0027226275216639868[0m
[34mEpoch: 2332, Loss: 0.0027219074221080885[0m
[34mEpoch: 2333, Loss: 0.0027211923518186336[0m
[34mEpoch: 2334, Loss: 0.0027204769327743325[0m
[34mEpoch: 2335, Loss: 0.0027197608404958993[0m
[34mEpoch: 2336, Loss: 0.0027190448902289135[0m
[34mEpoch: 2337, Loss: 0.00271832707156668[0m
[34mEpoch: 2338, Loss: 0.002717613309282933[0m
[34mEpoch: 2339, Loss: 0.0027168936209116964[0m
[34mEpoch: 2340, Loss: 0.0027161782902296516[0m
[34mEpoch: 2341, Loss: 0.002715470152537061[0m
[34mEpoch: 

[34mEpoch: 2494, Loss: 0.0026119776174441146[0m
[34mEpoch: 2495, Loss: 0.00261133770540407[0m
[34mEpoch: 2496, Loss: 0.0026106975285460675[0m
[34mEpoch: 2497, Loss: 0.002610061022236347[0m
[34mEpoch: 2498, Loss: 0.002609425242203642[0m
[34mEpoch: 2499, Loss: 0.0026087869484049627[0m
[34mEpoch: 2500, Loss: 0.0026081484356026974[0m
[34mEpoch: 2501, Loss: 0.002607512356446829[0m
[34mEpoch: 2502, Loss: 0.00260687450750519[0m
[34mEpoch: 2503, Loss: 0.0026062386138130493[0m
[34mEpoch: 2504, Loss: 0.002605604048380554[0m
[34mEpoch: 2505, Loss: 0.0026049689958193797[0m
[34mEpoch: 2506, Loss: 0.002604332988553068[0m
[34mEpoch: 2507, Loss: 0.002603698086835691[0m
[34mEpoch: 2508, Loss: 0.0026030615962143195[0m
[34mEpoch: 2509, Loss: 0.002602425343901089[0m
[34mEpoch: 2510, Loss: 0.0026017923165000518[0m
[34mEpoch: 2511, Loss: 0.0026011569781183162[0m
[34mEpoch: 2512, Loss: 0.0026005223207600505[0m
[34mEpoch: 2513, Loss: 0.0025998886630637747[0m
[34mEpoch:

[34mEpoch: 2664, Loss: 0.0025086723019901083[0m
[34mEpoch: 2665, Loss: 0.0025080921096981863[0m
[34mEpoch: 2666, Loss: 0.0025075114124175997[0m
[34mEpoch: 2667, Loss: 0.002506932184616328[0m
[34mEpoch: 2668, Loss: 0.002506349861686324[0m
[34mEpoch: 2669, Loss: 0.0025057678524296988[0m
[34mEpoch: 2670, Loss: 0.0025051891313331667[0m
[34mEpoch: 2671, Loss: 0.0025046112017812943[0m
[34mEpoch: 2672, Loss: 0.0025040309641738883[0m
[34mEpoch: 2673, Loss: 0.0025034583084897633[0m
[34mEpoch: 2674, Loss: 0.0025028788604710473[0m
[34mEpoch: 2675, Loss: 0.002502299191338093[0m
[34mEpoch: 2676, Loss: 0.0025017226508715403[0m
[34mEpoch: 2677, Loss: 0.0025011472592392334[0m
[34mEpoch: 2678, Loss: 0.0025005682046723007[0m
[34mEpoch: 2679, Loss: 0.0024999877141038728[0m
[34mEpoch: 2680, Loss: 0.0024994129520231156[0m
[34mEpoch: 2681, Loss: 0.002498838546302419[0m
[34mEpoch: 2682, Loss: 0.0024982633231644623[0m
[34mEpoch: 2683, Loss: 0.0024976910694917307[0m
[34

[34mEpoch: 2836, Loss: 0.0024145446754404083[0m
[34mEpoch: 2837, Loss: 0.002414027601627179[0m
[34mEpoch: 2838, Loss: 0.0024135087811286956[0m
[34mEpoch: 2839, Loss: 0.0024129947187122415[0m
[34mEpoch: 2840, Loss: 0.0024124765880772425[0m
[34mEpoch: 2841, Loss: 0.0024119582142005467[0m
[34mEpoch: 2842, Loss: 0.0024114414360805335[0m
[34mEpoch: 2843, Loss: 0.002410927030270208[0m
[34mEpoch: 2844, Loss: 0.0024104079919856566[0m
[34mEpoch: 2845, Loss: 0.00240989357080354[0m
[34mEpoch: 2846, Loss: 0.002409377403077166[0m
[34mEpoch: 2847, Loss: 0.0024088624557365977[0m
[34mEpoch: 2848, Loss: 0.002408348120673228[0m
[34mEpoch: 2849, Loss: 0.002407831948647541[0m
[34mEpoch: 2850, Loss: 0.002407320391685084[0m
[34mEpoch: 2851, Loss: 0.0024068063190769043[0m
[34mEpoch: 2852, Loss: 0.0024062937908310832[0m
[34mEpoch: 2853, Loss: 0.0024057817367835305[0m
[34mEpoch: 2854, Loss: 0.0024052680934983028[0m
[34mEpoch: 2855, Loss: 0.0024047543682565366[0m
[34mEpo


2021-02-28 09:57:27 Uploading - Uploading generated training model
2021-02-28 09:57:27 Completed - Training job completed
Training seconds: 1968
Billable seconds: 1968
CPU times: user 5.44 s, sys: 279 ms, total: 5.72 s
Wall time: 35min 46s


In [147]:
%%time

from sagemaker.pytorch import PyTorchModel

model = PyTorchModel(model_data = estimator.model_data,
                         role = role,
                         framework_version = '1.0',
                         entry_point = 'predict.py',
                         source_dir = 'source')


predictor = model.deploy(initial_instance_count=1, instance_type='ml.t2.medium')

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.
'create_image_uri' will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


-----------------!CPU times: user 452 ms, sys: 47.3 ms, total: 499 ms
Wall time: 8min 33s


In [148]:
# Read back the header-less test split written by make_csv.
test_csv_path = os.path.join(data_dir, "test.csv")
test_data = pd.read_csv(test_csv_path, header=None)

# Column 0 holds the label; the remaining columns are the features.
test_y = test_data.iloc[:, 0]
test_x = test_data.iloc[:, 1:]

In [164]:
# Score the held-out features on the deployed endpoint. np.squeeze removes size-1
# axes from the response (presumably (n, 1) -> (n,) — TODO confirm against predict.py).
test_y_preds = np.squeeze(predictor.predict(test_x))

In [150]:
# Fraction of test rows whose predicted label equals the true label.
# NOTE(review): the recorded run scores exactly 1.0; that may mean hate_speech is
# fully determined by the two input features — worth verifying before trusting it.
matches = test_y_preds == test_y
accuracy = np.mean(matches)

print(accuracy)


## print out the array of predicted and true labels, if you want
print('\nPredicted class labels: ', test_y_preds, sep='\n')
print('\nTrue class labels: ', test_y.values, sep='\n')

1.0

Predicted class labels: 
[0. 0. 0. ... 0. 0. 0.]

True class labels: 
[0 0 0 ... 0 0 0]


In [162]:
from sklearn.metrics import recall_score
# Recall of the positive class (label 1) on the test split.
recall_score(test_y.values, test_y_preds)

1.0

In [163]:
from sklearn.metrics import precision_score
# Precision of the positive class (label 1) on the test split.
precision_score(test_y.values, test_y_preds)

1.0

In [168]:
from sklearn.metrics import classification_report
# Per-class precision, recall, F1 and support for the test split.
print(classification_report(test_y.values, test_y_preds))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2285
           1       1.00      1.00      1.00       433

    accuracy                           1.00      2718
   macro avg       1.00      1.00      1.00      2718
weighted avg       1.00      1.00      1.00      2718

