# Import Necessary Libraries

In [1]:
import os

import pandas as pd
from sklearn.preprocessing import LabelEncoder
# One encoder instance for the whole notebook: it is fitted on the training
# labels and then reused (transform only) for the validation and test labels.
label_encoder = LabelEncoder()

# Training Dataset

In [2]:
# Read the raw training split: one example per line, "<sentence>;<label>".
# The encoding is pinned so parsing does not depend on the platform's locale
# default. NOTE(review): assumes the raw file is UTF-8 (or plain ASCII) - confirm.
with open('../data/raw/train.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

# Split on the LAST semicolon only, so a sentence that itself contains ';'
# still yields exactly two fields (a plain split(';') would produce extra
# fields and the two-column DataFrame constructor below would fail).
# Blank lines are skipped instead of becoming rows with a missing label.
data = [line.strip().rsplit(';', 1) for line in lines if line.strip()]

# Create DataFrame
train_df = pd.DataFrame(data, columns=['Sentence', 'Labels'])

# Display the DataFrame
train_df


Unnamed: 0,Sentence,Labels
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger
...,...,...
15995,i just had a very brief time in the beanbag an...,sadness
15996,i am now turning and i feel pathetic that i am...,sadness
15997,i feel strong and good overall,joy
15998,i feel like this was such a rude comment and i...,anger


In [3]:
# Summarize each column: row count, number of distinct values, the most
# frequent value and how often it occurs.
train_df.describe()

Unnamed: 0,Sentence,Labels
count,16000,16000
unique,15969,6
top,i feel on the verge of tears from weariness i ...,joy
freq,2,5362


In [4]:
# Count missing values per column.
train_df.isnull().sum()

Sentence    0
Labels      0
dtype: int64

In [5]:
# Show the dtype pandas assigned to each column.
train_df.dtypes

Sentence    object
Labels      object
dtype: object

In [6]:
# List the distinct values found in the Labels column of the training split.
train_df['Labels'].unique()

array(['sadness', 'anger', 'love', 'surprise', 'fear', 'joy'],
      dtype=object)

In [7]:
# Fit the encoder on the training labels and keep the resulting integer codes
# next to the original label strings.
train_label_codes = label_encoder.fit_transform(train_df['Labels'])
train_df['Labels_Encoded'] = train_label_codes
train_df

Unnamed: 0,Sentence,Labels,Labels_Encoded
0,i didnt feel humiliated,sadness,4
1,i can go from feeling so hopeless to so damned...,sadness,4
2,im grabbing a minute to post i feel greedy wrong,anger,0
3,i am ever feeling nostalgic about the fireplac...,love,3
4,i am feeling grouchy,anger,0
...,...,...,...
15995,i just had a very brief time in the beanbag an...,sadness,4
15996,i am now turning and i feel pathetic that i am...,sadness,4
15997,i feel strong and good overall,joy,2
15998,i feel like this was such a rude comment and i...,anger,0


In [57]:
# Specify the file path where you want to save the CSV file
file_path = '../data/processed/train_data.csv'

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Save the DataFrame to a CSV file (index=False keeps the row index out of it)
train_df.to_csv(file_path, index=False)

# Validation Dataset

In [58]:
# Read the raw validation split: one example per line, "<sentence>;<label>".
# The encoding is pinned so parsing does not depend on the platform's locale
# default. NOTE(review): assumes the raw file is UTF-8 (or plain ASCII) - confirm.
with open('../data/raw/val.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

# Split on the LAST semicolon only, so a sentence that itself contains ';'
# still yields exactly two fields (a plain split(';') would produce extra
# fields and the two-column DataFrame constructor below would fail).
# Blank lines are skipped instead of becoming rows with a missing label.
data = [line.strip().rsplit(';', 1) for line in lines if line.strip()]

# Create DataFrame
val_df = pd.DataFrame(data, columns=['Sentence', 'Labels'])

# Display the DataFrame
val_df


Unnamed: 0,Sentence,Labels
0,im feeling quite sad and sorry for myself but ...,sadness
1,i feel like i am still looking at a blank canv...,sadness
2,i feel like a faithful servant,love
3,i am just feeling cranky and blue,anger
4,i can have for a treat or if i am feeling festive,joy
...,...,...
1995,im having ssa examination tomorrow in the morn...,sadness
1996,i constantly worry about their fight against n...,joy
1997,i feel its important to share this info for th...,joy
1998,i truly feel that if you are passionate enough...,joy


In [59]:
# Summarize each column: row count, number of distinct values, the most
# frequent value and how often it occurs.
val_df.describe()

Unnamed: 0,Sentence,Labels
count,2000,2000
unique,1998,6
top,i feel so tortured by it,joy
freq,2,704


In [60]:
# Count missing values per column.
val_df.isnull().sum()

Sentence    0
Labels      0
dtype: int64

In [61]:
# Show the dtype pandas assigned to each column.
val_df.dtypes

Sentence    object
Labels      object
dtype: object

In [62]:
# List the distinct values found in the Labels column of the validation split.
val_df['Labels'].unique()

array(['sadness', 'love', 'anger', 'joy', 'fear', 'surprise'],
      dtype=object)

In [63]:
# Encode the validation labels with the already-fitted encoder (transform
# only, no refit) and store the codes next to the label strings.
val_label_codes = label_encoder.transform(val_df['Labels'])
val_df['Labels_Encoded'] = val_label_codes
val_df

Unnamed: 0,Sentence,Labels,Labels_Encoded
0,im feeling quite sad and sorry for myself but ...,sadness,4
1,i feel like i am still looking at a blank canv...,sadness,4
2,i feel like a faithful servant,love,3
3,i am just feeling cranky and blue,anger,0
4,i can have for a treat or if i am feeling festive,joy,2
...,...,...,...
1995,im having ssa examination tomorrow in the morn...,sadness,4
1996,i constantly worry about their fight against n...,joy,2
1997,i feel its important to share this info for th...,joy,2
1998,i truly feel that if you are passionate enough...,joy,2


In [64]:
# Specify the file path where you want to save the CSV file
file_path = '../data/processed/val_data.csv'

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Save the DataFrame to a CSV file (index=False keeps the row index out of it)
val_df.to_csv(file_path, index=False)

# Test Dataset

In [65]:
# Read the raw test split: one example per line, "<sentence>;<label>".
# The encoding is pinned so parsing does not depend on the platform's locale
# default. NOTE(review): assumes the raw file is UTF-8 (or plain ASCII) - confirm.
with open('../data/raw/test.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

# Split on the LAST semicolon only, so a sentence that itself contains ';'
# still yields exactly two fields (a plain split(';') would produce extra
# fields and the two-column DataFrame constructor below would fail).
# Blank lines are skipped instead of becoming rows with a missing label.
data = [line.strip().rsplit(';', 1) for line in lines if line.strip()]

# Create DataFrame
test_df = pd.DataFrame(data, columns=['Sentence', 'Labels'])

# Display the DataFrame
test_df


Unnamed: 0,Sentence,Labels
0,im feeling rather rotten so im not very ambiti...,sadness
1,im updating my blog because i feel shitty,sadness
2,i never make her separate from me because i do...,sadness
3,i left with my bouquet of red and yellow tulip...,joy
4,i was feeling a little vain when i did this one,sadness
...,...,...
1995,i just keep feeling like someone is being unki...,anger
1996,im feeling a little cranky negative after this...,anger
1997,i feel that i am useful to my people and that ...,joy
1998,im feeling more comfortable with derby i feel ...,joy


In [66]:
# Summarize each column: row count, number of distinct values, the most
# frequent value and how often it occurs.
test_df.describe()

Unnamed: 0,Sentence,Labels
count,2000,2000
unique,2000,6
top,im feeling rather rotten so im not very ambiti...,joy
freq,1,695


In [67]:
# Count missing values per column.
test_df.isnull().sum()

Sentence    0
Labels      0
dtype: int64

In [68]:
# Show the dtype pandas assigned to each column.
test_df.dtypes

Sentence    object
Labels      object
dtype: object

In [69]:
# List the distinct values found in the Labels column of the test split.
test_df['Labels'].unique()

array(['sadness', 'joy', 'fear', 'anger', 'love', 'surprise'],
      dtype=object)

In [70]:
# Encode the test labels with the already-fitted encoder (transform only, no
# refit) and store the codes next to the label strings.
test_label_codes = label_encoder.transform(test_df['Labels'])
test_df['Labels_Encoded'] = test_label_codes
test_df

Unnamed: 0,Sentence,Labels,Labels_Encoded
0,im feeling rather rotten so im not very ambiti...,sadness,4
1,im updating my blog because i feel shitty,sadness,4
2,i never make her separate from me because i do...,sadness,4
3,i left with my bouquet of red and yellow tulip...,joy,2
4,i was feeling a little vain when i did this one,sadness,4
...,...,...,...
1995,i just keep feeling like someone is being unki...,anger,0
1996,im feeling a little cranky negative after this...,anger,0
1997,i feel that i am useful to my people and that ...,joy,2
1998,im feeling more comfortable with derby i feel ...,joy,2


In [71]:
# Specify the file path where you want to save the CSV file
file_path = '../data/processed/test_data.csv'

# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# Save the DataFrame to a CSV file (index=False keeps the row index out of it)
test_df.to_csv(file_path, index=False)