import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score

# Define GCN model
class GCNLayer(nn.Module):
    """One graph-convolution step: aggregate over the graph, then project.

    Args:
        input_dim: Size of each incoming feature vector.
        output_dim: Size of each produced feature vector.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Learnable affine map applied to the aggregated features.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, adjacency_matrix, feature_matrix):
        """Return ``linear(adjacency_matrix @ feature_matrix)``.

        Presumably ``adjacency_matrix`` is (nodes, nodes) and
        ``feature_matrix`` is (nodes, input_dim) -- confirm against callers.
        """
        return self.linear(adjacency_matrix @ feature_matrix)

class GCN(nn.Module):
    """Two stacked ``GCNLayer`` modules with a ReLU between them.

    The forward pass returns the second layer's raw output; no softmax is
    applied here.

    Args:
        input_dim: Feature size fed to the first layer.
        hidden_dim: Feature size between the two layers.
        output_dim: Feature size produced by the second layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.gcn1 = GCNLayer(input_dim, hidden_dim)
        self.gcn2 = GCNLayer(hidden_dim, output_dim)

    def forward(self, adjacency_matrix, feature_matrix):
        """Apply layer 1, ReLU, then layer 2, reusing the same adjacency."""
        hidden = torch.relu(self.gcn1(adjacency_matrix, feature_matrix))
        return self.gcn2(adjacency_matrix, hidden)

# Define training function
def train_model(model, X_train, y_train, epochs=100, learning_rate=0.01):
    """Train ``model`` with full-batch Adam steps on a cross-entropy loss.

    The model is called as ``model(adjacency, X_train)``, where ``adjacency``
    is an identity matrix with one row per training sample.

    Args:
        model: Module callable as ``model(adjacency, features)``.
        X_train: Feature tensor; its first dimension is the sample count.
        y_train: Target class indices accepted by ``nn.CrossEntropyLoss``.
        epochs: Number of optimisation steps to run.
        learning_rate: Learning rate passed to Adam.

    Returns:
        None. The loss is printed on epochs 1, 11, 21, ...
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # The identity adjacency is the same on every epoch, so allocate the
    # N x N matrix once instead of once per iteration, and place it on the
    # same device as the features so the matmul does not mix devices.
    adjacency = torch.eye(X_train.shape[0], device=X_train.device)

    # Nothing inside the loop leaves training mode, so set it once.
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(adjacency, X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0:
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}')

# Define evaluation function
def evaluate_model(model, X_test, y_test):
    """Print the accuracy of ``model`` on the given test set.

    The model is run in eval mode, without gradient tracking, using an
    identity adjacency matrix sized to the number of test samples. The
    predicted class is the index of the largest score in each output row.

    Args:
        model: Module callable as ``model(adjacency, features)``.
        X_test: Feature tensor; its first dimension is the sample count.
        y_test: True class indices.

    Returns:
        None. The accuracy is only printed.
    """
    model.eval()
    identity_adjacency = torch.eye(X_test.shape[0])
    with torch.no_grad():
        scores = model(identity_adjacency, X_test)
    predicted = scores.argmax(dim=1)
    accuracy = accuracy_score(y_test, predicted)
    print(f'Test Accuracy: {accuracy}')

# Load the dataset
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines="skip"` is the supported way to drop malformed rows.
df = pd.read_csv("your_dataset.csv", on_bad_lines="skip")

# Preprocess the data
# Keep only rows that have text and a recognised label: an unmapped label
# would turn into NaN and could not be stored as an integer class index.
label_map = {'negative': 0, 'neutral': 1, 'positive': 2}
df = df.dropna(subset=['text'])
df = df[df['sentiment'].isin(list(label_map))]
X = df['text'].values
y = df['sentiment'].map(label_map).values

# Turn the raw text into a dense bag-of-words count matrix
vectorizer = CountVectorizer(stop_words='english')
X = vectorizer.fit_transform(X).toarray()

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors after splitting: the model multiplies the
# features with a torch adjacency matrix, which rejects NumPy arrays, and
# CrossEntropyLoss needs integer class-index targets.
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)

# Instantiate the model
model = GCN(input_dim=X_train.shape[1], hidden_dim=16, output_dim=3)

# Train the model
train_model(model, X_train, y_train)

# Evaluate the model
evaluate_model(model, X_test, y_test)

In [None]:
# !mkdir -p ~/.kaggle
# !cp kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json

In [None]:
# !kaggle datasets download -d kazanova/sentiment140

In [None]:
# from zipfile import ZipFile
# dataset = '/content/sentiment140.zip'

# with ZipFile(dataset, 'r') as zip:
#   zip.extractall()
#   print('extraction done')

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score

In [None]:

import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression


In [None]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Load the tweet CSV into a pandas DataFrame. The file is decoded as
# ISO-8859-1 and only the first 9000 data rows are read (nrows=9000).
twitter_data = pd.read_csv('/content/Twitter_Data.csv', encoding= 'ISO-8859-1',nrows=9000)


In [None]:
twitter_data

Unnamed: 0,clean_text,category
0,when modi promised âminimum government maxim...,-1
1,talk all the nonsense and continue all the dra...,0
2,what did just say vote for modi welcome bjp t...,1
3,asking his supporters prefix chowkidar their n...,1
4,answer who among these the most powerful world...,1
...,...,...
8995,they same thing what modi done india two devil...,0
8996,account temporarily unavailable because violat...,1
8997,they are modi hater they will not understand l...,0
8998,india the largest democracy the world but with...,-1


In [None]:
# Rename the raw CSV columns to the names used by the rest of the notebook
# ('clean_text' -> 'text', 'category' -> 'target'); modifies the frame in place.
column_name = {'clean_text': 'text', 'category': 'target'}
twitter_data.rename(columns=column_name, inplace=True)

In [None]:

twitter_data.head()

Unnamed: 0,text,target
0,when modi promised âminimum government maxim...,-1
1,talk all the nonsense and continue all the dra...,0
2,what did just say vote for modi welcome bjp t...,1
3,asking his supporters prefix chowkidar their n...,1
4,answer who among these the most powerful world...,1


In [None]:
twitter_data['target'].value_counts()

target
 1    3721
 0    3152
-1    2127
Name: count, dtype: int64

In [None]:
# Recode label -1 as 2 so the targets are the non-negative class indices
# 0, 1, 2, which is what nn.CrossEntropyLoss (used for training below) expects.
twitter_data.replace({'target':{-1:2}}, inplace=True)

In [None]:
twitter_data['target'].value_counts()



target
1    3721
0    3152
2    2127
Name: count, dtype: int64

In [None]:
# port_stem=PorterStemmer()

In [None]:
# def stemming(content):
#   stemmed_content=re.sub('[^a-zA-Z]',' ', content)
#   stemmed_content=stemmed_content.lower()
#   stemmed_content=stemmed_content.split()
#   stemmed_content=[port_stem.stem(word) for word in stemmed_content if not word in stopwords.words('english')]
#   stemmed_content=' '.join(stemmed_content)

#   return stemmed_content

In [None]:
# twitter_data['stemmed_content']= twitter_data['text'].apply(stemming)

In [None]:
twitter_data.head()

Unnamed: 0,text,target
0,when modi promised âminimum government maxim...,2
1,talk all the nonsense and continue all the dra...,0
2,what did just say vote for modi welcome bjp t...,1
3,asking his supporters prefix chowkidar their n...,1
4,answer who among these the most powerful world...,1


In [None]:
print(twitter_data['text'])

0       when modi promised âminimum government maxim...
1       talk all the nonsense and continue all the dra...
2       what did just say vote for modi  welcome bjp t...
3       asking his supporters prefix chowkidar their n...
4       answer who among these the most powerful world...
                              ...                        
8995    they same thing what modi done india two devil...
8996    account temporarily unavailable because violat...
8997    they are modi hater they will not understand l...
8998    india the largest democracy the world but with...
8999                   would you listen your predecessor 
Name: text, Length: 9000, dtype: object


In [None]:
# Remove rows whose text is missing before building the vocabulary.
twitter_data.dropna(subset=['text'], inplace=True)

# Bag-of-words encoder with default settings.
vectorizer = CountVectorizer()

# Learn the vocabulary and encode the same texts in a single pass, then
# expand the sparse count matrix into a dense NumPy array.
X = vectorizer.fit_transform(twitter_data['text']).toarray()

In [None]:


# Labels as a NumPy array, taken from the same (text-filtered) frame as X so
# rows of X and entries of Y stay aligned.
Y = twitter_data['target'].values

In [None]:
# Boolean mask of samples whose label is NaN.
missing_indices = np.isnan(Y)

# Keep only the samples that do have a label, in both features and labels.
X_cleaned, Y_cleaned = X[~missing_indices], Y[~missing_indices]

In [None]:
print(X_cleaned)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [None]:
print(Y_cleaned)

[2 0 1 ... 0 2 0]


In [None]:
# 80/20 split with a fixed seed. Stratify on Y_cleaned (not the unfiltered Y)
# so the stratification labels match the rows of X_cleaned being split.
X_train, X_test, Y_train, Y_test = train_test_split(X_cleaned, Y_cleaned, test_size=0.2, stratify=Y_cleaned, random_state=2)


In [None]:
# Convert the NumPy splits to tensors: float32 features for the linear layers,
# int64 class indices as targets for nn.CrossEntropyLoss.
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
Y_train = torch.LongTensor(Y_train)
Y_test = torch.LongTensor(Y_test)

In [None]:
Y_train.shape

torch.Size([7199])

In [None]:
Y_test.shape

torch.Size([1800])

In [None]:
print(Y_train)

tensor([0, 2, 0,  ..., 0, 2, 1])


In [None]:
input_dim = X_train.shape[1]
print("Input dimension:", input_dim)

Input dimension: 18570


In [None]:
class GCNLayer(nn.Module):
    """Graph-convolution building block: aggregate features, then project.

    Args:
        input_dim: Size of each incoming feature vector.
        output_dim: Size of each produced feature vector.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Affine projection applied after the adjacency product.
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, adjacency_matrix, feature_matrix):
        """Multiply features by the adjacency, then apply the linear map."""
        aggregated = adjacency_matrix @ feature_matrix
        return self.linear(aggregated)

class GCN(nn.Module):
    """Two-layer graph network: ``GCNLayer`` -> ReLU -> ``GCNLayer``.

    The output of the second layer is returned as-is (no softmax).

    Args:
        input_dim: Feature size fed to the first layer.
        hidden_dim: Feature size between the two layers.
        output_dim: Feature size produced by the second layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.gcn1 = GCNLayer(input_dim, hidden_dim)
        self.gcn2 = GCNLayer(hidden_dim, output_dim)

    def forward(self, adjacency_matrix, feature_matrix):
        """Run both layers with the same adjacency matrix."""
        first = self.gcn1(adjacency_matrix, feature_matrix)
        activated = torch.relu(first)
        return self.gcn2(adjacency_matrix, activated)

In [None]:
def train_model(model, X_train, y_train, epochs=50, learning_rate=0.01):
    """Train ``model`` with full-batch Adam steps on a cross-entropy loss.

    The model is called as ``model(adjacency, X_train)``, where ``adjacency``
    is an identity matrix with one row per training sample.

    Args:
        model: Module callable as ``model(adjacency, features)``.
        X_train: Feature tensor; its first dimension is the sample count.
        y_train: Target class indices accepted by ``nn.CrossEntropyLoss``.
        epochs: Number of optimisation steps to run.
        learning_rate: Learning rate passed to Adam.

    Returns:
        None. The loss is printed on epochs 1, 11, 21, ...
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    # The identity adjacency never changes between epochs. Building it once
    # avoids allocating an N x N float matrix on every iteration; it is placed
    # on the same device as the features so the matmul does not mix devices.
    adjacency = torch.eye(X_train.shape[0], device=X_train.device)

    # Nothing inside the loop leaves training mode, so set it once.
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(adjacency, X_train)
        loss = criterion(output, y_train)
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0:
            print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}')

In [None]:
def evaluate_model(model, X_test, y_test):
    """Print the test accuracy of ``model``.

    Runs the model in eval mode without gradient tracking, using an identity
    adjacency matrix with one row per test sample, and takes the index of the
    largest score in each output row as the predicted class.

    Args:
        model: Module callable as ``model(adjacency, features)``.
        X_test: Feature tensor; its first dimension is the sample count.
        y_test: True class indices.

    Returns:
        None. The accuracy is only printed.
    """
    model.eval()
    sample_count = X_test.shape[0]
    with torch.no_grad():
        logits = model(torch.eye(sample_count), X_test)
        predicted = torch.argmax(logits, dim=1)
    print(f'Test Accuracy: {accuracy_score(y_test, predicted)}')

system crashed

In [None]:
# input_dime = min(X_train.shape[0], 1000)

In [None]:
# Build the GCN: one input per vocabulary feature, an 8-unit hidden layer, and
# three outputs, one per target class (0, 1, 2).
model = GCN(input_dim=input_dim, hidden_dim=8, output_dim=3)

In [None]:
train_model(model, X_train, Y_train)

Epoch 1/50, Loss: 1.1263835430145264
Epoch 11/50, Loss: 0.8635871410369873
Epoch 21/50, Loss: 0.45557719469070435
Epoch 31/50, Loss: 0.19151316583156586
Epoch 41/50, Loss: 0.08422284573316574


In [None]:
evaluate_model(model, X_test, Y_test)

Test Accuracy: 0.6755555555555556


In [None]:
from sklearn.metrics import classification_report

def evaluate_model(model, X_test, y_test):
    """Print accuracy, confusion matrix and per-class metrics for ``model``.

    The model is run in eval mode without gradient tracking, using an identity
    adjacency matrix with one row per test sample. The predicted class is the
    index of the largest score in each output row.

    Args:
        model: Module callable as ``model(adjacency, features)``.
        X_test: Feature tensor; its first dimension is the sample count.
        y_test: True class indices.

    Returns:
        None. All results are printed.
    """
    # Imported here because `confusion_matrix` is not among the sklearn names
    # imported earlier in the file; without this the call below depends on a
    # later cell having been run first.
    from sklearn.metrics import confusion_matrix

    model.eval()
    with torch.no_grad():
        output = model(torch.eye(X_test.shape[0]), X_test)
        _, predicted = torch.max(output, 1)
        accuracy = accuracy_score(y_test, predicted)
        print(f'Test Accuracy: {accuracy}')

        # Calculate confusion matrix (rows: true class, columns: predicted)
        conf_matrix = confusion_matrix(y_test, predicted)
        print("Confusion Matrix:")
        print(pd.DataFrame(conf_matrix))

        # Calculate precision, recall, and F1-score
        report = classification_report(y_test, predicted, output_dict=True)
        report_df = pd.DataFrame(report).transpose()

        # Print precision, recall, and F1-score in table form
        print("Precision, Recall, and F1-score:")
        print(report_df[['precision', 'recall', 'f1-score', 'support']])

# Call evaluate_model function
evaluate_model(model, X_test, Y_test)


Test Accuracy: 0.6755555555555556
Confusion Matrix:
     0    1    2
0  434  126   70
1  110  538   96
2   86   96  244
Precision, Recall, and F1-score:
              precision    recall  f1-score      support
0              0.688889  0.688889  0.688889   630.000000
1              0.707895  0.723118  0.715426   744.000000
2              0.595122  0.572770  0.583732   426.000000
accuracy       0.675556  0.675556  0.675556     0.675556
macro avg      0.663969  0.661592  0.662682  1800.000000
weighted avg   0.674553  0.675556  0.674970  1800.000000


In [None]:
from sklearn.metrics import classification_report

def evaluate_model(model, X_test, y_test):
    """Print accuracy, a labelled confusion matrix and per-class metrics.

    The model is run in eval mode without gradient tracking, using an identity
    adjacency matrix with one row per test sample. The predicted class is the
    index of the largest score in each output row.

    Args:
        model: Module callable as ``model(adjacency, features)``.
        X_test: Feature tensor; its first dimension is the sample count.
        y_test: True class indices.

    Returns:
        None. All results are printed.
    """
    # Imported here because `confusion_matrix` is not among the sklearn names
    # imported earlier in the file; without this the call below depends on a
    # later cell having been run first.
    from sklearn.metrics import confusion_matrix

    model.eval()
    with torch.no_grad():
        output = model(torch.eye(X_test.shape[0]), X_test)
        _, predicted = torch.max(output, 1)
    accuracy = accuracy_score(y_test, predicted)
    print(f'Test Accuracy: {accuracy}')

    # Derive the class labels from the data instead of hard-coding three
    # names, so the table's row/column labels always match the matrix shape
    # (a fixed 3-name list raises if a class is absent or extra).
    labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(predicted)]))

    # Calculate confusion matrix (rows: true class, columns: predicted)
    conf_matrix = confusion_matrix(y_test, predicted, labels=labels)
    print("Confusion Matrix:")
    print(pd.DataFrame(
        conf_matrix,
        index=[f'Actual {label}' for label in labels],
        columns=[f'Predicted {label}' for label in labels],
    ))

    # Calculate classification report
    report = classification_report(y_test, predicted, output_dict=True)
    report_df = pd.DataFrame(report).transpose()

    # Print precision, recall, F1-score, and support in table form
    print("Precision, Recall, F1-score, and Support:")
    print(report_df[['precision', 'recall', 'f1-score', 'support']].to_string())

# Call evaluate_model function
evaluate_model(model, X_test, Y_test)


Test Accuracy: 0.6755555555555556
Confusion Matrix:
          Predicted 0  Predicted 1  Predicted 2
Actual 0          434          126           70
Actual 1          110          538           96
Actual 2           86           96          244
Precision, Recall, F1-score, and Support:
              precision    recall  f1-score      support
0              0.688889  0.688889  0.688889   630.000000
1              0.707895  0.723118  0.715426   744.000000
2              0.595122  0.572770  0.583732   426.000000
accuracy       0.675556  0.675556  0.675556     0.675556
macro avg      0.663969  0.661592  0.662682  1800.000000
weighted avg   0.674553  0.675556  0.674970  1800.000000


New section: ANN baseline (plain feed-forward network, no adjacency matrix)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score

class ANN(nn.Module):
    """Feed-forward network with a single ReLU-activated hidden layer.

    Args:
        input_dim: Size of each input vector.
        hidden_dim: Number of hidden units.
        output_dim: Number of output scores per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the raw output scores ``fc2(relu(fc1(x)))``."""
        return self.fc2(F.relu(self.fc1(x)))

def train_model_ann(model, X_train, y_train, epochs=50, learning_rate=0.01):
    """Train ``model`` with full-batch Adam steps on a cross-entropy loss.

    Args:
        model: Module callable as ``model(features)``.
        X_train: Feature tensor passed to the model as a single batch.
        y_train: Target class indices accepted by ``nn.CrossEntropyLoss``.
        epochs: Number of optimisation steps to run.
        learning_rate: Learning rate passed to Adam.

    Returns:
        None. The loss is printed on epochs 1, 11, 21, ...
    """
    adam = torch.optim.Adam(model.parameters(), lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(epochs):
        model.train()
        adam.zero_grad()
        loss = loss_fn(model(X_train), y_train)
        loss.backward()
        adam.step()

        # Only every tenth epoch (starting with the first) is reported.
        if epoch % 10 != 0:
            continue
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}')

def evaluate_model_ann(model, X_test, y_test):
    """Print the test accuracy of ``model``.

    Runs the model in eval mode without gradient tracking and takes the index
    of the largest score in each output row as the predicted class.

    Args:
        model: Module callable as ``model(features)``.
        X_test: Feature tensor passed to the model as a single batch.
        y_test: True class indices.

    Returns:
        None. The accuracy is only printed.
    """
    model.eval()
    with torch.no_grad():
        scores = model(X_test)
    predicted = scores.argmax(dim=1)
    accuracy = accuracy_score(y_test, predicted)
    print(f'Test Accuracy: {accuracy}')


In [None]:
# Build the ANN with the same sizes as the GCN above (8 hidden units, 3
# output classes). This rebinds `model`, so the previously trained GCN is no
# longer reachable under that name.
model = ANN(input_dim=input_dim, hidden_dim=8, output_dim=3)

In [None]:
train_model_ann(model, X_train, Y_train)

Epoch 1/50, Loss: 1.133617877960205
Epoch 11/50, Loss: 0.8380170464515686
Epoch 21/50, Loss: 0.4329952597618103
Epoch 31/50, Loss: 0.17799267172813416
Epoch 41/50, Loss: 0.07713112980127335


In [None]:
evaluate_model_ann(model, X_test, Y_test)

Test Accuracy: 0.6888888888888889


In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Assuming you have already trained your ANN model and obtained predictions
# model = ...
# X_test = ...
# Y_test = ...

# Predict a class for every test sample: eval mode, no gradient tracking,
# class = index of the largest score in each output row.
model.eval()
with torch.no_grad():
    output = model(X_test)
predicted = output.argmax(dim=1)

# Confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(Y_test, predicted)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Text report with per-class precision, recall, F1-score and support
report = classification_report(Y_test, predicted)
print("Classification Report:", report, sep="\n")


Confusion Matrix:
[[455 117  58]
 [103 554  87]
 [ 86 109 231]]
Classification Report:
              precision    recall  f1-score   support

           0       0.71      0.72      0.71       630
           1       0.71      0.74      0.73       744
           2       0.61      0.54      0.58       426

    accuracy                           0.69      1800
   macro avg       0.68      0.67      0.67      1800
weighted avg       0.69      0.69      0.69      1800



new cnn
