In [None]:
from sklearn.datasets import make_circles
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import numpy as np
import matplotlib.pyplot as plt

# Step 1: Build a small, noisy two-class "concentric circles" dataset.
# The classes are not linearly separable, which is the point of the demo.
X, y = make_circles(
    n_samples=100,
    noise=0.1,
    factor=0.5,
    random_state=42,
)

# Step 2: One hidden layer of 10 units with a sigmoid ('logistic') activation.
clf_with_activation = MLPClassifier(
    hidden_layer_sizes=(10,),
    activation='logistic',
    max_iter=1000,
    random_state=42,
).fit(X, y)

# Step 3: Same architecture, but 'identity' activation, i.e. no non-linearity
# in the hidden layer.
clf_without_activation = MLPClassifier(
    hidden_layer_sizes=(10,),
    activation='identity',
    max_iter=1000,
    random_state=42,
).fit(X, y)

# Step 4: Visualize the Decision Boundary
def plot_decision_boundary(clf, X, y, ax, title, margin=0.1, resolution=100):
    """Draw a classifier's decision regions and the data points on one axes.

    Parameters
    ----------
    clf : object
        Fitted classifier exposing ``predict_proba``; column 1 of its output
        is used as the plotted probability.
    X : numpy.ndarray
        2-D array whose first two columns are the plotted features.
    y : array-like
        Per-sample values used to colour the scatter points.
    ax : matplotlib axes (or any object with ``contourf``, ``scatter`` and
        ``set_title``)
        Target axes to draw on.
    title : str
        Title for the axes.
    margin : float, optional
        Padding added around the data range on every side (default 0.1).
    resolution : int, optional
        Number of grid points per axis used to evaluate the classifier
        (default 100).

    Returns
    -------
    None
    """
    # Plotting ranges: data extent plus a small margin on each side
    x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin
    xx, yy = np.meshgrid(
        np.linspace(x_min, x_max, resolution),
        np.linspace(y_min, y_max, resolution),
    )

    # Evaluate the probability of class 1 on every grid point, then fold the
    # flat result back into the grid's shape for contouring
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = clf.predict_proba(grid_points)[:, 1].reshape(xx.shape)

    # Three levels (0, 0.5, 1) give two filled bands split at probability 0.5
    ax.contourf(xx, yy, Z, alpha=0.8, levels=np.linspace(0, 1, 3))
    ax.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')
    ax.set_title(title)

# Two side-by-side panels, one per classifier
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

# Left panel: sigmoid hidden layer
plot_decision_boundary(
    clf=clf_with_activation,
    X=X,
    y=y,
    ax=ax[0],
    title="With Sigmoid Activation Function",
)

# Right panel: identity (linear) hidden layer
plot_decision_boundary(
    clf=clf_without_activation,
    X=X,
    y=y,
    ax=ax[1],
    title="Without Activation Function (Linear)",
)

plt.tight_layout()
plt.show()


In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, classification_report

# Example statements for the demo (split into train and test sets below)
train_statements = [
    "I love sunny days",
    "What a wonderful world",
    "I hate being stuck in traffic",
    "This is a terrible situation",
    "I am very happy with the service",
    "The food was bad",
    "I am delighted to be part of the team",
    "This is the worst movie I have ever seen",
    "I am not satisfied with the product",
    "He is my best friend"
]

# Corresponding labels, 1 for positive and 0 for negative
train_labels = [1, 1, 0, 0, 1, 0, 1, 0, 0, 1]

# Splitting the data into training and test sets (20% -> 2 of the 10 statements held out)
X_train, X_test, y_train, y_test = train_test_split(train_statements, train_labels, test_size=0.2, random_state=42)

# Pipeline: bag-of-words counts followed by a Naive Bayes classifier.
# NOTE: CountVectorizer() is used with its defaults here, so no stop-word list
# is applied; pass stop_words='english' if stop-word removal is wanted.
model = make_pipeline(CountVectorizer(), MultinomialNB())

# Training the model with the training data
model.fit(X_train, y_train)

# Making predictions with the test data
y_pred = model.predict(X_test)

# Evaluating the model. With only two test samples a class can easily have no
# predictions, so undefined precision/recall is reported as 0 without a warning.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=0)

# Print rather than display the (accuracy, report) tuple: the tuple repr shows
# the report with escaped newlines, which is unreadable.
print(f"Accuracy: {accuracy:.2f}")
print(report)


In [None]:
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from collections import Counter
from sklearn.model_selection import train_test_split
from torch.nn.utils.rnn import pad_sequence

# Custom Dataset class for handling text
class TextDataset(Dataset):
    """Whitespace-tokenised text dataset yielding (token-id tensor, label) pairs.

    Index 0 is reserved: it is the padding value used by ``collate_fn`` and the
    id returned for any word that is not in the vocabulary.

    Parameters
    ----------
    texts : sequence of str
        Raw sentences; tokens are obtained with ``str.split()``.
    labels : sequence of numbers
        One label per text; returned as ``float32`` tensors.
    vocab : dict[str, int], optional
        Existing word -> id mapping to reuse (e.g. the training vocabulary when
        building a validation/test dataset). When omitted, a vocabulary is
        built from ``texts``.
    """

    def __init__(self, texts, labels, vocab=None):
        self.texts = texts
        self.labels = labels
        # Reuse a supplied vocabulary so ids stay consistent across datasets;
        # otherwise derive one from this dataset's own texts.
        self.vocab = self.build_vocab(self.texts) if vocab is None else vocab

    @staticmethod
    def build_vocab(texts):
        """Return a word -> id mapping with ids starting at 1 (0 is padding).

        Words are sorted before numbering so the mapping is identical on every
        run, independent of set iteration order.
        """
        unique_words = {word for text in texts for word in text.split()}
        return {word: idx for idx, word in enumerate(sorted(unique_words), start=1)}

    def encode_text(self, text):
        """Convert a sentence to a list of ids; unknown words map to 0."""
        return [self.vocab.get(word, 0) for word in text.split()]

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        """Return ``(LongTensor of token ids, float32 scalar label)`` for one sample."""
        encoded_text = self.encode_text(self.texts[index])
        return torch.tensor(encoded_text, dtype=torch.long), torch.tensor(self.labels[index], dtype=torch.float32)

    @staticmethod
    def collate_fn(batch):
        """Pad a list of samples into ``(batch, max_len)`` ids and ``(batch,)`` labels."""
        texts, labels = zip(*batch)
        texts = pad_sequence(texts, batch_first=True, padding_value=0)  # Right-pad with index 0
        labels = torch.tensor(labels, dtype=torch.float32)
        return texts, labels

# Simple neural network model
class SimpleNN(nn.Module):
    """Bag-of-embeddings binary classifier.

    Each token id is embedded, the embeddings of the non-padding tokens are
    averaged, and a single linear layer followed by a sigmoid produces a
    probability in (0, 1).

    Parameters
    ----------
    vocab_size : int
        Number of rows in the embedding table (largest token id + 1, since
        id 0 is reserved for padding).
    embedding_dim : int, optional
        Size of each embedding vector (default 10).
    """

    PAD_INDEX = 0  # token id treated as padding and excluded from the average

    def __init__(self, vocab_size, embedding_dim=10):
        super(SimpleNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=self.PAD_INDEX)
        self.fc = nn.Linear(embedding_dim, 1)

    def forward(self, x):
        """Map a ``(batch, seq_len)`` tensor of token ids to ``(batch, 1)`` probabilities."""
        embedded = self.embedding(x)

        # Average only over real tokens. A plain mean over dim=1 would also
        # average the padding positions, making a sentence's score depend on
        # how much padding its batch happened to need.
        mask = (x != self.PAD_INDEX).unsqueeze(-1).to(embedded.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1.0)  # avoid 0/0 for all-padding rows
        pooled = (embedded * mask).sum(dim=1) / token_counts

        logits = self.fc(pooled)
        return torch.sigmoid(logits)  # Activation function

# Sample data and labels
texts = ["I love sunny days", "I hate rain", "This is great", "I am sad", "What a beautiful view"]
labels = [1, 0, 1, 0, 1]  # 1 for positive, 0 for negative

# Seed torch so weight initialisation and batch shuffling are repeatable
torch.manual_seed(0)

# Train-test split
train_texts, test_texts, train_labels, test_labels = train_test_split(texts, labels, test_size=0.2, random_state=0)

# Creating datasets
train_data = TextDataset(train_texts, train_labels)
test_data = TextDataset(test_texts, test_labels)
# The model's embedding rows are indexed by the *training* vocabulary, so the
# test set must be encoded with the same ids. Words never seen in training are
# mapped to 0 (the padding index).
test_data.vocab = {word: train_data.vocab.get(word, 0) for word in test_data.vocab}
vocab_size = len(train_data.vocab) + 1  # +1 for padding index

# Data loaders
train_loader = DataLoader(train_data, batch_size=2, shuffle=True, collate_fn=TextDataset.collate_fn)
test_loader = DataLoader(test_data, batch_size=2, collate_fn=TextDataset.collate_fn)

# Initialize the neural network
model = SimpleNN(vocab_size)
criterion = nn.BCELoss()  # Binary cross-entropy loss for binary classification
optimizer = Adam(model.parameters(), lr=0.001)  # Using Adam optimizer

# Training the model
model.train()
for epoch in range(5):  # Loop over the dataset multiple times
    # Distinct loop names so the `labels` list defined above is not overwritten
    for batch_inputs, batch_labels in train_loader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass. squeeze(-1) drops only the output dimension, so a
        # batch of size 1 keeps shape (1,) and still matches its labels.
        outputs = model(batch_inputs).squeeze(-1)
        loss = criterion(outputs, batch_labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

# Testing the model
model.eval()
correct = 0
total = 0
with torch.no_grad():  # Inference mode, no gradients
    for batch_inputs, batch_labels in test_loader:
        outputs = model(batch_inputs).squeeze(-1)
        predicted = outputs.round()  # Threshold predictions at 0.5 to get binary classification
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

accuracy = correct / total
print(f'Accuracy: {accuracy}')


In [None]:
def predict_sentiment(model, vocab, texts, verbose=True):
    """Classify sentences as 'Positive' or 'Negative' with a trained model.

    Parameters
    ----------
    model : callable
        Maps a ``(batch, seq_len)`` LongTensor of token ids to one probability
        per row (shape ``(batch, 1)`` or ``(batch,)``).
    vocab : dict[str, int]
        Word -> id mapping; words missing from it are encoded as 0.
    texts : sequence of str
        Sentences to classify; tokenised with ``str.split()``.
    verbose : bool, optional
        When True (default) print the encoded ids and raw model outputs.

    Returns
    -------
    list of str
        ``'Positive'`` where the rounded output equals 1, else ``'Negative'``;
        one entry per input text (empty list for empty input).
    """
    texts = list(texts)
    if not texts:
        return []  # nothing to score; avoids max() on an empty sequence

    # Tokenize and encode the new text
    tokens = [[vocab.get(word, 0) for word in text.split()] for text in texts]
    lengths = [len(token) for token in tokens]

    # Pad the sequences with 0. Width is at least 1 so that a batch made only
    # of empty strings still produces a well-formed input tensor.
    padded_tokens = torch.zeros(len(tokens), max(max(lengths), 1), dtype=torch.long)
    for i, token in enumerate(tokens):
        if token:
            padded_tokens[i, :lengths[i]] = torch.tensor(token, dtype=torch.long)

    # Make predictions. reshape(-1) always yields a 1-D tensor; a bare
    # squeeze() would collapse a single-text batch to 0-d and break the
    # iteration below.
    with torch.no_grad():
        outputs = model(padded_tokens).reshape(-1)

    # Debugging prints
    if verbose:
        print("Encoded texts:", tokens)
        print("Model raw outputs:", outputs)

    predictions = outputs.round().numpy()  # Convert to numpy array

    # Convert predictions to text labels
    labels = ['Positive' if pred == 1 else 'Negative' for pred in predictions]
    return labels


# Example usage: score two unseen sentences with the trained network and the
# training vocabulary, then show the resulting labels.
new_texts = [
    "This product is really good",
    "I'm not happy with this service",
]
predictions = predict_sentiment(model=model, vocab=train_data.vocab, texts=new_texts)
print(predictions)


In [19]:

# Assume you have downloaded the Sentiment140 dataset
# and it is in a CSV file called 'sentiment140.csv'
import pandas as pd

# Load the dataset, keeping only columns 0 and 5 and naming them 'sentiment' and 'text'
df = pd.read_csv('../../data-sets/sentiment140.csv', encoding='latin1', usecols=[0, 5], names=['sentiment', 'text'])

# Preprocess the tweets
# ... here you would add your preprocessing steps, like removing URLs, Twitter handles, etc.

# Encode the sentiments (0 for negative, 1 for positive): the value 4 is rewritten to 1
df['sentiment'] = df['sentiment'].replace(4, 1)

# Split the dataset
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Proceed with creating your TextDataset instances and DataLoaders
train_full = TextDataset(train_df['text'].tolist(), train_df['sentiment'].tolist())
test_full = TextDataset(test_df['text'].tolist(), test_df['sentiment'].tolist())
# Encode test tweets with the training ids so they index the same embedding
# rows; words unseen in training become 0 (the padding index).
test_full.vocab = {word: train_full.vocab.get(word, 0) for word in test_full.vocab}

# Work on small subsets to keep experiments fast. Each split is subsampled
# from its own full dataset (previously `train_data` was wrapped twice, the
# second time with indices 1000-1999 that do not exist in a 1000-item subset,
# and `test_data` was never subsampled).
# NOTE(review): the original second range was 1000-2000; taking the first 1000
# test tweets is assumed to be the intent - adjust if a different slice is wanted.
SUBSET_SIZE = 1000
train_data = torch.utils.data.Subset(train_full, range(min(SUBSET_SIZE, len(train_full))))
test_data = torch.utils.data.Subset(test_full, range(min(SUBSET_SIZE, len(test_full))))
# A Subset has no `.vocab` attribute; use `train_full.vocab` (or
# `train_data.dataset.vocab`) when the vocabulary is needed.

# The rest of the code for training and evaluation follows...


In [27]:
# Scratch cell: print the integers 0 and 1, one per line.
for i in [0, 1]:
    print(i)

2
3
4
