# Dataset Exploration

This notebook explores the synthetic dataset generated for the Rank-Distance Perceptron. It generates the data, then provides visualizations and analyses of the features: per-feature distributions and the relationship between a pair of features.

In [None]:
import random
import math
import matplotlib.pyplot as plt
import pandas as pd

# Function to generate synthetic dataset
def generate_dataset(num_samples=1000, num_features=32, seed=None):
    """Generate a synthetic binary-classification dataset.

    Each feature is drawn independently with ``uniform(0, 1)``. A sample is
    labelled 1 when the sum of its features is strictly greater than
    ``num_features / 2`` and 0 otherwise.

    Args:
        num_samples: Number of rows to generate.
        num_features: Number of feature columns per row.
        seed: Optional seed for reproducible output. When given, values are
            drawn from a private ``random.Random(seed)`` so the module-level
            generator state is left untouched. When ``None`` (the default)
            the module-level ``random`` generator is used.

    Returns:
        A ``(df, labels)`` tuple. ``df`` is a ``pandas.DataFrame`` with one
        row per sample and columns ``feature_0`` .. ``feature_{n-1}``;
        ``labels`` is a list of 0/1 ints aligned with the rows of ``df``.
    """
    # The ``random`` module and a ``random.Random`` instance both expose
    # ``uniform``, so either can serve as the source of randomness.
    rng = random if seed is None else random.Random(seed)
    threshold = num_features / 2

    # Rows are generated in order, features left to right within a row.
    rows = [
        [rng.uniform(0, 1) for _ in range(num_features)]
        for _ in range(num_samples)
    ]
    labels = [1 if sum(row) > threshold else 0 for row in rows]
    columns = [f'feature_{i}' for i in range(num_features)]
    return pd.DataFrame(rows, columns=columns), labels

# Generate the dataset with the function defaults (1000 samples, 32 features);
# `df` and `labels` are reused by the plotting cells below.
df, labels = generate_dataset()

# Display the first few rows of the dataset
df.head()

In [None]:
# Visualizing feature distributions
def plot_feature_distribution(df):
    """Show histograms of the features in ``df`` under a common title.

    Delegates to ``df.hist`` with 30 bins on a 15x10 figure, adds the
    super-title 'Feature Distributions', then displays the result with
    ``plt.show()``.

    Args:
        df: DataFrame whose columns are to be plotted.
    """
    hist_kwargs = dict(bins=30, figsize=(15, 10))
    df.hist(**hist_kwargs)

    figure_title = 'Feature Distributions'
    plt.suptitle(figure_title)
    plt.show()

# Plot the histograms for the dataset generated above
plot_feature_distribution(df)

In [None]:
# Visualizing relationships between features
def plot_feature_relationships(df, x_col='feature_0', y_col='feature_1'):
    """Scatter-plot one column of ``df`` against another and show the figure.

    With the default arguments this plots ``feature_0`` against
    ``feature_1`` with the title 'Feature 0 vs Feature 1'. Any other pair of
    columns can be selected through ``x_col`` / ``y_col``.

    Args:
        df: DataFrame containing the columns to plot.
        x_col: Name of the column drawn on the x-axis.
        y_col: Name of the column drawn on the y-axis.

    Raises:
        KeyError: If ``x_col`` or ``y_col`` is not a column of ``df``.
    """
    def _label(column):
        # 'feature_0' -> 'Feature 0'; only the first character is upper-cased
        # so the rest of an arbitrary column name keeps its casing.
        text = str(column).replace('_', ' ')
        return text[:1].upper() + text[1:]

    x_label = _label(x_col)
    y_label = _label(y_col)

    plt.figure(figsize=(10, 6))
    # alpha=0.5 keeps overlapping points distinguishable.
    plt.scatter(df[x_col], df[y_col], alpha=0.5)
    plt.title(f'{x_label} vs {y_label}')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.grid()
    plt.show()

# Scatter-plot feature_0 against feature_1 for the dataset generated above
plot_feature_relationships(df)