In [11]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import time

In [12]:
def generate_data(n, num_points=100, seed=42):
    """Generate a synthetic binary-classification dataset.

    Parameters
    ----------
    n : int
        Number of feature columns in ``X``.
    num_points : int, optional
        Number of rows (samples) to draw. Defaults to 100.
    seed : int, optional
        Seed for the private random generator. The default of 42 yields the
        same samples as seeding the global generator with 42 would.

    Returns
    -------
    X : numpy.ndarray
        Array of shape ``(num_points, n)`` filled by ``rand`` (uniform
        samples over ``[0, 1)``).
    y : numpy.ndarray
        Integer array of shape ``(num_points,)`` with values 0 or 1.
    """
    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by np.random.rand, but leaves the process-wide generator
    # untouched, so calling this function does not silently reseed other code.
    rng = np.random.RandomState(seed)
    X = rng.rand(num_points, n)

    # Only the first half of the columns influences the label.
    first_half = X[:, :n // 2]

    # Make the criteria for assigning y more non-linear: a row is labelled 1
    # only when BOTH per-row counts exceed their threshold.
    enough_at_half = np.sum(first_half >= 0.5, axis=1) > n // 4
    enough_at_high = np.sum(first_half >= 0.7, axis=1) > n // 6
    y = (enough_at_half & enough_at_high).astype(int)

    return X, y

def save_test_data_to_file(filename, X_test, y_test):
    """Write the test samples to ``filename`` as plain text, one per line.

    Each line contains the values of one row of ``X_test`` separated by single
    spaces, followed by a space and the matching entry of ``y_test``. The file
    is opened in write mode, so any existing content is replaced.
    """
    with open(filename, 'w') as out:
        row_count = X_test.shape[0]
        for idx in range(row_count):
            features = ' '.join(str(value) for value in X_test[idx])
            out.write(f'{features} {y_test[idx]}\n')

In [13]:
# Feature counts to run the experiment for; one dataset, model and pair of
# output files is produced per entry.
dimensions = [2, 4, 8, 16, 32]

for n in dimensions:
    # Build the synthetic dataset with n feature columns.
    X, y = generate_data(n)

    # Split the dataset into training and test sets (70% train, 30% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Train a logistic regression classifier
    clf = LogisticRegression(random_state=42)
    clf.fit(X_train, y_train)

    # Test the classifier on the test set and measure prediction time.
    # perf_counter() is a monotonic, high-resolution clock intended for
    # timing short intervals; time.time() can be too coarse (or jump) for a
    # call this brief and may report a duration of zero.
    start_time = time.perf_counter()
    y_pred = clf.predict(X_test)
    prediction_time = time.perf_counter() - start_time

    # Calculate accuracy
    test_accuracy = accuracy_score(y_test, y_pred)

    # Save test data to a file
    test_data_filename = f'test_data_{n}D.txt'
    save_test_data_to_file(test_data_filename, X_test, y_test)

    # Write the metrics for this dimensionality next to the test data.
    result_filename = f'results_{n}D.txt'
    with open(result_filename, 'w') as file:
        file.write(f'Test Accuracy: {test_accuracy:.4f}\n')
        file.write(f'Time to predict on test data: {prediction_time:.6f} seconds\n')

    print(f'Test data with {n} dimensions processed.')


Test data with 2 dimensions processed.
Test data with 4 dimensions processed.
Test data with 8 dimensions processed.
Test data with 16 dimensions processed.
Test data with 32 dimensions processed.
