In [1]:
import os
import pandas as pd
import ast

In [2]:
# Colab warns and provides remediation steps if the GPUs is not compatible with RAPIDS.

!git clone https://github.com/rapidsai/rapidsai-csp-utils.git
!python rapidsai-csp-utils/colab/pip-install.py

Cloning into 'rapidsai-csp-utils'...
remote: Enumerating objects: 553, done.[K
remote: Counting objects: 100% (284/284), done.[K
remote: Compressing objects: 100% (182/182), done.[K
remote: Total 553 (delta 179), reused 147 (delta 100), pack-reused 269 (from 1)[K
Receiving objects: 100% (553/553), 178.44 KiB | 2.38 MiB/s, done.
Resolving deltas: 100% (281/281), done.
Collecting pynvml
  Downloading pynvml-12.0.0-py3-none-any.whl.metadata (5.4 kB)
Collecting nvidia-ml-py<13.0.0a0,>=12.0.0 (from pynvml)
  Downloading nvidia_ml_py-12.560.30-py3-none-any.whl.metadata (8.6 kB)
Downloading pynvml-12.0.0-py3-none-any.whl (26 kB)
Downloading nvidia_ml_py-12.560.30-py3-none-any.whl (40 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 40.5/40.5 kB 1.9 MB/s eta 0:00:00
Installing collected packages: nvidia-ml-py, pynvml
Successfully installed nvidia-ml-py-12.560.30 pynvml-12.0.0
Installing RAPIDS remaining 24.10.* libraries
Looking in indexes: https://pypi.org/simple, https://pypi.nvidia.com
C

In [3]:
def file_load(filepath):

    filename = os.path.basename(filepath)

    if 'features' in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'echonest' in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in filename:
        return pd.read_csv(filepath, index_col=0)

    if 'tracks' in filename:
        tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

        COLUMNS = [('track', 'tags'), ('album', 'tags'), ('artist', 'tags'),
                   ('track', 'genres'), ('track', 'genres_all')]
        for column in COLUMNS:
            tracks[column] = tracks[column].map(ast.literal_eval)

        COLUMNS = [('track', 'date_created'), ('track', 'date_recorded'),
                   ('album', 'date_created'), ('album', 'date_released'),
                   ('artist', 'date_created'), ('artist', 'active_year_begin'),
                   ('artist', 'active_year_end')]
        for column in COLUMNS:
            tracks[column] = pd.to_datetime(tracks[column])

        SUBSETS = ('small', 'medium', 'large')
        try:
            tracks['set', 'subset'] = tracks['set', 'subset'].astype(
                    'category', categories=SUBSETS, ordered=True)
        except (ValueError, TypeError):
            # the categories and ordered arguments were removed in pandas 0.25
            tracks['set', 'subset'] = tracks['set', 'subset'].astype(
                     pd.CategoricalDtype(categories=SUBSETS, ordered=True))

        COLUMNS = [('track', 'genre_top'), ('track', 'license'),
                   ('album', 'type'), ('album', 'information'),
                   ('artist', 'bio')]
        for column in COLUMNS:
            tracks[column] = tracks[column].astype('category')

        return tracks

In [4]:
tracks = file_load('fma_metadata/tracks.csv')
genres = file_load('fma_metadata/genres.csv')
features = file_load('fma_metadata/features.csv')


In [5]:
tracks = tracks.sample(n=30000, random_state=42)

In [6]:
track_ids = tracks.index.unique()
features = features.loc[track_ids]


In [7]:
best_statistics = {
    'mfcc': ['mean', 'std'],
    'chroma_stft': ['mean', 'max'],
    'chroma_cens': ['mean', 'max'],
    'chroma_cqt': ['mean'],
    'spectral_centroid': ['mean', 'std'],
    'spectral_bandwidth': ['mean', 'max'],
    'spectral_rolloff': ['mean'],
    'spectral_contrast': ['mean'],
    'rms': ['mean'],
    'zcr': ['mean', 'std'],
    'tonnetz': ['mean']
}
selected_columns = []
for feature, stats in best_statistics.items():
    for stat in stats:
        # Filter multi-index tuples that match the feature and stat
        matched_cols = [col for col in features.columns if col[0] == feature and col[1] == stat]
        selected_columns.extend(matched_cols)
features = features[selected_columns]


In [8]:
features = features.loc[:, features.columns.get_level_values('number').isin(['01'])]
features.head()

feature,mfcc,mfcc,chroma_stft,chroma_stft,chroma_cens,chroma_cens,chroma_cqt,spectral_centroid,spectral_centroid,spectral_bandwidth,spectral_bandwidth,spectral_rolloff,spectral_contrast,zcr,zcr,tonnetz
statistics,mean,std,mean,max,mean,max,mean,mean,std,mean,max,mean,mean,mean,std,mean
number,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01,01
track_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3
43742,-271.812408,96.714348,0.272961,1.0,0.188667,0.643727,0.405262,832.44165,775.403687,1119.730835,3721.518311,1557.484375,16.684275,0.038113,0.051893,0.017989
31941,-94.052002,55.83728,0.483759,1.0,0.379672,0.591843,0.752012,1015.901001,335.10199,1520.825195,2767.865478,2138.62207,17.800627,0.027171,0.01955,-0.020757
50985,-452.438446,73.048615,0.33806,1.0,0.331159,0.702288,0.568731,372.16449,288.125244,662.792969,3634.620361,531.72876,32.151752,0.020377,0.009784,-0.008503
146981,-478.325043,82.608185,0.220894,1.0,0.209496,0.71875,0.333669,1597.073731,631.219116,1524.833252,3796.411377,2663.250488,29.62298,0.09311,0.043997,-0.00513
20461,-121.869453,98.762352,0.439927,1.0,0.197785,0.680126,0.415923,1659.429565,844.517212,2041.020386,3238.631836,3781.497314,18.266766,0.050857,0.040272,-0.000254


In [9]:
flattened_features_columns = [('feature', '_'.join(filter(None, col))) for col in features.columns]
features.columns = pd.MultiIndex.from_tuples(flattened_features_columns)
df = pd.merge(features, tracks, left_index=True, right_index=True)

In [31]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import cuml
from cuml.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import tensorflow as tf

def get_model(model_name, **kwargs):
    """
    Returns a machine learning or deep learning model based on the given model name.

    Parameters:
    - model_name (str): Name of the model (case insensitive). Valid options are:
      - 'logistic regression'
      - 'decision tree'
      - 'random forest'
      - 'k-nearest neighbors'
      - 'support vector machine'
      - 'naive bayes'
      - 'neural network'
      - 'convolutional neural network'
    - kwargs: Additional parameters to pass to the model constructor.

    Returns:
    - A machine learning or deep learning model.
    """
    model_name = model_name.lower()

    if model_name == 'logistic regression':
        return LogisticRegression(**kwargs)
    elif model_name == 'decision tree':
        return DecisionTreeClassifier(**kwargs)
    elif model_name == 'random forest':
        return RandomForestClassifier(**kwargs)
    elif model_name == 'k-nearest neighbors':
        return KNeighborsClassifier(**kwargs)
    elif model_name == 'support vector machine':
        return LinearSVC(**kwargs)
    elif model_name == 'naive bayes':
        return GaussianNB(**kwargs)
    elif model_name == 'neural network':
        return MLPClassifier(**kwargs)
    else:
        raise ValueError(f"Invalid model name '{model_name}'. Please choose from: "
                         "'logistic regression', 'decision tree', 'random forest', 'k-nearest neighbors', "
                         "'support vector machine', 'naive bayes', 'neural network', or 'convolutional neural network'.")

In [100]:
X = df.xs('feature', axis=1, level=0)
y = df[('track', 'genres')].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None)

In [101]:
y.isnull().sum()

610

In [102]:
combined_df = X.copy()
combined_df['target'] = y
cleaned_df = combined_df.dropna()
X = cleaned_df.drop(columns=['target'])
y = cleaned_df['target']

In [103]:
y.isnull().sum()

0

In [104]:
y = y.astype(int).astype('category').cat.codes

In [105]:
counts = y.value_counts()

valid_classes = counts[counts >= 10].index
top_20_classes = counts.index[:20]

# Filter X and y to include only these valid classes
X = X[y.isin(top_20_classes)]
y = y[y.isin(top_20_classes)]

In [106]:
y

Unnamed: 0_level_0,0
track_id,Unnamed: 1_level_1
43742,2
31941,44
50985,3
146981,0
20461,20
...,...
85240,11
5194,9
51763,9
42828,9


## Genres prediction

In [107]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42, stratify=y_train)

In [78]:
X_train.dtypes

Unnamed: 0,0
mfcc_mean_01,float64
mfcc_std_01,float64
chroma_stft_mean_01,float64
chroma_stft_max_01,float64
chroma_cens_mean_01,float64
chroma_cens_max_01,float64
chroma_cqt_mean_01,float64
spectral_centroid_mean_01,float64
spectral_centroid_std_01,float64
spectral_bandwidth_mean_01,float64


In [108]:


# Check that X is a Pandas DataFrame
print(type(X))  # Should be <class 'pandas.core.frame.DataFrame'>

X_cudf = cudf.from_pandas(X)
print(type(X_cudf))  # Should be <class 'cudf.core.dataframe.DataFrame'>

<class 'pandas.core.frame.DataFrame'>
<class 'cudf.core.dataframe.DataFrame'>


In [109]:
from IPython.display import display
from sklearn.metrics import classification_report
models = {
    'logistic_regression': get_model('logistic regression', max_iter=1000),
    'decision_tree': get_model('decision tree', max_depth=10),
    'random_forest': get_model('random forest', n_estimators=100),
    'naive_bayes': get_model('naive bayes'),
    'k_nearest_neighbors': get_model('k-nearest neighbors', n_neighbors=5),
    'support_vector_machine': get_model('support vector machine'),
    'neural_network': get_model('neural network', hidden_layer_sizes=(200, 100)),
}

accuracies = {}

for model_name, model in models.items():
    if model_name == 'support_vector_machine':
    # Train the model
      model.fit(X_train.values, y_train)
    else:
      model.fit(X_train, y_train)

    # Predict on the test set
    y_pred = model.predict(X_test)

    # Generate classification report
    report = classification_report(y_test, y_pred, output_dict=True)

    print(f'\n\nClassification Report for {model_name}:')
    display(report)

    acc = model.score(X_test, y_test)
    accuracies[model_name] = acc

# Convert accuracies dict to DataFrame
accuracy_df = pd.DataFrame.from_dict(accuracies, orient='index', columns=['accuracy'])

print("\n\nAccuracy Table:")
display(accuracy_df)



Classification Report for logistic_regression:


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'0': {'precision': 0.21927710843373494,
  'recall': 0.19077568134171907,
  'f1-score': 0.2040358744394619,
  'support': 477.0},
 '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 99.0},
 '2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 92.0},
 '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 102.0},
 '4': {'precision': 0.21875,
  'recall': 0.14432989690721648,
  'f1-score': 0.17391304347826086,
  'support': 97.0},
 '9': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 330.0},
 '11': {'precision': 0.21052631578947367,
  'recall': 0.23404255319148937,
  'f1-score': 0.2216624685138539,
  'support': 376.0},
 '14': {'precision': 0.289426523297491,
  'recall': 0.8485113835376532,
  'f1-score': 0.4316258351893096,
  'support': 1142.0},
 '16': {'precision': 0.06944444444444445,
  'recall': 0.02092050209205021,
  'f1-score': 0.03215434083601286,
  'support': 239.0},
 '17': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'supp



Classification Report for decision_tree:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'0': {'precision': 0.2127659574468085,
  'recall': 0.25157232704402516,
  'f1-score': 0.23054755043227665,
  'support': 477.0},
 '1': {'precision': 0.13513513513513514,
  'recall': 0.050505050505050504,
  'f1-score': 0.07352941176470588,
  'support': 99.0},
 '2': {'precision': 0.16666666666666666,
  'recall': 0.03260869565217391,
  'f1-score': 0.05454545454545454,
  'support': 92.0},
 '3': {'precision': 0.04,
  'recall': 0.00980392156862745,
  'f1-score': 0.015748031496062992,
  'support': 102.0},
 '4': {'precision': 0.3142857142857143,
  'recall': 0.3402061855670103,
  'f1-score': 0.32673267326732675,
  'support': 97.0},
 '9': {'precision': 0.16304347826086957,
  'recall': 0.09090909090909091,
  'f1-score': 0.11673151750972763,
  'support': 330.0},
 '11': {'precision': 0.19642857142857142,
  'recall': 0.32180851063829785,
  'f1-score': 0.2439516129032258,
  'support': 376.0},
 '14': {'precision': 0.335387323943662,
  'recall': 0.6672504378283712,
  'f1-score': 0.44639718804920914,
  



Classification Report for random_forest:


{'0': {'precision': 0.26881720430107525,
  'recall': 0.2620545073375262,
  'f1-score': 0.2653927813163482,
  'support': 477.0},
 '1': {'precision': 0.36,
  'recall': 0.09090909090909091,
  'f1-score': 0.14516129032258066,
  'support': 99.0},
 '2': {'precision': 0.4166666666666667,
  'recall': 0.05434782608695652,
  'f1-score': 0.09615384615384616,
  'support': 92.0},
 '3': {'precision': 0.5,
  'recall': 0.0196078431372549,
  'f1-score': 0.03773584905660377,
  'support': 102.0},
 '4': {'precision': 0.4074074074074074,
  'recall': 0.3402061855670103,
  'f1-score': 0.3707865168539326,
  'support': 97.0},
 '9': {'precision': 0.2261904761904762,
  'recall': 0.11515151515151516,
  'f1-score': 0.15261044176706828,
  'support': 330.0},
 '11': {'precision': 0.2544731610337972,
  'recall': 0.3404255319148936,
  'f1-score': 0.2912400455062571,
  'support': 376.0},
 '14': {'precision': 0.3393258426966292,
  'recall': 0.7933450087565674,
  'f1-score': 0.4753410283315845,
  'support': 1142.0},
 '16'



Classification Report for naive_bayes:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'0': {'precision': 0.36153846153846153,
  'recall': 0.09853249475890985,
  'f1-score': 0.15485996705107083,
  'support': 477.0},
 '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 99.0},
 '2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 92.0},
 '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 102.0},
 '4': {'precision': 0.12850467289719625,
  'recall': 0.5670103092783505,
  'f1-score': 0.20952380952380953,
  'support': 97.0},
 '9': {'precision': 0.08116883116883117,
  'recall': 0.07575757575757576,
  'f1-score': 0.07836990595611286,
  'support': 330.0},
 '11': {'precision': 0.1427061310782241,
  'recall': 0.35904255319148937,
  'f1-score': 0.2042360060514372,
  'support': 376.0},
 '14': {'precision': 0.39648033126293997,
  'recall': 0.3353765323992995,
  'f1-score': 0.3633776091081594,
  'support': 1142.0},
 '16': {'precision': 0.12134502923976608,
  'recall': 0.3472803347280335,
  'f1-score': 0.17984832069339113,
  'support': 239.0



Classification Report for k_nearest_neighbors:


{'0': {'precision': 0.16206482593037214,
  'recall': 0.2830188679245283,
  'f1-score': 0.20610687022900764,
  'support': 477.0},
 '1': {'precision': 0.055944055944055944,
  'recall': 0.08080808080808081,
  'f1-score': 0.06611570247933884,
  'support': 99.0},
 '2': {'precision': 0.08148148148148149,
  'recall': 0.11956521739130435,
  'f1-score': 0.09691629955947137,
  'support': 92.0},
 '3': {'precision': 0.046153846153846156,
  'recall': 0.058823529411764705,
  'f1-score': 0.05172413793103448,
  'support': 102.0},
 '4': {'precision': 0.22641509433962265,
  'recall': 0.24742268041237114,
  'f1-score': 0.23645320197044334,
  'support': 97.0},
 '9': {'precision': 0.10948905109489052,
  'recall': 0.13636363636363635,
  'f1-score': 0.1214574898785425,
  'support': 330.0},
 '11': {'precision': 0.18471337579617833,
  'recall': 0.23138297872340424,
  'f1-score': 0.20543093270365997,
  'support': 376.0},
 '14': {'precision': 0.3387197016780609,
  'recall': 0.47723292469352013,
  'f1-score': 0.3



Classification Report for support_vector_machine:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'0': {'precision': 0.18568232662192394,
  'recall': 0.1740041928721174,
  'f1-score': 0.17965367965367965,
  'support': 477.0},
 '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 99.0},
 '2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 92.0},
 '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 102.0},
 '4': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 97.0},
 '9': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 330.0},
 '11': {'precision': 0.18110236220472442,
  'recall': 0.18351063829787234,
  'f1-score': 0.18229854689564068,
  'support': 376.0},
 '14': {'precision': 0.2782246879334258,
  'recall': 0.8782837127845884,
  'f1-score': 0.4225826838002949,
  'support': 1142.0},
 '16': {'precision': 0.1,
  'recall': 0.02510460251046025,
  'f1-score': 0.04013377926421405,
  'support': 239.0},
 '17': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 172.0},
 '20': {'precision': 0.0, 'recall': 0.0, '



Classification Report for neural_network:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'0': {'precision': 0.07692307692307693,
  'recall': 0.0020964360587002098,
  'f1-score': 0.004081632653061225,
  'support': 477.0},
 '1': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 99.0},
 '2': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 92.0},
 '3': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 102.0},
 '4': {'precision': 0.19689119170984457,
  'recall': 0.3917525773195876,
  'f1-score': 0.2620689655172414,
  'support': 97.0},
 '9': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 330.0},
 '11': {'precision': 0.22401433691756273,
  'recall': 0.3324468085106383,
  'f1-score': 0.2676659528907923,
  'support': 376.0},
 '14': {'precision': 0.2844950213371266,
  'recall': 0.8756567425569177,
  'f1-score': 0.42946102641185313,
  'support': 1142.0},
 '16': {'precision': 0.09259259259259259,
  'recall': 0.10460251046025104,
  'f1-score': 0.09823182711198428,
  'support': 239.0},
 '17': {'precision': 0.1016949152542373,
  'reca



Accuracy Table:


Unnamed: 0,accuracy
logistic_regression,0.262598
decision_tree,0.258984
random_forest,0.31044
naive_bayes,0.200936
k_nearest_neighbors,0.211354
support_vector_machine,0.254731
neural_network,0.257495


In [110]:
X.shape

(23512, 16)

## Year of release Prediction

In [32]:
df[('album','date_released')]

Unnamed: 0_level_0,album
Unnamed: 0_level_1,date_released
track_id,Unnamed: 1_level_2
43742,NaT
31941,2009-02-12
50985,NaT
146981,2016-11-18
20461,NaT
...,...
85240,2013-06-01
5194,2004-09-13
51763,2011-07-29
42828,NaT


In [117]:
# Assuming 'date_released' is the column containing the datetime data
df[('album','year')] = df[('album', 'date_released')].dt.year

In [118]:
X = df.xs('feature', axis=1, level=0)
y = df[('album','year')]

In [119]:
combined_df = X.copy()
combined_df['target'] = y
cleaned_df = combined_df.dropna()
X = cleaned_df.drop(columns=['target'])
y = cleaned_df['target']

In [120]:
counts = y.value_counts()

valid_classes = counts[counts >= 10].index
top_20_classes = counts.index[:20]

# Filter X and y to include only these valid classes
X = X[y.isin(valid_classes)]
y = y[y.isin(valid_classes)]

In [121]:
y.value_counts()

Unnamed: 0_level_0,count
target,Unnamed: 1_level_1
2013.0,2219
2012.0,2194
2011.0,2167
2010.0,1924
2014.0,1795
2015.0,1784
2009.0,1738
2016.0,1482
2008.0,1181
2007.0,691


In [122]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42, stratify=y_train)

In [123]:
from sklearn.metrics import classification_report
models = {
    'logistic_regression': get_model('logistic regression', max_iter=1000),
    'decision_tree': get_model('decision tree', max_depth=10),
    'random_forest': get_model('random forest', n_estimators=100),
    'naive_bayes': get_model('naive bayes'),
    'k_nearest_neighbors': get_model('k-nearest neighbors', n_neighbors=5),
    'support_vector_machine': get_model('support vector machine'),
    'neural_network': get_model('neural network', hidden_layer_sizes=(200, 100)),
}

accuracies = {}

for model_name, model in models.items():
    if model_name == 'support_vector_machine':
    # Train the model
      model.fit(X_train.values, y_train)
    else:
      model.fit(X_train, y_train)

    # Predict on the test set
    y_pred = model.predict(X_test)

    # Generate classification report
    report = classification_report(y_test, y_pred, output_dict=True)

    print(f'\n\nClassification Report for {model_name}:')
    display(report)

    acc = model.score(X_test, y_test)
    accuracies[model_name] = acc

# Convert accuracies dict to DataFrame
accuracy_df = pd.DataFrame.from_dict(accuracies, orient='index', columns=['accuracy'])

print("\n\nAccuracy Table:")
display(accuracy_df)



Classification Report for logistic_regression:


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'1982.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4.0},
 '1993.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 7.0},
 '1995.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 9.0},
 '1996.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1997.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1998.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15.0},
 '1999.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22.0},
 '2000.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13.0},
 '2001.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19.0},
 '2002.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 24.0},
 '2003.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 50.0},
 '2004.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 73.0},
 '2005.0': {'precision': 0.0, 'recall': 0.0



Classification Report for decision_tree:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'1982.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4.0},
 '1993.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 7.0},
 '1995.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 9.0},
 '1996.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1997.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1998.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15.0},
 '1999.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22.0},
 '2000.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13.0},
 '2001.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19.0},
 '2002.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 24.0},
 '2003.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 50.0},
 '2004.0': {'precision': 0.125,
  'recall': 0.0136986301369863,
  'f1-score': 0.024691358024691357,
  'support': 73.0},
 '2



Classification Report for random_forest:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'1982.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4.0},
 '1993.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 7.0},
 '1995.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 9.0},
 '1996.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1997.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1998.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15.0},
 '1999.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22.0},
 '2000.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13.0},
 '2001.0': {'precision': 1.0,
  'recall': 0.05263157894736842,
  'f1-score': 0.1,
  'support': 19.0},
 '2002.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 24.0},
 '2003.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 50.0},
 '2004.0': {'precision': 0.5,
  'recall': 0.0273972602739726,
  'f1-score': 0.05194805194805195,
  's



Classification Report for naive_bayes:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'1982.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4.0},
 '1993.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 7.0},
 '1995.0': {'precision': 0.04081632653061224,
  'recall': 0.2222222222222222,
  'f1-score': 0.06896551724137931,
  'support': 9.0},
 '1996.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1997.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1998.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15.0},
 '1999.0': {'precision': 0.02702702702702703,
  'recall': 0.045454545454545456,
  'f1-score': 0.03389830508474576,
  'support': 22.0},
 '2000.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13.0},
 '2001.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19.0},
 '2002.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 24.0},
 '2003.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 50.0},
 '2004.0': {'pr



Classification Report for k_nearest_neighbors:


{'1982.0': {'precision': 0.15384615384615385,
  'recall': 0.5,
  'f1-score': 0.23529411764705882,
  'support': 4.0},
 '1993.0': {'precision': 0.08333333333333333,
  'recall': 0.14285714285714285,
  'f1-score': 0.10526315789473684,
  'support': 7.0},
 '1995.0': {'precision': 0.03333333333333333,
  'recall': 0.1111111111111111,
  'f1-score': 0.05128205128205128,
  'support': 9.0},
 '1996.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1997.0': {'precision': 0.047619047619047616,
  'recall': 0.1,
  'f1-score': 0.06451612903225806,
  'support': 10.0},
 '1998.0': {'precision': 0.027777777777777776,
  'recall': 0.06666666666666667,
  'f1-score': 0.0392156862745098,
  'support': 15.0},
 '1999.0': {'precision': 0.05128205128205128,
  'recall': 0.09090909090909091,
  'f1-score': 0.06557377049180328,
  'support': 22.0},
 '2000.0': {'precision': 0.041666666666666664,
  'recall': 0.07692307692307693,
  'f1-score': 0.05405405405405406,
  'support': 13.0},
 '2001.0': {'pre



Classification Report for support_vector_machine:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'1982.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4.0},
 '1993.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 7.0},
 '1995.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 9.0},
 '1996.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1997.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1998.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15.0},
 '1999.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22.0},
 '2000.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13.0},
 '2001.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19.0},
 '2002.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 24.0},
 '2003.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 50.0},
 '2004.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 73.0},
 '2005.0': {'precision': 0.0, 'recall': 0.0



Classification Report for neural_network:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'1982.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4.0},
 '1993.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 7.0},
 '1995.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 9.0},
 '1996.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1997.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 10.0},
 '1998.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 15.0},
 '1999.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22.0},
 '2000.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 13.0},
 '2001.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 19.0},
 '2002.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 24.0},
 '2003.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 50.0},
 '2004.0': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 73.0},
 '2005.0': {'precision': 0.0, 'recall': 0.0



Accuracy Table:


Unnamed: 0,accuracy
logistic_regression,0.113188
decision_tree,0.121021
random_forest,0.144265
naive_bayes,0.117736
k_nearest_neighbors,0.099545
support_vector_machine,0.12001
neural_network,0.111925
