In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import pickle

#### Import data

In [2]:
# Read the label index: one row per image with its asana and correctness label
df = pd.read_csv(filepath_or_buffer='../data/csv/dataset.csv')

In [3]:
# Display the label table; the rendered repr elides the middle rows of the frame
df


Unnamed: 0,img,asana,correctness
0,1,downdog,correct
1,2,downdog,wrong
2,3,downdog,correct
3,4,downdog,wrong
4,5,downdog,correct
...,...,...,...
199,200,downdog,correct
200,201,downdog,wrong
201,202,downdog,correct
202,203,downdog,wrong


In [4]:
# Keep only the downdog rows, then drop the now-constant 'asana' column
downdog_df = df.loc[df['asana'].eq('downdog')].drop(columns=['asana'])

In [5]:
# Display the filtered table (img id + correctness label) to confirm the filter
downdog_df

Unnamed: 0,img,correctness
0,1,correct
1,2,wrong
2,3,correct
3,4,wrong
4,5,correct
...,...,...
199,200,correct
200,201,wrong
201,202,correct
202,203,wrong


In [7]:
# Define a function to load data from csv files
def load_data(file_path, data_dir='../data/csv', drop_columns=('visibility', 'presence')):
    """Load one per-sample CSV and return its contents as a flat feature vector.

    Parameters
    ----------
    file_path : str or int
        File stem, i.e. the file name without the ``.csv`` extension, looked
        up inside ``data_dir``. Despite the name this is not a full path; the
        notebook passes the values of the ``img`` column.
    data_dir : str, default '../data/csv'
        Directory that holds the per-sample CSV files.
    drop_columns : sequence of str, default ('visibility', 'presence')
        Columns removed before the remaining cells are flattened.

    Returns
    -------
    numpy.ndarray
        1-D array holding the remaining cells in row-major order
        (all columns of row 0, then all columns of row 1, ...).

    Raises
    ------
    FileNotFoundError
        If ``{data_dir}/{file_path}.csv`` does not exist.
    KeyError
        If any name in ``drop_columns`` is not a column of the CSV.
    """
    # With the default arguments this resolves to the same path and drops the
    # same columns as the previous hard-coded version.
    data = pd.read_csv(f'{data_dir}/{file_path}.csv').drop(columns=list(drop_columns))
    return data.values.flatten()

# Build the feature matrix (one flattened CSV per image id) and the binary
# labels: 1 for 'correct', 0 for anything else
X = np.array(list(map(load_data, downdog_df['img'])))
y = np.where(downdog_df['correctness'].eq('correct'), 1, 0)

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

#### Define and train the model

In [8]:
# Two-step pipeline: standardise the features, then classify with a
# 100-tree random forest that splits on the entropy criterion
downdog_model = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('clf', RandomForestClassifier(
        n_estimators=100,
        criterion='entropy',
        random_state=42,
    )),
])

# Fit the whole pipeline (scaler + forest) on the training split
downdog_model.fit(X_train, y_train)

# Predict the held-out split and report the fraction classified correctly
y_pred = downdog_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9024390243902439


#### Test on a single file

In [11]:
# Smoke-test the fitted pipeline on a single sample. Reuse load_data so the
# sample is read and flattened exactly like the rows the model was trained on,
# instead of repeating that logic inline.
# NOTE(review): image 1 belongs to the dataset that was split above, so it may
# have been seen during training; treat this as a sanity check, not an evaluation.
U = load_data(1)
downdog_model.predict([U])

array([1])

In [12]:
# Serialise the fitted pipeline to disk so it can be reloaded without retraining.
# NOTE(review): only unpickle this file from a trusted source, ideally with the
# same scikit-learn version that produced it.
with open('../models/downdog_model.pkl', mode='wb') as model_file:
    pickle.dump(downdog_model, model_file)