In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import pickle

#### Import data

In [2]:
# Read the dataset index CSV into a DataFrame
dataset_csv = '../data/csv/dataset.csv'
df = pd.read_csv(dataset_csv)

In [3]:
# Show the loaded index table; the rendered output lists the img, asana and correctness columns
df


Unnamed: 0,img,asana,correctness
0,1,downdog,correct
1,2,downdog,wrong
2,3,downdog,correct
3,4,downdog,wrong
4,5,downdog,correct
...,...,...,...
375,376,cobra,correct
376,377,cobra,correct
377,378,cobra,correct
378,379,cobra,correct


### Downdog model

In [4]:
# Select the downdog rows, then drop the asana column (constant after filtering)
downdog_rows = df['asana'] == 'downdog'
downdog_df = df[downdog_rows].drop(columns=['asana'])

In [5]:
# Show the downdog subset (asana column already removed)
downdog_df

Unnamed: 0,img,correctness
0,1,correct
1,2,wrong
2,3,correct
3,4,wrong
4,5,correct
...,...,...
199,200,correct
200,201,wrong
201,202,correct
202,203,wrong


In [6]:
# Define a function to load data from csv files
def load_data(file_path, data_dir='../data/csv'):
    """Load one CSV file and return its contents as a flat 1-D array.

    Parameters
    ----------
    file_path : str or int
        File stem without directory or ``.csv`` suffix. Despite the name it
        is not a full path; callers in this notebook pass values taken from
        the ``img`` column of the dataset index.
    data_dir : str, optional
        Directory that contains the CSV files. Defaults to ``'../data/csv'``,
        the location that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Every value left after removing the ``visibility`` and ``presence``
        columns, flattened row by row into one dimension.

    Raises
    ------
    FileNotFoundError
        If ``{data_dir}/{file_path}.csv`` does not exist.
    KeyError
        If the file lacks a ``visibility`` or ``presence`` column.
    """
    data = pd.read_csv(f'{data_dir}/{file_path}.csv').drop(columns=['visibility', 'presence'])
    # Flatten so each file becomes a single feature row for the classifiers
    return data.values.flatten()

In [7]:
# Build one flattened feature row per downdog image
downdog_rows_features = []
for img_id in downdog_df['img']:
    downdog_rows_features.append(load_data(img_id))
X = np.array(downdog_rows_features)

# Target: the correctness label of each downdog image
y = downdog_df.loc[:, 'correctness']

# Hold out 20% of the samples for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

#### Define and train the model

In [8]:
# Standardise the features, then classify with an entropy-criterion random forest
downdog_model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        (
            'clf',
            RandomForestClassifier(
                n_estimators=100,
                criterion='entropy',
                random_state=42,
            ),
        ),
    ]
)

# Fit on the training split only
downdog_model.fit(X_train, y_train)

# Predict the held-out split and report the share of correct predictions
y_pred = downdog_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9024390243902439


#### Test on a single file

In [9]:
# Build the feature vector with the same helper used for the training rows,
# so the column handling here cannot drift from what the model was fitted on.
U = load_data(19)
# predict expects a 2-D input, hence the single-row list
downdog_model.predict([U])

array(['correct'], dtype=object)

#### Save downdog model

In [10]:
# Serialise the fitted downdog pipeline to disk
with open('../models/downdog_model.pkl', mode='wb') as model_file:
    pickle.dump(downdog_model, model_file)

### Updog model

In [11]:
# Keep only the updog rows; the asana label is constant here, so drop it
is_updog = df['asana'] == 'updog'
updog_df = df.loc[is_updog].drop(columns='asana')

In [12]:
# One flattened feature row per updog image
X = np.array(list(map(load_data, updog_df['img'])))
# Target: the correctness label of each updog image
y = updog_df.loc[:, 'correctness']

# Hold out 10% of the samples for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.1,
)

In [13]:
# Standardise the features, then classify with a gini-criterion random forest
updog_scaler = StandardScaler()
updog_forest = RandomForestClassifier(n_estimators=100, criterion='gini', random_state=42)
updog_model = Pipeline([('scaler', updog_scaler), ('clf', updog_forest)])

# Fit on the training split only
updog_model.fit(X_train, y_train)

# Predict the held-out split and report the share of correct predictions
y_pred = updog_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8


In [14]:
# Build the feature vector with the same helper used for the training rows,
# so the column handling here cannot drift from what the model was fitted on.
U = load_data(258)
# predict expects a 2-D input, hence the single-row list
updog_model.predict([U])

array(['wrong'], dtype=object)

In [15]:
# Serialise the fitted updog pipeline to disk
with open('../models/updog_model.pkl', mode='wb') as model_file:
    pickle.dump(updog_model, model_file)

### Cobra model

In [16]:
# Cobra subset: filter on the asana label, then discard that (now constant) column
cobra_df = df.loc[df['asana'].eq('cobra')].drop(columns=['asana'])

In [17]:
# One flattened feature row per cobra image
cobra_features = [load_data(img_id) for img_id in cobra_df['img']]
X = np.array(cobra_features)
# Target: the correctness label of each cobra image
y = cobra_df.loc[:, 'correctness']

# Hold out 10% of the samples for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.1,
)

In [18]:
# Standardise the features, then classify with a gini-criterion random forest
cobra_model = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('clf', RandomForestClassifier(criterion='gini', n_estimators=100, random_state=42)),
    ]
)

# Fit on the training split only
cobra_model.fit(X_train, y_train)

# Predict the held-out split and report the share of correct predictions
y_pred = cobra_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7777777777777778


In [19]:
# Serialise the fitted cobra pipeline to disk
with open('../models/cobra_model.pkl', mode='wb') as model_file:
    pickle.dump(cobra_model, model_file)

### General asana model

In [20]:
# One flattened feature row per image, across every asana in the index
all_features = [load_data(img_id) for img_id in df['img']]
X = np.array(all_features)
# Target: the asana label itself (not the correctness label)
y = df.loc[:, 'asana']

# Hold out 10% of the samples for evaluation (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.1,
)

In [21]:
# Standardise the features, then classify the asana with a gini-criterion random forest
asana_scaler = StandardScaler()
asana_forest = RandomForestClassifier(n_estimators=100, criterion='gini', random_state=42)
asana_model = Pipeline(steps=[('scaler', asana_scaler), ('clf', asana_forest)])

# Fit on the training split only
asana_model.fit(X_train, y_train)

# Predict the held-out split and report the share of correct predictions
y_pred = asana_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.868421052631579


In [22]:
# Serialise the fitted asana pipeline to disk
with open('../models/asana_model.pkl', mode='wb') as model_file:
    pickle.dump(asana_model, model_file)