In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load and preprocess data
csv_url = 'https://raw.githubusercontent.com/zeroday-zaddy/cs422-project/c067b005a7f9a90ae114357cfe7948ed828dc07a/data/leafly_strain_data.csv'
df = pd.read_csv(csv_url)

# Drop rows where 'most_common_terpene' is blank
df = df.dropna(subset=['most_common_terpene'])

# Drop additional rows where 'name' is blank
df = df.dropna(subset=['name'])

# Handling missing values
df[['happy', 'euphoric', 'uplifted', 'sleepy', 'dry_mouth', 'dry_eyes', 'dizzy', 'paranoid', 'anxious', 'stress', 'pain', 'depression', 'anxiety', 'insomnia']] = df[['happy', 'euphoric', 'uplifted', 'sleepy', 'dry_mouth', 'dry_eyes', 'dizzy', 'paranoid', 'anxious', 'stress', 'pain', 'depression', 'anxiety', 'insomnia']].fillna(0)

# Fill NaN values in 'most_common_terpene' with 0
df['most_common_terpene'] = df['most_common_terpene'].fillna(0)

# Convert the 'most_common_terpene' column to a binary representation using one-hot encoding
df = pd.get_dummies(df, columns=['most_common_terpene'], prefix='terpene')

# Keep only the 'most_common_terpene' and effects columns
selected_columns = [col for col in df.columns if 'terpene_' in col] + ['happy', 'euphoric', 'uplifted', 'sleepy', 'dry_mouth', 'dry_eyes', 'dizzy', 'paranoid', 'anxious', 'stress', 'pain', 'depression', 'anxiety', 'insomnia']
df = df[selected_columns]


# Extract features (X) and target variables (y)
X = df.drop(['happy', 'euphoric', 'uplifted', 'sleepy', 'dry_mouth', 'dry_eyes', 'dizzy', 'paranoid', 'anxious', 'stress', 'pain', 'depression', 'anxiety', 'insomnia'], axis=1)
y = df[['happy', 'euphoric', 'uplifted', 'sleepy', 'dry_mouth', 'dry_eyes', 'dizzy', 'paranoid', 'anxious', 'stress', 'pain', 'depression', 'anxiety', 'insomnia']]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a machine learning model using DecisionTreeClassifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Calculate accuracy for each effect
for i, effect in enumerate(y.columns):
    accuracy = accuracy_score(y_test[effect], y_pred[:, i])
    print(f"Accuracy for {effect}: {accuracy}")

# You can also print a classification report for each target variable
for i, effect in enumerate(y.columns):
    print(f"Effect: {effect}")
    print(classification_report(y_test[effect], y_pred[:, i]))
    print("=" * 50)



Accuracy for happy: 0.2530612244897959
Accuracy for euphoric: 0.34285714285714286
Accuracy for uplifted: 0.36122448979591837
Accuracy for sleepy: 0.726530612244898
Accuracy for dry_mouth: 0.3122448979591837
Accuracy for dry_eyes: 0.363265306122449
Accuracy for dizzy: 0.5020408163265306
Accuracy for paranoid: 0.6224489795918368
Accuracy for anxious: 0.6265306122448979
Accuracy for stress: 0.3224489795918367
Accuracy for pain: 0.37551020408163266
Accuracy for depression: 0.3122448979591837
Accuracy for anxiety: 0.4326530612244898
Accuracy for insomnia: 0.6530612244897959
Effect: happy
              precision    recall  f1-score   support

          0%       0.25      0.99      0.40       124
        100%       0.00      0.00      0.00        15
         11%       0.00      0.00      0.00         1
         16%       0.00      0.00      0.00         1
         20%       0.00      0.00      0.00         3
         21%       0.00      0.00      0.00         1
         22%       0.00      0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

              precision    recall  f1-score   support

          0%       0.38      1.00      0.55       184
         10%       0.00      0.00      0.00         2
        100%       0.00      0.00      0.00         3
         11%       0.00      0.00      0.00         5
         12%       0.00      0.00      0.00         4
         13%       0.00      0.00      0.00         5
         14%       0.00      0.00      0.00         6
         15%       0.00      0.00      0.00         5
         16%       0.00      0.00      0.00         7
         17%       0.00      0.00      0.00         6
         18%       0.00      0.00      0.00        11
         19%       0.00      0.00      0.00        15
         20%       0.00      0.00      0.00        20
         21%       0.00      0.00      0.00         7
         22%       0.00      0.00      0.00        11
         23%       0.00      0.00      0.00         7
         24%       0.00      0.00      0.00         5
         25%       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load and preprocess data
csv_url = 'https://raw.githubusercontent.com/zeroday-zaddy/cs422-project/c067b005a7f9a90ae114357cfe7948ed828dc07a/data/leafly_strain_data.csv'
df = pd.read_csv(csv_url)

# Drop rows where any column has a blank space
df = df.dropna(how='any')

# Handling missing values
df[['happy', 'euphoric', 'uplifted', 'sleepy', 'dry_mouth', 'dry_eyes', 'dizzy', 'paranoid', 'anxious', 'stress', 'pain', 'depression', 'anxiety', 'insomnia']] = df[['happy', 'euphoric', 'uplifted', 'sleepy', 'dry_mouth', 'dry_eyes', 'dizzy', 'paranoid', 'anxious', 'stress', 'pain', 'depression', 'anxiety', 'insomnia']].fillna(0)

# Convert the 'most_common_terpene' column to a binary representation using one-hot encoding
df = pd.get_dummies(df, columns=['most_common_terpene'], prefix='terpene')

# Keep only the 'most_common_terpene' and effects columns
selected_columns = [col for col in df.columns if 'terpene_' in col] + ['happy', 'euphoric', 'uplifted', 'sleepy', 'dry_mouth', 'dry_eyes', 'dizzy', 'paranoid', 'anxious', 'stress', 'pain', 'depression', 'anxiety', 'insomnia']
df = df[selected_columns]

# Extract features (X) and target variables (y)
X = df.drop(['happy', 'euphoric', 'uplifted', 'sleepy', 'dry_mouth', 'dry_eyes', 'dizzy', 'paranoid', 'anxious', 'stress', 'pain', 'depression', 'anxiety', 'insomnia'], axis=1)
y = df[['happy', 'euphoric', 'uplifted', 'sleepy', 'dry_mouth', 'dry_eyes', 'dizzy', 'paranoid', 'anxious', 'stress', 'pain', 'depression', 'anxiety', 'insomnia']]

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a machine learning model using DecisionTreeClassifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Calculate accuracy for each effect
for i, effect in enumerate(y.columns):
    accuracy = accuracy_score(y_test[effect], y_pred[:, i])
    print(f"Accuracy for {effect}: {accuracy}")

# You can also print a classification report for each target variable
for i, effect in enumerate(y.columns):
    print(f"Effect: {effect}")
    print(classification_report(y_test[effect], y_pred[:, i]))
    print("=" * 50)


Accuracy for happy: 0.15
Accuracy for euphoric: 0.05
Accuracy for uplifted: 0.05
Accuracy for sleepy: 0.7
Accuracy for dry_mouth: 0.05
Accuracy for dry_eyes: 0.35
Accuracy for dizzy: 0.35
Accuracy for paranoid: 0.3
Accuracy for anxious: 0.55
Accuracy for stress: 0.05
Accuracy for pain: 0.0
Accuracy for depression: 0.0
Accuracy for anxiety: 0.05
Accuracy for insomnia: 0.35
Effect: happy
              precision    recall  f1-score   support

         44%       0.00      0.00      0.00         1
         46%       0.00      0.00      0.00         1
         49%       0.00      0.00      0.00         1
         50%       0.00      0.00      0.00         1
         51%       0.00      0.00      0.00         1
         52%       0.00      0.00      0.00         1
         54%       0.00      0.00      0.00         2
         55%       0.00      0.00      0.00         1
         57%       0.00      0.00      0.00         2
         59%       0.17      1.00      0.29         2
         60%    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr