In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Step 1: Read Data
file_path = 'plant_diary_new.csv'  # Adjust the path as necessary
plant_data = pd.read_csv(file_path)

# Check basic information about the data.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
plant_data.info()
print(plant_data.head())

In [None]:
# Step 2: Data Visualization
# One scatter panel per feature, each plotted against the owner label.
feature_panels = [
    ('height', 'Height vs Owner'),
    ('leaf_width', 'Leaf Width vs Owner'),
    ('leaf_length', 'Leaf Length vs Owner'),
]
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Pair each subplot with its (column, title) entry, left to right
for panel_ax, (feature, panel_title) in zip(axes, feature_panels):
    sns.scatterplot(x=feature, y='owner', data=plant_data, ax=panel_ax, hue='owner', palette='Set1')
    panel_ax.set_title(panel_title)

plt.tight_layout()
plt.show()

In [None]:
# Step 3: Data Preprocessing
# Features are the three plant measurements; the target is the 'owner' column.
feature_columns = ['height', 'leaf_width', 'leaf_length']
target_column = 'owner'

X = plant_data[feature_columns]
y = plant_data[target_column]

In [None]:
# Step 4: Data Division
# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state keeps the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Step 5: Decision Tree Model Training and Testing
# Build the classifier and fit it on the training split in one expression
# (fit() returns the fitted estimator itself).
decision_tree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test rows
y_pred = decision_tree_model.predict(X_test)

In [None]:
# Step 6: Model Evaluation
# Compare the test-set predictions against the true labels.
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred)

# Print the results
print(f'Accuracy: {accuracy * 100:.2f}%')
# Heading and report are printed separately: passing both to one print() call
# puts the default ' ' separator in front of the report, which shifts its
# header row one column out of alignment with the rows beneath it.
print('Classification Report:')
print(classification_rep)