In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
dataset = pd.read_csv('farming_classification.csv')

In [3]:
dataset.head()

Unnamed: 0,Farm ID,Farm Size (Acres),Crop Type,Soil pH,Rainfall (mm/year),Temperature (°C),Fertilizer Usage (kg/acre),Pest Control Usage (kg/acre),Irrigation (hours/week),Yield Category
0,1,190.4,Rice,8.48,1333.18,23.78,423.3,32.82,35.85,Low Yield
1,2,475.6,Corn,8.05,802.01,20.95,369.22,40.17,30.51,High Yield
2,3,367.34,Rice,6.13,532.22,30.98,97.43,36.65,13.95,Low Yield
3,4,301.34,Rice,8.29,1407.79,14.02,427.48,42.55,30.03,Low Yield
4,5,82.23,Wheat,5.85,1555.62,10.62,351.96,7.53,6.93,Low Yield


In [4]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
dataset['Crop Type'] = le.fit_transform(dataset['Crop Type'])
dataset['Yield Category'] = le.fit_transform(dataset['Yield Category'])

In [5]:
dataset.head()

Unnamed: 0,Farm ID,Farm Size (Acres),Crop Type,Soil pH,Rainfall (mm/year),Temperature (°C),Fertilizer Usage (kg/acre),Pest Control Usage (kg/acre),Irrigation (hours/week),Yield Category
0,1,190.4,1,8.48,1333.18,23.78,423.3,32.82,35.85,1
1,2,475.6,0,8.05,802.01,20.95,369.22,40.17,30.51,0
2,3,367.34,1,6.13,532.22,30.98,97.43,36.65,13.95,1
3,4,301.34,1,8.29,1407.79,14.02,427.48,42.55,30.03,1
4,5,82.23,2,5.85,1555.62,10.62,351.96,7.53,6.93,1


In [6]:
dataset = dataset.drop(columns = ['Farm ID'], axis= 'true')

In [8]:
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[: , -1].values

In [11]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)

In [12]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [14]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [16]:
lr = LogisticRegression(random_state = 0)
svc = SVC(kernel = 'rbf')
gnb = GaussianNB()
dtc = DecisionTreeClassifier(criterion= 'entropy')
rfc = RandomForestClassifier(n_estimators = 50, criterion = 'entropy')
knn = KNeighborsClassifier(n_neighbors = 5, metric='minkowski', p = 2)

In [17]:
clfs = {
    'LR': lr,
    'SVC' : svc,
    'NB': gnb,
    'DT': dtc,
    'KN' : knn,
    'RF': rfc
}

In [18]:
def train_classifier(clf, X_train, Y_train, X_test, Y_test):
      clf.fit(X_train, Y_train)
      y_pred = clf.predict(X_test)
      acc = accuracy_score(Y_test, y_pred)
      cm = confusion_matrix(Y_test, y_pred)
      return acc, cm

In [19]:
accuracy_scores = []
confusion_scores = []

for name,clf in clfs.items():
    current_accuracy, current_confusion = train_classifier(clf, X_train,Y_train,X_test,Y_test)
    print("For name" , name)
    print("For Accuracy" , current_accuracy)
    print("For Confusion" , current_confusion)
    accuracy_scores.append(current_accuracy)
    confusion_scores.append(current_confusion)

For name LR
For Accuracy 0.925
For Confusion [[  3  13]
 [  2 182]]
For name SVC
For Accuracy 0.945
For Confusion [[  5  11]
 [  0 184]]
For name NB
For Accuracy 0.935
For Confusion [[  3  13]
 [  0 184]]
For name DT
For Accuracy 0.98
For Confusion [[ 14   2]
 [  2 182]]
For name KN
For Accuracy 0.94
For Confusion [[  7   9]
 [  3 181]]
For name RF
For Accuracy 0.985
For Confusion [[ 13   3]
 [  0 184]]


In [20]:
performance_df = pd.DataFrame({'Algorithm': clfs.keys(), "Accuracy" : accuracy_scores, "Confusion": confusion_scores})

In [21]:
performance_df

Unnamed: 0,Algorithm,Accuracy,Confusion
0,LR,0.925,"[[3, 13], [2, 182]]"
1,SVC,0.945,"[[5, 11], [0, 184]]"
2,NB,0.935,"[[3, 13], [0, 184]]"
3,DT,0.98,"[[14, 2], [2, 182]]"
4,KN,0.94,"[[7, 9], [3, 181]]"
5,RF,0.985,"[[13, 3], [0, 184]]"
