# Logistic Regression

# Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Importing the dataset

In [2]:
# Load the crop recommendation data from a CSV file in the working directory.
dataset = pd.read_csv('Crop_recommendation_dataset.csv')

In [3]:
# Preview the first rows to check the columns and the `label` target.
dataset.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


# Obtaining unique crop labels

In [4]:
# Distinct values of the `label` column, as a plain Python list.
unique_crop_labels = list(dataset['label'].unique())

print('The unique crops are as follows: \n')
for crop_name in unique_crop_labels:
    print(crop_name)

print('\nNumber of unique crops:', len(unique_crop_labels))

The unique crops are as follows: 

rice
maize
chickpea
kidneybeans
pigeonpeas
mothbeans
mungbean
blackgram
lentil
pomegranate
banana
mango
grapes
watermelon
muskmelon
apple
orange
papaya
coconut
cotton
jute
coffee

Number of unique crops: 22


# Generating crop specific dataframes

In [5]:
# Build one binary (one-vs-rest) dataframe per crop: the same rows and feature
# columns as `dataset`, with `label` rewritten to '1' for the crop in question
# and '0' for every other crop.
crop_dict = dict()
for crop_name in unique_crop_labels:

    # Copy the frame that is already in memory instead of re-reading the CSV
    # from disk on every iteration; the copy leaves `dataset` itself untouched.
    temp_df = dataset.copy()

    # Compute the mask once, before any relabelling, so the second assignment
    # does not depend on the result of the first.
    is_crop = temp_df['label'] == crop_name

    # Labels are kept as the strings '0'/'1' so later cells see the same values.
    temp_df.loc[~is_crop, ['label']] = '0'
    temp_df.loc[is_crop, ['label']] = '1'
    crop_dict[crop_name] = temp_df

# Building a model for rice 

# Extracting Target Variable and independent variables

In [6]:
# Rice-vs-rest frame: every column except the last is a feature,
# the last column is the '0'/'1' target.
rice_frame = crop_dict['rice']
X = rice_frame.iloc[:, :-1].values
y = rice_frame.iloc[:, -1].values

# Splitting the dataset into training set and test set

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [8]:
# Inspect the training features before scaling is applied.
print(X_train)

[[134.          56.          18.         ...  83.91902605   6.6912681
   70.97358303]
 [ 29.         122.         196.         ...  81.15595212   5.63832848
   73.06862952]
 [ 25.          68.          19.         ...  64.25510719   7.10845012
   67.47677295]
 ...
 [ 35.          64.          15.         ...  63.53604453   6.50014496
   69.5274407 ]
 [ 39.          65.          23.         ...  69.12613376   7.6859593
   41.02682925]
 [ 14.          22.           9.         ...  91.13772765   6.54319181
  112.5090516 ]]


In [9]:
# Inspect the test features before scaling is applied.
print(X_test)

[[105.          14.          50.         ...  87.6883982    6.41905219
   59.65590798]
 [ 91.          12.          46.         ...  85.49938185   6.34394252
   48.31219031]
 [ 14.         121.         203.         ...  83.74765639   6.15868941
   74.46411148]
 ...
 [ 84.          27.          29.         ...  53.00366334   7.16709259
  168.2644287 ]
 [ 31.          13.          33.         ...  95.21224392   6.34246371
  148.3003692 ]
 [  5.          24.          40.         ...  93.87030088   6.29790758
  104.6735454 ]]


In [10]:
# Inspect the training targets ('1' = rice, '0' = any other crop).
print(y_train)

['0' '0' '0' ... '0' '0' '0']


In [11]:
# Inspect the test targets ('1' = rice, '0' = any other crop).
print(y_test)

['0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '1' '0' '0' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '1' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '1' '1' '0' '0' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '1' '0

# Feature Scaling

In [12]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits so no test-set information is used.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [13]:
# Inspect the training features after standardisation.
print(X_train)

[[ 2.25367108  0.07555744 -0.59141091 ...  0.56115786  0.28639844
  -0.58838147]
 [-0.58434455  2.06834149  2.90385791 ...  0.43651791 -1.09903674
  -0.55053196]
 [-0.69245943  0.43788181 -0.57177457 ... -0.3258651   0.83531751
  -0.65155552]
 ...
 [-0.42217223  0.31710702 -0.65031993 ... -0.35830141  0.03492274
  -0.61450776]
 [-0.31405735  0.34730072 -0.4932292  ... -0.10613716  1.5951916
  -1.12940532]
 [-0.98977536 -0.95102828 -0.76813798 ...  0.88678747  0.09156286
   0.16200634]]


In [14]:
# Inspect the test features after applying the training-set scaler.
print(X_test)

[[ 1.46983819 -1.19257786  0.03695202 ...  0.73119109 -0.07177737
  -0.79284878]
 [ 1.09143611 -1.25296526 -0.04159334 ...  0.63244639 -0.17060505
  -0.99778658]
 [-0.98977536  2.03814779  3.0413123  ...  0.55342752 -0.41435709
  -0.52532091]
 ...
 [ 0.90223507 -0.80005979 -0.37541115 ... -0.83340833  0.91247799
   1.16929376]
 [-0.53028711 -1.22277156 -0.29686578 ...  1.07058549 -0.17255083
   0.8086192 ]
 [-1.23303384 -0.89064089 -0.15941139 ...  1.01005156 -0.23117683
   0.02044856]]


# Training the Logistic Regression Model on the Training set

In [15]:
from sklearn.linear_model import LogisticRegression

# fit() hands back the fitted estimator, so construction and training are
# chained; the bare name on the last line keeps the estimator repr as output.
classifier = LogisticRegression(random_state=0).fit(X_train, y_train)
classifier

LogisticRegression(random_state=0)

# Predicting the Test set results

In [16]:
y_pred = classifier.predict(X_test)

# Show predictions next to the true labels: column 0 = predicted, column 1 = actual.
print(np.column_stack((y_pred, y_test)))

[['0' '0']
 ['0' '0']
 ['0' '0']
 ...
 ['0' '0']
 ['0' '0']
 ['0' '0']]


# Confusion Matrix and Accuracy Score

In [17]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Confusion matrix of true labels (first argument) against predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# NOTE(review): the '1' class is rare in this one-vs-rest setup, so accuracy
# alone can look high even when positives are missed; read it together with cm.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[521   0]
 [  6  23]]


0.9890909090909091

# Building models for all the crops (including rice)

In [18]:
# Train one binary (one-vs-rest) logistic-regression model per crop.
# crop_predict[crop] holds that crop's fitted scaler, fitted classifier,
# confusion matrix and test-set accuracy.
crop_predict = dict()

for crop_name, crop_df in crop_dict.items():

    # Every column except the last is a feature; the last is the '0'/'1' target.
    X1 = crop_df.iloc[:, :-1].values
    y1 = crop_df.iloc[:, -1].values

    X1_train, X1_test, y1_train, y1_test = train_test_split(X1, y1, test_size = 0.25, random_state = 0)

    # Use a loop-local scaler name so the standalone rice model's `sc` is not
    # silently replaced by the scaler of whichever crop is processed last.
    scaler1 = StandardScaler()
    X1_train = scaler1.fit_transform(X1_train)
    X1_test = scaler1.transform(X1_test)

    classifier1 = LogisticRegression(random_state = 0)
    classifier1.fit(X1_train, y1_train)

    y1_pred = classifier1.predict(X1_test)

    cm1 = confusion_matrix(y1_test, y1_pred)
    ascore1 = accuracy_score(y1_test, y1_pred)

    # The scaler is stored with the classifier because the model was trained on
    # scaled features: new samples must go through the same scaler before
    # classifier.predict() is called.
    crop_predict[crop_name] = {
        'scaler': scaler1,
        'classifier': classifier1,
        'confusionMatrix': cm1,
        'accuracyScore': ascore1,
    }
       

# Accuracy Scores for all the crops

In [19]:
# One line per crop: the name left-aligned in a 15-character field, a space,
# then that crop's test-set accuracy.
for crop_name in unique_crop_labels:
    padded_name = "{:<15}".format(crop_name)
    print(padded_name, crop_predict[crop_name]['accuracyScore'])

rice            0.9890909090909091
maize           0.9563636363636364
chickpea        1.0
kidneybeans     1.0
pigeonpeas      0.9709090909090909
mothbeans       0.9690909090909091
mungbean        0.9872727272727273
blackgram       0.9545454545454546
lentil          0.990909090909091
pomegranate     0.9618181818181818
banana          0.9945454545454545
mango           0.9963636363636363
grapes          1.0
watermelon      0.9618181818181818
muskmelon       0.9981818181818182
apple           1.0
orange          0.990909090909091
papaya          0.9818181818181818
coconut         0.9963636363636363
cotton          0.9927272727272727
jute            0.9436363636363636
coffee          0.9963636363636363
