In [2]:
import matplotlib.pyplot as plt 
import seaborn  as sn
import pandas as pd
from sklearn.model_selection import train_test_split


In [3]:
data = pd.read_csv('vegetable_market.csv')

In [4]:
data

Unnamed: 0,Vegetable,Season,Month,Temp,Deasaster Happen in last 3month,Vegetable condition,Price per kg
0,potato,winter,jan,15,no,fresh,20
1,tomato,winter,jan,15,no,fresh,50
2,peas,winter,jan,15,no,fresh,70
3,pumkin,winter,jan,15,no,fresh,25
4,cucumber,winter,jan,15,no,fresh,20
...,...,...,...,...,...,...,...
116,brinjal,winter,jan,15,yes,fresh,33
117,ginger,winter,jan,15,no,fresh,88
118,potato,summer,apr,32,no,fresh,24
119,peas,summer,apr,33,no,fresh,33


In [6]:
data.isna().sum()

Vegetable                          0
Season                             0
Month                              0
Temp                               0
Deasaster Happen in last 3month    0
Vegetable condition                0
Price per kg                       0
dtype: int64

In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121 entries, 0 to 120
Data columns (total 7 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   Vegetable                        121 non-null    object
 1   Season                           121 non-null    object
 2   Month                            121 non-null    object
 3   Temp                             121 non-null    int64 
 4   Deasaster Happen in last 3month  121 non-null    object
 5   Vegetable condition              121 non-null    object
 6   Price per kg                     121 non-null    int64 
dtypes: int64(2), object(5)
memory usage: 6.7+ KB


In [9]:
data.describe()

Unnamed: 0,Temp,Price per kg
count,121.0,121.0
mean,24.892562,55.330579
std,9.319157,48.769934
min,15.0,9.0
25%,15.0,22.0
50%,27.0,35.0
75%,32.0,70.0
max,43.0,250.0


In [56]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [57]:
def onehot_encode(df,column):
    df = df.copy()
    dummies = pd.get_dummies(df[column],prefix = column)
    df = pd.concat([df,dummies],axis = 'columns')
    df = df.drop(column,axis = 'columns')
    return df

In [82]:
def preprocessing_inputs(df):
    df = df.copy()
    # binary encoing
    df['Deasaster Happen in last 3month'] = df['Deasaster Happen in last 3month'].replace({'no':0,'yes':1})
    # one - hot encoding
    df = onehot_encode(df,column = 'Vegetable')
   # or 
    for column in ['Season','Month','Vegetable condition']:
        df = onehot_encode(df,column)
    # split df in to X,y
    y = df['Price per kg']
    X = df.drop(['Price per kg'],axis = 'columns')
    # train data 
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,shuffle = True,random_state = 1)
    scaler.fit(X_train)
    X_train = pd.DataFrame(scaler.transform(X_train),index = X_train.index,columns= X_train.columns)
    X_test  = pd.DataFrame(scaler.transform(X_test),index = X_test.index,columns=X_test.columns)
    return X_train,X_test,y_train,y_test

In [83]:
X_train,X_test,y_train,y_test = preprocessing_inputs(data)

In [84]:
X_train.head()

Unnamed: 0,Temp,Deasaster Happen in last 3month,Vegetable_Bitter gourd,Vegetable_Raddish,Vegetable_brinjal,Vegetable_cabage,Vegetable_califlower,Vegetable_chilly,Vegetable_cucumber,Vegetable_garlic,...,Month_july,Month_june,Month_march,Month_may,Month_oct,Month_sept,Vegetable condition_avarage,Vegetable condition_fresh,Vegetable condition_scarp,Vegetable condition_scrap
97,0.622524,1.640825,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511
35,1.369552,-0.609449,-0.234404,-0.234404,5.567764,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,3.872983,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511
44,0.622524,-0.609449,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,5.567764,-0.208514,...,3.316625,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,2.236068,-1.48324,-0.258199,-0.301511
93,1.049397,1.640825,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,2.236068,-1.48324,-0.258199,-0.301511
115,-1.08497,-0.609449,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511


In [85]:
X_test

Unnamed: 0,Temp,Deasaster Happen in last 3month,Vegetable_Bitter gourd,Vegetable_Raddish,Vegetable_brinjal,Vegetable_cabage,Vegetable_califlower,Vegetable_chilly,Vegetable_cucumber,Vegetable_garlic,...,Month_july,Month_june,Month_march,Month_may,Month_oct,Month_sept,Vegetable condition_avarage,Vegetable condition_fresh,Vegetable condition_scarp,Vegetable condition_scrap
96,0.515805,-0.609449,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,3.316625,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511
54,0.409087,1.640825,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511
59,0.83596,-0.609449,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511
118,0.729242,-0.609449,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511
77,1.903143,-0.609449,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,3.872983,-0.102598,-0.102598,2.236068,-1.48324,-0.258199,-0.301511
46,-1.08497,-0.609449,-0.234404,4.266146,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511
31,-0.764815,-0.609449,-0.234404,-0.234404,-0.179605,-0.280449,3.56571,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511
82,-1.08497,-0.609449,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,4.795832,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511
48,0.088932,-0.609449,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,2.236068,-1.48324,-0.258199,-0.301511
99,-1.08497,1.640825,-0.234404,-0.234404,-0.179605,-0.280449,-0.280449,-0.208514,-0.179605,-0.208514,...,-0.301511,-0.145865,-0.179605,-0.258199,-0.102598,-0.102598,-0.447214,0.6742,-0.258199,-0.301511


In [86]:
y_train

97     45
35     40
44     12
93     90
115    21
       ..
9      45
72     10
12     20
107    32
37     40
Name: Price per kg, Length: 96, dtype: int64

In [87]:

y_test

96      30
54      40
59     100
118     24
77      55
46      15
31      20
82     132
48      28
99     150
98     170
100     24
2       70
88      32
114     25
56      70
119     33
104     21
67      45
62      50
113     90
17      20
103     23
109     22
73      25
Name: Price per kg, dtype: int64

In [88]:
X_train.describe()

Unnamed: 0,Temp,Deasaster Happen in last 3month,Vegetable_Bitter gourd,Vegetable_Raddish,Vegetable_brinjal,Vegetable_cabage,Vegetable_califlower,Vegetable_chilly,Vegetable_cucumber,Vegetable_garlic,...,Month_july,Month_june,Month_march,Month_may,Month_oct,Month_sept,Vegetable condition_avarage,Vegetable condition_fresh,Vegetable condition_scarp,Vegetable condition_scrap
count,96.0,96.0,96.0,96.0,96.0,96.0,96.0,96.0,96.0,96.0,...,96.0,96.0,96.0,96.0,96.0,96.0,96.0,96.0,96.0,96.0
mean,-1.96602e-16,1.110223e-16,1.549686e-16,1.092876e-16,9.540979000000001e-17,-1.5034270000000003e-17,-2.891206e-17,-4.047688e-18,9.078386000000001e-17,1.445603e-17,...,-5.0885220000000004e-17,3.874216e-17,5.146346e-17,-1.896631e-16,9.454243000000001e-17,1.5901630000000002e-17,4.7415780000000004e-17,-4.8572260000000006e-17,2.659909e-17,3.469447e-18
std,1.005249,1.005249,1.005249,1.005249,1.005249,1.005249,1.005249,1.005249,1.005249,1.005249,...,1.005249,1.005249,1.005249,1.005249,1.005249,1.005249,1.005249,1.005249,1.005249,1.005249
min,-1.08497,-0.6094494,-0.2344036,-0.2344036,-0.1796053,-0.2804491,-0.2804491,-0.2085144,-0.1796053,-0.2085144,...,-0.3015113,-0.145865,-0.1796053,-0.2581989,-0.1025978,-0.1025978,-0.4472136,-1.48324,-0.2581989,-0.3015113
25%,-1.08497,-0.6094494,-0.2344036,-0.2344036,-0.1796053,-0.2804491,-0.2804491,-0.2085144,-0.1796053,-0.2085144,...,-0.3015113,-0.145865,-0.1796053,-0.2581989,-0.1025978,-0.1025978,-0.4472136,-1.48324,-0.2581989,-0.3015113
50%,0.2490094,-0.6094494,-0.2344036,-0.2344036,-0.1796053,-0.2804491,-0.2804491,-0.2085144,-0.1796053,-0.2085144,...,-0.3015113,-0.145865,-0.1796053,-0.2581989,-0.1025978,-0.1025978,-0.4472136,0.6741999,-0.2581989,-0.3015113
75%,0.7292419,1.640825,-0.2344036,-0.2344036,-0.1796053,-0.2804491,-0.2804491,-0.2085144,-0.1796053,-0.2085144,...,-0.3015113,-0.145865,-0.1796053,-0.2581989,-0.1025978,-0.1025978,-0.4472136,0.6741999,-0.2581989,-0.3015113
max,1.903143,1.640825,4.266146,4.266146,5.567764,3.56571,3.56571,4.795832,5.567764,4.795832,...,3.316625,6.855655,5.567764,3.872983,9.746794,9.746794,2.236068,0.6741999,3.872983,3.316625


In [99]:
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LinearRegression,LogisticRegression,Ridge,Lasso
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC,LinearSVC

In [101]:
models = {
    'Logistic_Rgression'        : LogisticRegression(),
    'Linear_Regression'         : LinearRegression(),
     'Ridge'                    : Ridge(),
     'Lasso'                    : Lasso(),
    'SVC'                       : SVC(),
    'Linear_Svc'                : LinearSVC(),
    'Decision_tree'             : DecisionTreeClassifier(),
    'Random_forest'             : RandomForestClassifier(),
    'Gradient_Boosting'         : GradientBoostingClassifier(),
    'GNB'                       : GaussianNB(),
    'K_Neighbors'               : KNeighborsClassifier(),
    'Neural_Network'            : MLPClassifier()
}
for name,model in models.items():
    model.fit(X_train,y_train)
    print(name + 'Tranned')
    

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic_RgressionTranned
Linear_RegressionTranned
RidgeTranned
LassoTranned
SVCTranned




Linear_SvcTranned
Decision_treeTranned
Random_forestTranned
Gradient_BoostingTranned
GNBTranned
K_NeighborsTranned
Neural_NetworkTranned




In [105]:
for name,model in models.items():
    print(name + ' Accuracy : {:2f}'.format(model.score(X_test,y_test)))

Logistic_Rgression Accuracy : 0.120000
Linear_Regression Accuracy : 0.545972
Ridge Accuracy : 0.566321
Lasso Accuracy : 0.682449
SVC Accuracy : 0.120000
Linear_Svc Accuracy : 0.080000
Decision_tree Accuracy : 0.040000
Random_forest Accuracy : 0.040000
Gradient_Boosting Accuracy : 0.040000
GNB Accuracy : 0.040000
K_Neighbors Accuracy : 0.000000
Neural_Network Accuracy : 0.080000
