In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# suppress future warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

In [2]:
%pip install jupyterlab_play_cell_button 

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
df = pd.read_csv(r'C:\data\mia_data2.csv')

In [4]:
df.keys()

Index(['Unnamed: 0.1', 'Unnamed: 0', 'YEAR', 'MONTH', 'DAY_OF_WEEK',
       'OP_CARRIER_FL_NUM', 'ORIGIN', 'DEST', 'CRS_DEP_TIME', 'DEP_TIME',
       'DEP_DELAY', 'DEP_DELAY_NEW', 'CRS_ARR_TIME', 'ARR_TIME', 'ARR_DELAY',
       'ARR_DELAY_NEW', 'CANCELLED', 'CANCELLATION_CODE', 'DIVERTED',
       'CRS_ELAPSED_TIME', 'ACTUAL_ELAPSED_TIME', 'CARRIER_DELAY',
       'WEATHER_DELAY', 'NAS_DELAY', 'SECURITY_DELAY', 'LATE_AIRCRAFT_DELAY',
       'WHY_DELAY', 'DAY', 'DATE'],
      dtype='object')

In [5]:
# Deleting all flights were the flight was diverted
df = df[df.DIVERTED ==0]

In [6]:
#Deleting all rows where the flight was cancelled
df = df[df.CANCELLED == 0]


In [7]:
import datetime as datetime
#_________________________________________________________
# Function that convert the 'HHMM' string to datetime.time
def format_heure(chaine):
    if pd.isnull(chaine):
        return np.nan
    else:
        if chaine == 2400: chaine = 0
        chaine = "{0:04d}".format(int(chaine))
        heure = datetime.time(int(chaine[0:2]), int(chaine[2:4]))
        return heure
#_____________________________________________________________________
# Function that combines a date and time to produce a datetime.datetime
def combine_date_heure(x):
    if pd.isnull(x[0]) or pd.isnull(x[1]):
        return np.nan
    else:
        return datetime.datetime.combine(x[0],x[1])
#_______________________________________________________________________________
# Function that combine two columns of the dataframe to create a datetime format
def create_flight_time(df, col):    
    liste = []
    for index, cols in df[['DATE', col]].iterrows():    
        if pd.isnull(cols[1]):
            liste.append(np.nan)
        elif float(cols[1]) == 2400:
            cols[0] += datetime.timedelta(days=1)
            cols[1] = datetime.time(0,0)
            liste.append(combine_date_heure(cols))
        else:
            cols[1] = format_heure(cols[1])
            liste.append(combine_date_heure(cols))
    return pd.Series(liste)

In [8]:
df['DEPARTURE_TIME'] = df['DEP_TIME'].apply(format_heure);
df['ARRIVAL_TIME'] = df['ARR_TIME'].apply(format_heure)

In [9]:

df[['DAY', 'MONTH', 'YEAR']] = df[['DAY', 'MONTH', 'YEAR']].values.astype('str')

In [10]:
df['DATE'] = df['MONTH']+'-'+df['DAY']+'-'+df['YEAR']



In [11]:
from datetime import date, time
df['DEPARTURE'] = pd.to_datetime(df['DATE'].astype(str)+' '+ df['DEPARTURE_TIME'].astype(str))

In [12]:
df.ACTUAL_ELAPSED_TIME= df.ACTUAL_ELAPSED_TIME.astype(int)

In [13]:
df.ACTUAL_ELAPSED_TIME.info()

<class 'pandas.core.series.Series'>
Int64Index: 659119 entries, 0 to 736651
Series name: ACTUAL_ELAPSED_TIME
Non-Null Count   Dtype
--------------   -----
659119 non-null  int32
dtypes: int32(1)
memory usage: 7.5 MB


In [14]:
# I want to create dummy values based on the why delay column
dummy = pd.get_dummies(df['WHY_DELAY'])

In [15]:
dummy

Unnamed: 0,0,carrier,late aircraft,nas,security,weather
0,1,0,0,0,0,0
1,1,0,0,0,0,0
2,1,0,0,0,0,0
3,1,0,0,0,0,0
4,1,0,0,0,0,0
...,...,...,...,...,...,...
736647,1,0,0,0,0,0
736648,1,0,0,0,0,0
736649,1,0,0,0,0,0
736650,0,0,0,1,0,0


In [16]:
df = pd.concat([df, dummy], axis = 1)
df.keys()

Index(['Unnamed: 0.1', 'Unnamed: 0', 'YEAR', 'MONTH', 'DAY_OF_WEEK',
       'OP_CARRIER_FL_NUM', 'ORIGIN', 'DEST', 'CRS_DEP_TIME', 'DEP_TIME',
       'DEP_DELAY', 'DEP_DELAY_NEW', 'CRS_ARR_TIME', 'ARR_TIME', 'ARR_DELAY',
       'ARR_DELAY_NEW', 'CANCELLED', 'CANCELLATION_CODE', 'DIVERTED',
       'CRS_ELAPSED_TIME', 'ACTUAL_ELAPSED_TIME', 'CARRIER_DELAY',
       'WEATHER_DELAY', 'NAS_DELAY', 'SECURITY_DELAY', 'LATE_AIRCRAFT_DELAY',
       'WHY_DELAY', 'DAY', 'DATE', 'DEPARTURE_TIME', 'ARRIVAL_TIME',
       'DEPARTURE', '0', 'carrier', 'late aircraft', 'nas', 'security',
       'weather'],
      dtype='object')

In [17]:
df.drop(columns = ['Unnamed: 0.1', 'Unnamed: 0', 'OP_CARRIER_FL_NUM', 'CANCELLATION_CODE', 'DIVERTED',  'CARRIER_DELAY','WEATHER_DELAY', 'NAS_DELAY', 'SECURITY_DELAY', 'LATE_AIRCRAFT_DELAY', 'DATE', 'WHY_DELAY', 'carrier', 'late aircraft', 'nas','security', 'OP_CARRIER_FL_NUM', 'CRS_ARR_TIME', 'ARR_TIME', 'CRS_DEP_TIME', "DEP_TIME", 'CANCELLED']  , inplace = True)

In [18]:

df['ARRIVAL'] = df['DEPARTURE'] + pd.to_timedelta(df['ACTUAL_ELAPSED_TIME'], unit ='m')

In [20]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 659119 entries, 0 to 736651
Data columns (total 18 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   YEAR                 659119 non-null  object        
 1   MONTH                659119 non-null  object        
 2   DAY_OF_WEEK          659119 non-null  float64       
 3   ORIGIN               659119 non-null  object        
 4   DEST                 659119 non-null  object        
 5   DEP_DELAY            659119 non-null  float64       
 6   DEP_DELAY_NEW        659119 non-null  float64       
 7   ARR_DELAY            659119 non-null  float64       
 8   ARR_DELAY_NEW        659119 non-null  float64       
 9   CRS_ELAPSED_TIME     659119 non-null  float64       
 10  ACTUAL_ELAPSED_TIME  659119 non-null  int32         
 11  DAY                  659119 non-null  object        
 12  DEPARTURE_TIME       659119 non-null  object        
 13  ARRIVAL_TIME  

In [21]:
df.drop(columns = ['YEAR','MONTH', 'DAY', 'DEPARTURE_TIME', 'ARRIVAL_TIME'], inplace = True)

In [22]:
# I need to create two data frames to work on one that is about departures from MIA and the 
#other one that deals with arrivals.

df_arr = df[df.DEST == 'MIA']
df_dep = df[df.ORIGIN == 'MIA']

In [23]:
# In df_ arr, the DEST value is the same for all flights, I will be eliminating that column as well as everything that has to 
# deal with departures.

df_arr.drop(columns = ['DEST', 'DEP_DELAY_NEW', 'DEPARTURE'], inplace = True)




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_arr.drop(columns = ['DEST', 'DEP_DELAY_NEW', 'DEPARTURE'], inplace = True)


In [24]:
# Converting Origin to numerical data by using get_dummies.
temp = pd.get_dummies(df_arr.ORIGIN, prefix = 'ORIGIN')



In [25]:
temp

Unnamed: 0,ORIGIN_ATL,ORIGIN_AUS,ORIGIN_BDL,ORIGIN_BGR,ORIGIN_BHM,ORIGIN_BNA,ORIGIN_BOS,ORIGIN_BWI,ORIGIN_CAE,ORIGIN_CHS,...,ORIGIN_SLC,ORIGIN_STL,ORIGIN_STT,ORIGIN_STX,ORIGIN_TLH,ORIGIN_TPA,ORIGIN_TTN,ORIGIN_TUL,ORIGIN_TYS,ORIGIN_XNA
0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
736642,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
736644,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
736645,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
736647,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [26]:
#Join the two dataframes
df_arr = pd.concat([df_arr, temp], axis = 1)

In [27]:
y_arr = df_arr['weather']

In [28]:
df_arr.drop(columns = ['weather', 'ORIGIN'], inplace = True)

In [29]:
df_arr

Unnamed: 0,DAY_OF_WEEK,DEP_DELAY,ARR_DELAY,ARR_DELAY_NEW,CRS_ELAPSED_TIME,ACTUAL_ELAPSED_TIME,0,ARRIVAL,ORIGIN_ATL,ORIGIN_AUS,...,ORIGIN_SLC,ORIGIN_STL,ORIGIN_STT,ORIGIN_STX,ORIGIN_TLH,ORIGIN_TPA,ORIGIN_TTN,ORIGIN_TUL,ORIGIN_TYS,ORIGIN_XNA
0,6.0,-16.0,-5.0,0.0,185.0,196,1,2010-05-01 18:35:00,0,0,...,0,0,1,0,0,0,0,0,0,0
1,7.0,15.0,0.0,0.0,185.0,170,1,2010-05-02 18:40:00,0,0,...,0,0,1,0,0,0,0,0,0,0
2,1.0,15.0,6.0,6.0,185.0,176,1,2010-05-03 18:46:00,0,0,...,0,0,1,0,0,0,0,0,0,0
3,2.0,-2.0,-11.0,0.0,185.0,176,1,2010-05-04 18:29:00,0,0,...,0,0,1,0,0,0,0,0,0,0
4,3.0,22.0,8.0,8.0,185.0,171,1,2010-05-05 18:48:00,0,0,...,0,0,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
736642,4.0,-8.0,-7.0,0.0,181.0,182,1,2021-09-16 14:44:00,0,0,...,0,0,0,0,0,0,0,0,0,0
736644,4.0,-3.0,-15.0,0.0,210.0,198,1,2021-09-16 11:10:00,0,0,...,0,0,0,0,0,0,0,0,0,0
736645,4.0,-3.0,-3.0,0.0,173.0,173,1,2021-09-16 11:20:00,0,0,...,0,0,0,0,0,0,0,0,0,0
736647,4.0,-4.0,-7.0,0.0,171.0,168,1,2021-09-16 10:09:00,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
X_arr_train, X_arr_test, y_arr_train, y_arr_test = train_test_split(df_arr, y_arr, test_size = .33, random_state = 42)

In [31]:
#Going to do the same things to df_dep that I just did with df_arr
df_dep.drop(columns = ['ORIGIN', 'ARR_DELAY', 'ARR_DELAY_NEW', 'ARRIVAL'], inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_dep.drop(columns = ['ORIGIN', 'ARR_DELAY', 'ARR_DELAY_NEW', 'ARRIVAL'], inplace = True)


In [32]:
# Converting DEST to numerical data by using get_dummies.
temp = pd.get_dummies(df_dep.DEST, prefix = 'ORIGIN')

In [33]:
temp

Unnamed: 0,ORIGIN_ATL,ORIGIN_AUS,ORIGIN_BDL,ORIGIN_BGR,ORIGIN_BHM,ORIGIN_BNA,ORIGIN_BOS,ORIGIN_BWI,ORIGIN_CAE,ORIGIN_CHS,...,ORIGIN_SLC,ORIGIN_STL,ORIGIN_STT,ORIGIN_STX,ORIGIN_TLH,ORIGIN_TPA,ORIGIN_TTN,ORIGIN_TUL,ORIGIN_TYS,ORIGIN_XNA
44,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
45,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
46,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
47,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
48,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
736643,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
736646,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
736649,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
736650,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
#Join the two dataframes
df_dep = pd.concat([df_dep, temp], axis = 1)

In [35]:
y_dep = df_dep['weather']

In [36]:
df_dep.drop(columns = ['weather', 'DEST'], inplace = True)

In [37]:
X_dep_train, X_dep_test, y_dep_train, y_dep_test = train_test_split(df_dep, y_dep, test_size = .33, random_state = 42)

In [38]:
# The data is not balanced
X_dep_train['0'].value_counts()

1    166834
0     54103
Name: 0, dtype: int64

In [39]:
y_dep_test.value_counts()

0    105020
1      3800
Name: weather, dtype: int64

In [40]:
!pip install imbalanced-learn

Defaulting to user installation because normal site-packages is not writeable


In [41]:

from imblearn.over_sampling import SMOTE
sm = SMOTE(random_state = 42, sampling_strategy = 1.0)
X_dep_train, y_dep_train = sm.fit_resample(X_dep_train, y_dep_train)


TypeError: The DType <class 'numpy.dtype[datetime64]'> could not be promoted by <class 'numpy.dtype[float64]'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[int32]'>, <class 'numpy.dtype[datetime64]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>)

In [42]:
from imblearn.over_sampling import SMOTE
sm = SMOTE(random_state = 42, sampling_strategy = 1.0)
X_arr_train, y_arr_train = sm.fit_resample(X_arr_train, y_arr_train)

TypeError: The DType <class 'numpy.dtype[datetime64]'> could not be promoted by <class 'numpy.dtype[float64]'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[int32]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[datetime64]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>)

In [43]:
y_dep_train.value_counts() 

0    213371
1      7566
Name: weather, dtype: int64

In [44]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report

In [45]:

lgreg = LogisticRegression()
lgreg.fit(X_arr_train, y_arr_train)
predlgreg = lgreg.predict(X_arr_test)


TypeError: The DType <class 'numpy.dtype[datetime64]'> could not be promoted by <class 'numpy.dtype[float64]'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[int32]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[datetime64]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>)

In [None]:
#print(classification_report(y_arr_test, predlgreg))

In [46]:
from sklearn.ensemble import RandomForestClassifier
rf=RandomForestClassifier(max_depth = 5, n_estimators=100, random_state = 42)
rf.fit(X_arr_train, y_arr_train)

TypeError: The DType <class 'numpy.dtype[datetime64]'> could not be promoted by <class 'numpy.dtype[float64]'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[int32]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[datetime64]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>, <class 'numpy.dtype[uint8]'>)