In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
import numpy as np


In [2]:
df = pd.read_csv("../data/data_new.csv")

In [3]:
df.shape

(56442, 3)

In [4]:
df.dropna(axis=0)
df.shape

(56442, 3)

In [None]:
df.head

In [None]:
sns.pairplot(df, diag_kind='hist')

plt.show()

In [None]:
scatter_matrix(df, alpha=0.5, figsize=(8, 8), diagonal='kde')

plt.show()

In [None]:

sns.boxplot(x="degree", y="ir1", data=df)

plt.show()


In [None]:
# Grouped boxplots by pandas

df.boxplot(column=["ir1"], by=["degree"])

plt.show()


In [5]:
#df.replace({'C1': 'a_old'}, {'C1': 'a_new'})
#출처: https://rfriend.tistory.com/265 [R, Python 분석과 프로그래밍의 친구 (by R Friend)]

df = df.replace({'degree':[1,2,3,4,5]}, {'degree':['leftleft','left','straight','right','rightright']})

# 0값 제거
df = df[df !=0]

# Feature 추가(ir 차 / 비율)

In [6]:
df['ir_1-2'] = round(df['ir1'] - df['ir2'],3)
df['ir_1/2'] = round(df['ir1'] / df['ir2'],3) # 소숫점 2자리
# df['ir_1-2'] = df['ir1'] - df['ir2']
# df['ir_1/2'] = df['ir1'] / df['ir2']
df

df = df[df !=0]

In [7]:
df.isnull()

Unnamed: 0,ir1,ir2,degree,ir_1-2,ir_1/2
0,False,False,False,False,False
1,False,False,False,False,False
2,False,False,False,False,False
3,False,False,False,False,False
4,False,False,False,False,False
...,...,...,...,...,...
56437,False,False,False,False,False
56438,False,False,False,False,False
56439,False,False,False,False,False
56440,False,False,False,False,False


In [None]:
# normalization

from sklearn.preprocessing import MaxAbsScaler
scaler = MaxAbsScaler()
scaler.fit(df)
df_n = pd.DataFrame(scaler.transform(df),columns=['ir_L', 'ir_R', 'ir_1-2', 'ir_1/2'])

In [None]:
inputs

# Split data

In [10]:
# Original Data
df_inputs = df.loc[:,['ir1', 'ir2', 'ir_1-2', 'ir_1/2']]
df_outputs = df.loc[:,['degree']]

# Normalized Data
# df_inputs = df_n.loc[:,['ir_L', 'ir_R']]
# df_outputs = df_n.loc[:,['degree']]

inputs = np.array(df_inputs)
outputs = np.array(df_outputs)

In [11]:
num_data = len(inputs)
TRAIN_SPLIT = int(0.6 * num_data)
TEST_SPLIT = int(0.2 * num_data + TRAIN_SPLIT)

In [12]:
inputs_train, inputs_test, inputs_validate = np.split(inputs, [TRAIN_SPLIT, TEST_SPLIT])
outputs_train, outputs_test, outputs_validate = np.split(outputs, [TRAIN_SPLIT, TEST_SPLIT])

In [13]:
print(inputs_train.shape)
print(inputs_train.ndim)

(33865, 4)
2


# Random Forest Classification

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
rf = RandomForestClassifier(n_estimators=100, oob_score=True, random_state=123456)
rf.fit(inputs_train, outputs_train)

In [None]:
from sklearn.metrics import accuracy_score

predicted = rf.predict(inputs_test)
accuracy = accuracy_score(outputs_test, predicted)

print(f'Out-of-bag score estimate: {rf.oob_score_:.3}')
print(f'Mean accuracy score: {accuracy:.3}')

In [None]:
df.degree

In [None]:
from sklearn.metrics import confusion_matrix

cm = pd.DataFrame(confusion_matrix(outputs_test, predicted))
#cm = pd.DataFrame(confusion_matrix(outputs_test, predicted), columns=df.degree, index=df.degree)

sns.heatmap(cm, annot=True)

# DNN

In [8]:
import tensorflow as tf

In [9]:
inputs = []
outputs = []

DIRECTIONS = [
    'leftleft',
    'left',
    'straight',
    'right',
    'rightright'
]

NUM_DIRECTIONS = len(DIRECTIONS)
ONE_HOT_ENCODED_DIRECTIONS = np.eye(NUM_DIRECTIONS)

NUM_DATAS = len(df)

for i in range(NUM_DATAS):
    tensor = []
    tensor += [
        (df['ir1'][i]), (df['ir2'][i]), (df['ir_1-2'][i]), 
        (df['ir_1/2'][i])
    ]
#     tensor += [
#         (df['ir1'][i]), (df['ir2'][i])
#     ]
    output = ONE_HOT_ENCODED_DIRECTIONS[DIRECTIONS.index(df['degree'][i])]
    
    inputs.append(tensor)
    outputs.append(output)
    
# from sklearn.preprocessing import MaxAbsScaler
# scaler = MaxAbsScaler()
# scaler.fit(inputs)
# inputs = scaler.transform(inputs)

inputs = np.array(inputs)
outputs = np.array(outputs)

In [None]:
# for randomize
tf.random.set_seed(1337)

num_inputs = len(inputs)
randomize = np.arange(num_inputs)
np.random.shuffle(randomize)


inputs = inputs[randomize]
outputs = outputs[randomize]

In [25]:

model = tf.keras.Sequential()
# model.add(tf.keras.Input(shape = inputs_train.shape[1]))
# model.add(tf.keras.layers.SimpleRNN(100, return_sequences = False, input_shape = (inputs_train.shape[1], ) ))
model.add(tf.keras.layers.SimpleRNN(10, input_shape = (1,4), return_sequences = False))
# model.add(tf.keras.layers.LSTM(20, input_shape=(1,2))) # (timestep, feature) 
# model.add(tf.keras.layers.Dense(10, activation='relu'))
# model.add(tf.keras.layers.Dropout(rate=.5))
# model.add(tf.keras.layers.Dense(100, activation='relu'))
# model.add(tf.keras.layers.Dropout(rate=.5))
# model.add(tf.keras.layers.Dense(10, activation='relu'))
# model.add(tf.keras.layers.Dropout(rate=.1))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(5, activation='softmax')) # softmax, sigmoid

# model.compile(optimizer='adam', loss='mae', metrics=['mae']) # mse / mae / acc
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])
# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=[tf.keras.metrics.CategoricalAccuracy()])

model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_5 (SimpleRNN)     (None, 10)                150       
_________________________________________________________________
flatten_4 (Flatten)          (None, 10)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 5)                 55        
Total params: 205
Trainable params: 205
Non-trainable params: 0
_________________________________________________________________


In [26]:
# inputs_train = np.reshape(inputs_train, (inputs_train.shape[0], 1, inputs_train.shape[1]))
# outputs_train = np.reshape(outputs_train, (outputs_train.shape[0], 1, outputs_train.shape[1]))

# history = model.fit(inputs_train, outputs_train, epochs=1, batch_size=1, validation_data=(inputs_validate, outputs_validate))
history = model.fit(inputs_train, outputs_train, epochs=10, batch_size=10)

# history = model.fit(inputs_train, outputs_train, epochs=10, batch_size=1)


Train on 33865 samples
Epoch 1/10
   10/33865 [..............................] - ETA: 1:35:22

UnimplementedError:  Cast string to float is not supported
	 [[node metrics/acc/Cast (defined at /Users/peter/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py:1751) ]] [Op:__inference_distributed_function_2878]

Function call stack:
distributed_function


In [None]:
outputs_predict = model.predict(inputs_test)

In [None]:
print(outputs_predict[1][1])

In [None]:
plt.scatter(outputs_test, outputs_predict, alpha=0.4)
plt.xlabel("Actual degree")
plt.ylabel("Predicted degree")
plt.title("DNN(Regression)")
plt.show()

# Feature 추가(mot_L, mot_R)

In [None]:
def chdeg(degree_original):
    degree = ((degree_original - 50)*1.5)//1
    if degree <= 0:
        left = 25
        right = 25 + abs(degree)
    else:
        left = 25 - abs(degree)
        right = 25
    return left, right

left = []
right = []
for i in range(len(df)):
    left.append(chdeg(df['degree'][i])[0])
    right.append(chdeg(df['degree'][i])[1])
    
df['mot_L'] = left
df['mot_R'] = right

In [None]:
sns.pairplot(df, diag_kind='hist')

plt.show()

In [None]:
scatter_matrix(df, alpha=0.5, figsize=(8, 8), diagonal='kde')

plt.show()

# Feature 추가(ir distance 차이, 비율)

### ir 좌우 차이

In [None]:
df['ir_R-L'] = df['ir_R'] - df['ir_L']
df['ir_L/R'] = df['ir_L'] / df['ir_R'] 
df

In [None]:
sns.pairplot(df, diag_kind='hist')

plt.show()

In [None]:
scatter_matrix(df, alpha=0.5, figsize=(8, 8), diagonal='kde')

plt.show()

# Except degree under 10 over 90 data

In [None]:
df.sort_values(by=['degree'])

In [None]:
df_s = df.loc[(10 < df['degree']) & (df['degree'] < 90), : ]

In [None]:
sns.pairplot(df_s, diag_kind='hist')

plt.show()

In [None]:
scatter_matrix(df_s, alpha=0.5, figsize=(8, 8), diagonal='kde')

plt.show()

# exclude degree over 50

In [None]:
df_d = df.loc[df['degree'] < 50, : ]
df_d

In [None]:
sns.pairplot(df_d, diag_kind='hist')

plt.show()

In [None]:
scatter_matrix(df_d, alpha=0.5, figsize=(8, 8), diagonal='kde')

plt.show()

# Time stamp 설정

In [None]:
# Time Stamp를 지정하여 그 안의 값들에 대해 대표값으로 데이터를 구성
# 대표값은 평균(averave), 중앙값(median), 최빈값(mode) 중 평균값과 중앙값을 우선적으로 테스트한다.

In [None]:
df

In [None]:
frame = 5
length = len(df)
df_frame = pd.DataFrame(columns=['ir_L', 'ir_R', 'degree', 'mot_L', 'mot_R', 'ir_R-L', 'ir_L/R'])
ir_L, ir_R, degree, mot_L, mot_R, ir_RL, ir_LR = [], [], [], [], [], [], []

for i in range(length//frame):
    a = i*frame
    b = (i+1)*frame
    ir_L.append(np.median(df['ir_L'][a:b]))
    ir_R.append(np.median(df['ir_R'][a:b]))
    degree.append(np.median(df['degree'][a:b]))
    mot_L.append(np.median(df['mot_L'][a:b]))
    mot_R.append(np.median(df['mot_R'][a:b]))
    ir_RL.append(np.median(df['ir_R-L'][a:b]))
    ir_LR.append(np.median(df['ir_L/R'][a:b]))
df_frame['ir_L'] = ir_L
df_frame['ir_R'] = ir_R
df_frame['degree'] = degree
df_frame['mot_L'] = mot_L
df_frame['mot_R'] = mot_R
df_frame['ir_R-L'] = ir_RL
df_frame['ir_L/R'] = ir_LR

In [None]:
df_frame

In [None]:
sns.pairplot(df_frame, diag_kind='hist')

plt.show()

In [None]:
scatter_matrix(df_frame, alpha=0.5, figsize=(8, 8), diagonal='kde')

plt.show()

In [None]:
import tensorflow as tf

# Original Data
df_inputs = df_frame.loc[:,['ir_L', 'ir_R', 'ir_R-L', 'ir_L/R']]
df_outputs = df_frame.loc[:,['degree']]

# Normalized Data
# df_inputs = df_n.loc[:,['ir_L', 'ir_R']]
# df_outputs = df_n.loc[:,['degree']]

inputs = np.array(df_inputs)
outputs = np.array(df_outputs)

num_data = len(inputs)
TRAIN_SPLIT = int(0.6 * num_data)
TEST_SPLIT = int(0.2 * num_data + TRAIN_SPLIT)

inputs_train, inputs_test, inputs_validate = np.split(inputs, [TRAIN_SPLIT, TEST_SPLIT])
outputs_train, outputs_test, outputs_validate = np.split(outputs, [TRAIN_SPLIT, TEST_SPLIT])



model = tf.keras.Sequential()
model.add(tf.keras.Input(shape = inputs_train.shape[1]))
model.add(tf.keras.layers.Dense(50, activation='relu'))
# model.add(tf.keras.layers.Dropout(rate=.2))
# model.add(tf.keras.layers.Dense(10, activation='relu'))
# model.add(tf.keras.layers.Dropout(rate=.2))

model.add(tf.keras.layers.Dense(1, activation='relu'))



# model.compile(optimizer='adam', loss='mae', metrics=['mae']) # mse / mae
model.compile(optimizer='adam', loss='mse', metrics=['mse']) # mse / mae
model.summary()

In [None]:
history = model.fit(inputs_train, outputs_train, epochs=1, batch_size=1, validation_data=(inputs_validate, outputs_validate))

In [None]:
outputs_predict = model.predict(inputs_test)

In [None]:
plt.scatter(outputs_test, outputs_predict, alpha=0.4)
plt.xlabel("Actual degree")
plt.ylabel("Predicted degree")
plt.title("DNN(Regression)")
plt.show()

In [None]:
ir_L = 18.8
ir_R = 2.7
ir_RL = ir_R - ir_L
ir_LR = ir_L / ir_R
data = [ir_L, ir_R, ir_RL, ir_LR]
data

In [None]:
model.predict([[data]])[0][0]

In [None]:
from train import Model

In [None]:
m = Model("../data/data.csv")
m.feature_engineering()
m.set_data()
m.train()

In [None]:
plt.scatter(m.outputs_test, m.outputs_predict, alpha=0.4)
plt.xlabel("Actual degree")
plt.ylabel("Predicted degree")
plt.title("DNN(Regression)")
plt.show()

In [None]:
m.collect_data()

In [None]:
from modicar import Car
import modi

In [None]:
bundle = modi.MODI()
car = Car()

mot = bundle.motors[0]
ir1 = bundle.irs[0]
ir2 = bundle.irs[1]
btn = bundle.buttons[0]
dial = bundle.dials[0]

In [None]:
car.learn()

In [None]:
car.collect_data(mot, ir1, ir2, btn, dial)

In [None]:
car.start(mot, ir1, ir2)