# Abalone Age Prediction

![method](https://user-images.githubusercontent.com/28593767/112245885-1e390080-8c95-11eb-9dee-5f66bb94cbf8.png)

회귀 분석을 이용한 전복 나이 예측

In [1]:
import numpy as np
import pandas as pd
import csv

# Seed NumPy's global random generator so that the random draws made later
# (e.g. the weight initialisation in init_model) repeat from run to run.
np.random.seed(777)

In [2]:
# Training hyperparameters.
# Mean and standard deviation of the normal distribution from which
# init_model() samples the initial weights.
RAND_MEAN, RAND_STD = 0, 3e-3

# NOTE(review): not referenced in the cells visible here; presumably the
# gradient-descent step size used by the training code -- confirm there.
LEARNING_RATE = 1e-3

In [3]:
# Entry point: load the data, initialise the parameters, then train and test.
def main_exec(epoch_count=10, mb_size=10, report=1, train_rate=0.8):
    """Run the whole experiment with the given settings.

    Args:
        epoch_count: Number of training epochs.
        mb_size: Minibatch size.
        report: Reporting setting, forwarded unchanged to train_and_test().
        train_rate: Training rate, forwarded unchanged to train_and_test().
    """
    load_dataset()
    init_model()
    train_and_test(
        epoch_count=epoch_count,
        mb_size=mb_size,
        report=report,
        train_rate=train_rate,
    )

In [4]:
# Implement load_dataset function
def load_dataset(path='data_nn/abalone.csv'):
    """Read the abalone CSV into the module-level ``data`` matrix.

    The first CSV column (sex code) is expanded into three one-hot columns,
    so every row of ``data`` holds 10 input values followed by 1 output
    value. Also sets the module-level ``input_cnt`` and ``output_cnt``.

    Args:
        path: Location of the CSV file. Defaults to the path the notebook
            has always used, so existing ``load_dataset()`` calls still work.
    """
    global data, input_cnt, output_cnt

    # The csv module asks for files to be opened with newline='' so that it
    # can handle line endings (including ones inside quoted fields) itself.
    with open(path, newline='') as csv_file:
        csvreader = csv.reader(csv_file)
        next(csvreader, None)    # Skip the first row (column info)
        # Blank lines come back from csv.reader as empty lists; drop them so
        # that row[0] below cannot raise IndexError.
        rows = [row for row in csvreader if row]

    # Input vector size grows from 8 to 10 because of the one-hot encoding.
    input_cnt, output_cnt = 10, 1    # Sizes of independent and dependent variables
    data = np.zeros([len(rows), input_cnt + output_cnt])   # Buffer

    # One-hot encoding: I = [1,0,0], M = [0,1,0], F = [0,0,1].
    # Any other code leaves all three columns at zero.
    sex_column = {'I': 0, 'M': 1, 'F': 2}
    for n, row in enumerate(rows):
        column = sex_column.get(row[0])
        if column is not None:
            data[n, column] = 1
        data[n, 3:] = row[1:]    # Remaining CSV fields fill the other columns

In [5]:
# Inspect the printed form of data (verify the one-hot vectors)

with open('data_nn/abalone.csv') as csvfile:
    csvreader = csv.reader(csvfile)
    next(csvreader, None)    # Skip the first row (column info)
    rows = list(csvreader)

# 3 one-hot columns plus the remaining CSV fields -> 11 columns per row
data = np.zeros([len(rows), 11])

for n, row in enumerate(rows):
    # Set the one-hot column that matches the code in the first CSV field
    if row[0] == 'I':
        data[n, 0] = 1
    if row[0] == 'M':
        data[n, 1] = 1
    if row[0] == 'F':
        data[n, 2] = 1
    data[n, 3:] = row[1:]
print(data)

[[ 0.      1.      0.     ...  0.101   0.15   15.    ]
 [ 0.      1.      0.     ...  0.0485  0.07    7.    ]
 [ 0.      0.      1.     ...  0.1415  0.21    9.    ]
 ...
 [ 0.      1.      0.     ...  0.2875  0.308   9.    ]
 [ 0.      0.      1.     ...  0.261   0.296  10.    ]
 [ 0.      1.      0.     ...  0.3765  0.495  12.    ]]


In [6]:
# Parameter initialisation

def init_model():
    """Create the module-level ``weight`` and ``bias`` parameters.

    ``weight`` has shape ``[input_cnt, output_cnt]`` and is sampled from a
    normal distribution with mean ``RAND_MEAN`` and standard deviation
    ``RAND_STD``; ``bias`` is a zero vector of length ``output_cnt``.
    """
    global weight, bias, input_cnt, output_cnt
    weight_shape = [input_cnt, output_cnt]
    weight = np.random.normal(loc=RAND_MEAN, scale=RAND_STD, size=weight_shape)
    bias = np.zeros([output_cnt])

In [7]:
# Implement training and testing function & output training result

def train_and_test(epoch_count, mb_size, report, train_rate):
    """Train for ``epoch_count`` epochs, report periodically, then print the final test accuracy.

    Args:
        epoch_count: Number of epochs to run.
        mb_size: Minibatch size, passed to arrange_data() and get_train_data().
        report: Reporting interval in epochs. A progress line is printed after
            every ``report``-th epoch; a value <= 0 disables these lines.
        train_rate: Passed to arrange_data() (presumably the share of the data
            used for training -- confirm in arrange_data).
    """
    # arrange_data() returns the number of minibatch steps in one epoch.
    step_count = arrange_data(mb_size, train_rate)
    test_x, test_y = get_test_data()                # x and y values of the test data

    for epoch in range(epoch_count):
        losses, accs = [], []         # Per-minibatch loss and accuracy for this epoch
        for n in range(step_count):
            # x and y values of the n-th training minibatch
            train_x, train_y = get_train_data(mb_size, n)
            loss, acc = run_train(train_x, train_y)
            losses.append(loss)
            accs.append(acc)

        if report > 0 and (epoch + 1) % report == 0:
            acc = run_test(test_x, test_y)
            # Format 5.3f: total field width 5, with 3 digits after the decimal point
            print("Epoch {} : Train - loss = {:5.3f}. accuracy = {:5.3f} / Test = {:5.3f}".
                  format(epoch + 1, np.mean(losses), np.mean(accs), acc))

    # The final evaluation runs once, after the last epoch, so it sits outside
    # the epoch loop; .format() is applied to the string, not embedded in it.
    final_acc = run_test(test_x, test_y)
    print("\n 최종 테스트 결과 : final accuracy = {:5.3f}".format(final_acc))