# In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import numpy as np
import os
import re
import heapq
import sys

# Directory searched for the dynacards CSV when no path is supplied.
DYNACARDS_PATH = "datasets"


def load_dynacards_data(dynacards_path=DYNACARDS_PATH):
    """Read the tagged dynacards CSV into a pandas DataFrame.

    Args:
        dynacards_path: Directory that contains
            ``tagged_dynacards_good.csv``. Defaults to ``DYNACARDS_PATH``.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` for that file.
    """
    csv_name = "tagged_dynacards_good.csv"
    return pd.read_csv(os.path.join(dynacards_path, csv_name))

# Separator between the numbers of a serialized array such as "[1.0 2.5 3]":
# a single bracket or whitespace character followed by any run of whitespace.
# Written as a raw string so the backslashes reach the regex engine untouched
# (the non-raw form relies on invalid string escapes, which newer Python
# versions warn about).
_TOKEN_SEPARATOR = re.compile(r'[\[\]\s]\s*')


def tokens(string):
    """Parse a bracketed, whitespace-separated list of numbers into floats.

    Args:
        string: Value to parse; it is converted with ``str()`` first, so
            non-string inputs (e.g. a bare number) are accepted.

    Returns:
        A list of floats in the order they appear. Empty pieces produced by
        leading/trailing brackets or adjacent separators are skipped, so
        ``""`` and ``"[]"`` both give ``[]``.

    Raises:
        ValueError: If a non-empty piece is not a valid float literal.
    """
    return [float(t) for t in _TOKEN_SEPARATOR.split(str(string)) if t]

def get_min_max(data):
    """Return the smallest and largest number found across all of *data*.

    Every entry of *data* is parsed with ``tokens`` and all resulting
    numbers are considered together.

    Args:
        data: Iterable of serialized number lists accepted by ``tokens``.

    Returns:
        A ``(min_value, max_value)`` tuple. When no number is found at all
        (empty *data* or only empty entries) the untouched sentinels
        ``(inf, -inf)`` are returned.
    """
    # Start from +/- infinity so that any parsed number replaces the
    # sentinel. ``sys.float_info.min`` is the smallest *positive* normalized
    # float, not the most negative one, so using it as the initial maximum
    # gave a wrong result for data without positive values.
    min_value, max_value = float("inf"), float("-inf")
    for e in data:
        for m in tokens(e):
            min_value = min(m, min_value)
            max_value = max(m, max_value)
    return min_value, max_value


def feature_scaling(nmin, nmax, arr):
    """Min-max scale the values of *arr* relative to ``[nmin, nmax]``.

    Args:
        nmin: Value that maps to 0.
        nmax: Value that maps to 1.
        arr: Iterable of numbers to rescale.

    Returns:
        A list with ``(value - nmin) / (nmax - nmin)`` for each value, or
        ``None`` when ``nmax == nmin`` (the range is empty, so the scaling
        is undefined).
    """
    if nmax == nmin:
        return None
    span = nmax - nmin
    return [(value - nmin) / span for value in arr]


def scale(data, df, col_name):
    """Min-max scale every entry of *data* and store the result in *df*.

    The bounds are taken over all numbers in all entries (via
    ``get_min_max``), then each entry is parsed with ``tokens`` and rescaled
    with ``feature_scaling`` using those shared bounds.

    Args:
        data: Iterable of serialized number lists; it is iterated twice.
        df: Object supporting ``df[col_name] = ...`` (a DataFrame in this
            file); it is modified in place.
        col_name: Key under which the list of scaled entries is stored.

    Returns:
        None. Each stored entry is a list of floats, or ``None`` when the
        overall minimum equals the overall maximum.
    """
    lower, upper = get_min_max(data)
    df[col_name] = [
        feature_scaling(lower, upper, tokens(entry)) for entry in data
    ]

    
# Load the tagged dynacards and hold out 20% of the rows; random_state=42
# fixes the seed passed to train_test_split.
df = load_dynacards_data()
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

# Scale the 'load' and 'position' columns of the training rows into a fresh
# DataFrame and write it to disk. test_set is not used further here.
w_df = pd.DataFrame()
for column in ('load', 'position'):
    scale(train_set[column], w_df, column)
w_df.to_csv('output.csv', index=False)
    