# TRAINING AND TUNING MODEL

In [1]:
import pandas as pd
import numpy as np
from sklearn import neighbors
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor, KNeighborsClassifier
from sklearn.multioutput import MultiOutputRegressor
import joblib
import seaborn as sns
import matplotlib

In [2]:
# Load the raw training table from the working directory.
train_data = pd.read_csv(filepath_or_buffer='train.csv')

In [3]:
# Build the regression problem: features are the simulated state vector,
# targets are the per-component differences (real - simulated).
state_components = ['x', 'y', 'z', 'Vx', 'Vy', 'Vz']

# Take an explicit copy of the column subset: assigning new columns onto a
# slice of train_data would otherwise raise pandas' SettingWithCopyWarning.
data = train_data[['sat_id', 'epoch', 'x_sim', 'x', 'y_sim', 'y', 'z_sim', 'z',
                   'Vx_sim', 'Vx', 'Vy_sim', 'Vy', 'Vz_sim', 'Vz']].copy()

# One "<component>delta" column per state component, added in the same
# order as before (xdelta, ydelta, zdelta, Vxdelta, Vydelta, Vzdelta).
for component in state_components:
    data[component + 'delta'] = data[component] - data[component + '_sim']

X = data[[component + '_sim' for component in state_components]]
y = data[[component + 'delta' for component in state_components]]

In [4]:
# Keep 99% of the rows for fitting and hold out the remaining 1%;
# random_state=0 makes the shuffle reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    random_state=0,
    train_size=0.99,
)

In [5]:
# Nearest-neighbour regressor using a single neighbour (n_neighbors=1),
# with the neighbour search spread over 8 parallel jobs (n_jobs=8).
model = KNeighborsRegressor(
    n_jobs=8,
    n_neighbors=1,
)
# Fit on the training split; all six delta columns are learned at once.
model.fit(X_train, y_train)

KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',
                    metric_params=None, n_jobs=8, n_neighbors=1, p=2,
                    weights='uniform')

In [6]:
# Persist the fitted model to disk so it can be reloaded for prediction.
filename = 'finalized_model.sav'
joblib.dump(value=model, filename=filename)

['finalized_model.sav']

# MAKE PREDICTION FOR SUBMISSION

In [7]:
# Load the table of rows to predict from the working directory.
test = pd.read_csv(filepath_or_buffer='test.csv')

In [8]:
# Simulated state-vector columns: the same six features, in the same order,
# that the model was trained on.
feature_columns = ['x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim']

# Selecting a list of columns already yields a DataFrame, so no extra
# pd.DataFrame(...) wrapping is needed.
test_data = test[['id', 'sat_id'] + feature_columns]
X_test = test_data[feature_columns]

In [9]:
# Reload the persisted model and predict the six deltas for every test row.
# NOTE: joblib.load unpickles the file, so only load files you trust.
filename = 'finalized_model.sav'
loaded_model = joblib.load(filename=filename)
predicted = loaded_model.predict(X=X_test)

In [10]:
# Wrap the prediction array in a DataFrame whose six columns are named
# after the state component each predicted delta belongs to.
predicted = pd.DataFrame(
    predicted,
    columns=["x", "y", "z", "Vx", "Vy", "Vz"],
)

In [11]:
# Build the submission: each output column is the simulated value plus the
# scaled predicted delta for that state component.
# NOTE(review): the origin of the 1.33 multiplier is not shown here;
# presumably it was tuned empirically. Confirm before changing it.
CORRECTION_SCALE = 1.33
STATE_COLUMNS = ['x', 'y', 'z', 'Vx', 'Vy', 'Vz']

# Start from the id column so the frame takes test's index and real dtypes.
# The previous approach filled an empty, object-dtype frame through
# `.loc[:, col] = ...`, which depends on pandas enlarging an empty frame.
submission = pd.DataFrame({'id': test['id']})
for state_col in STATE_COLUMNS:
    submission[state_col] = (
        test[state_col + '_sim'] + CORRECTION_SCALE * predicted[state_col]
    )

# Fix the column order explicitly before writing the file.
submission = submission[['id'] + STATE_COLUMNS]
submission.to_csv("submission.csv", index=False)