In [1]:
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt
import os
import csv
import gym
import shutil

data_collect_file='Pendulum-v0_collect.csv'
data_train_file='Pendulum-v0_train.csv'
csv_file = os.path.join(data_collect_file)
openedfile=open(csv_file, 'w')
csv_writer = csv.writer(openedfile)
csv_writer.writerow(['action','cos_theta_old','sin_theta_old','theta_dot_old','cos_theta','sin_theta','theta_dot'])
env = gym.make('Pendulum-v0')
obs_old=np.zeros(3,)
state_size=4
action_size=3
prediction_model=None


HIDDEN1_UNITS=256
HIDDEN2_UNITS=128
HIDDEN3_UNITS=64
HIDDEN4_UNITS=32


class cnn():
    def __init__(self, data_file,state_size=4,action_size=3,Testing=False):
     self.testing=Testing
     self.data= pd.read_csv(data_file)
     self.data['action']=(self.data['action']+2)/4
     self.data['cos_theta_old']=(self.data['cos_theta_old']+1)/2
     self.data['cos_theta']=(self.data['cos_theta']+1)/2
     self.data['sin_theta']=(self.data['sin_theta']+1)/2
     self.data['sin_theta_old']=(self.data['sin_theta_old']+1)/2
     self.data['theta_dot']=(self.data['theta_dot']+8)/16
     self.data['theta_dot_old']=(self.data['theta_dot_old']+8)/16
     #print('data is:',self.data)
     self.model = Net(state_size, action_size).float()
     self.optim = Adam(self.model.parameters(), lr=0.03)
     self.criterion= nn.MSELoss()
     
    def train(self,epoch) :
     batch_size=32
     data_len=self.data.shape[0]
     print('Size of data:',data_len)
     loss_track=[]
     running_loss = 0.0
     x=self.data[['action',"cos_theta_old","sin_theta_old","theta_dot_old"]].values
     y=self.data[["cos_theta","sin_theta","theta_dot"]].values
     for epo in range(epoch):
       for i in range(int(data_len/10)):
        rand_ids = np.random.randint(0, data_len, batch_size)
        #print("ids are ",rand_ids)
        self.optim.zero_grad()
        inputs, labels = x[rand_ids],y[rand_ids]
        #print("Inputs are:",inputs)
        #print("labels are:",labels)
        inputs=torch.from_numpy(inputs)
        labels=torch.from_numpy(labels).float()
        #print("type of imput",type(inputs))
        
        # forward + backward + optimize
        outputs = self.model(inputs.float())
        loss = self.criterion(outputs, labels)
        #print('loss is:',loss)
        loss.backward()
        self.optim.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.8f' %(epo + 1, i + 1, running_loss))
            loss_track.append(running_loss)
            running_loss = 0.0
     #plt.figure()       
     #plt.plot(loss_track,label='loss')
     #plt.grid()
     #plt.title('Displacement Value on stream of image data')
     #plt.legend(loc="upper left")
     #plt.show()
     
def predict(model,data):
 #print('Data is:',data)
 data[0]=(data[0]+2)/4
 data[1]=(data[1]+1)/2
 data[2]=(data[2]+1)/2
 data[3]=(data[3]+8)/16
 test_data=torch.from_numpy(data).float()
 out=model(test_data)     
 return out.detach().numpy()
         
        
class Net(nn.Module):
    def __init__(self, state_size, action_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(state_size, HIDDEN1_UNITS)
        self.fc2 = nn.Linear(HIDDEN1_UNITS, HIDDEN2_UNITS)
        self.fc3 = nn.Linear(HIDDEN2_UNITS, HIDDEN3_UNITS)
        self.fc4 = nn.Linear(HIDDEN3_UNITS, HIDDEN4_UNITS)
        self.fc5 = nn.Linear(HIDDEN4_UNITS, action_size)
    
    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.relu(x)
        x = self.fc4(x)
        x = F.relu(x)
        x = self.fc5(x)
        return x



Running simulation with mass: 1.0


In [2]:
def datacollection():
  global obs_old
  env.reset() 
  for _ in range(1000):
    #env.render()
    #select a random action in range of permissible action (+2 to -2)
    action=env.action_space.sample()
    obs,reward,done,_=env.step(action) # take a random action and observe the state of system 
    # obs gives three values=> [cos(theta),sin(theta),theta_dot] range +-(1,1,8)
    csv_writer.writerow([round(action[0],3),round(obs_old[0],3),round(obs_old[1],3),round(obs_old[2],3),round(obs[0],3),round(obs[1],3),round(obs[2],3)])
    #print(" obs:",obs,"obs_old",obs_old)
    obs_old=obs
  return 1
    
    

In [3]:
def trainnn(epo):
 neuralnet=cnn(data_train_file)
 neuralnet.train(epo)
 torch.save(neuralnet.model.state_dict(), "./sm_train.pt")
 return 1

#This function run synchronously
def copyfile(source,dest):
    shutil.copyfile(source, dest)  

def loaddata():
 global prediction_model
 device = torch.device('cpu')
 prediction_model=Net(state_size, action_size).float()
 prediction_model.load_state_dict(torch.load("./sm_test.pt", map_location=device))

In [4]:
def test(sim_steps,d_step): 
 env = gym.make('Pendulum-v0')
 env.reset() 
 pend_gym= []
 pend_sm=[]
 obs_old=np.zeros(3,)
 for k in range(sim_steps):
    #env.render()
    #select a random action in range of permissible action (+2 to -2)
    action=env.action_space.sample()
    action=action.astype('double')
    #action=(action+2)/4
    #print("type of action:",type(action))
    obs,reward,done,_=env.step(action) # take a random action and observe the state of system 
    if k%d_step==0:
     obs_old=obs
    y_predict = predict(prediction_model,np.concatenate((action,obs_old),axis=0))
    pend_gym.append(obs)
    pend_sm.append(y_predict)
    y_predict[0]=(y_predict[0]*2)-1
    y_predict[1]=(y_predict[1]*2)-1
    y_predict[2]=(y_predict[2]*16)-8
    obs_old=y_predict
    
 df_gym=pd.DataFrame(pend_gym,columns=['cos','sin','theta_dot'])
 df_sm=pd.DataFrame(pend_sm,columns=['cos','sin','theta_dot'])

 plt.figure(figsize=(10,4))
 plt.plot(df_gym["theta_dot"],label='from gym')
 plt.plot(df_sm["theta_dot"],label='from sm')
 plt.grid()
 plt.title('Displacement Value on stream of image data')
 plt.xlabel('image_frame_number')
 plt.ylabel('Disp Value')
 plt.legend(loc="upper left")
 plt.show()

 plt.figure(figsize=(10,4))
 plt.plot(df_gym["cos"],label='from gym')
 plt.plot(df_sm["cos"],label='from sm')
 plt.grid()
 plt.title('Displacement Value on stream of image data')
 plt.xlabel('image_frame_number')
 plt.ylabel('Disp Value')
 plt.legend(loc="upper left")
 plt.show()

 plt.figure(figsize=(10,4))
 plt.plot(df_gym["sin"],label='from gym')
 plt.plot(df_sm["sin"],label='from sm')
 plt.grid()
 plt.title('Displacement Value on stream of image data')
 plt.xlabel('image_frame_number')
 plt.ylabel('Disp Value')
 plt.legend(loc="upper left")
 plt.show()


In [None]:
for i in range(1,100):
 print('In iteration:',i)
 datacollection() # async
 trainnn(1)       #async
#s
 copyfile( "./sm_train.pt", "./sm_test.pt")
 copyfile( "./Pendulum-v0_collect.csv", "./Pendulum-v0_train.csv")
#intermittently load:
 loaddata()
#continuously test
test(1000,20)

In iteration: 1
Size of data: 870
In iteration: 2
Size of data: 871
In iteration: 3
Size of data: 1923
In iteration: 4
Size of data: 2983
In iteration: 5
Size of data: 3845
In iteration: 6
Size of data: 4892
In iteration: 7
Size of data: 5962
In iteration: 8
Size of data: 6842
In iteration: 9
Size of data: 7890
In iteration: 10
Size of data: 8932
In iteration: 11
Size of data: 9993
In iteration: 12
Size of data: 10861
In iteration: 13
Size of data: 11910
In iteration: 14
Size of data: 12966
In iteration: 15
Size of data: 13843
In iteration: 16
Size of data: 14884
In iteration: 17
Size of data: 15948
In iteration: 18
Size of data: 16999
In iteration: 19
Size of data: 17862
In iteration: 20
Size of data: 18922
In iteration: 21
Size of data: 19962
In iteration: 22
Size of data: 20839
[1,  2000] loss: 7.73735417
In iteration: 23
Size of data: 21895
[1,  2000] loss: 7.17660036
In iteration: 24
Size of data: 22939
