In [1]:
from time import sleep

import ipywidgets as widgets
import numpy as np
import pandas as pd
import plotly.express as px

# Star import kept last so any names it exports still take precedence, as before.
# Presumably this is where RocketLander comes from -- confirm against rocket.py.
from rocket import *

In [3]:
class Control():
    """PID-style controller that flies and lands the rocket in ``env``.

    The controller is driven through :meth:`run`, which plays one episode and
    records every observation in ``rocket_state``.  When recording is enabled
    the individual PID terms of every step are stored in ``pid_state`` as well,
    so :meth:`debug` can plot them.

    Observation layout as used by this class (names taken from :meth:`debug`):
    ``state[0]`` x, ``state[1]`` y, ``state[2]`` theta, ``state[5]`` throttle,
    ``state[6]`` gimbal, ``state[7]`` vx, ``state[8]`` vy, ``state[9]`` vtheta.
    ``state[3]`` / ``state[4]`` are treated as leg-contact flags.

    Args:
        env: environment exposing ``reset()``, ``step(action)``, ``render()``,
            ``stepnumber`` and ``lander.angularVelocity``.
        record_pid: ``True``/``False`` forces PID-term recording on/off.
            ``None`` (default) follows a global ``checkbox`` widget when one
            exists in the notebook namespace and records nothing otherwise.
    """

    def __init__(self, env, record_pid=None):
        self.env = env
        self.record_pid = record_pid
        # Gains used once the hover-slam phase has been triggered; same order
        # as ``params`` in pid(): (kp, ki, kd) altitude, then (kp, ki, kd) angle.
        self.hoverslam_params = [4.23279606, -0.2, -5.25, 39.30850588, 0, -7.83525488]
        self.reset()

    def reset(self):
        """Clear integrators, recorded traces and the hover-slam latch."""
        # errint_ang is initialised but not used by pid(): the angle loop has
        # no integral term.
        self.errint_alt, self.errint_ang = 0, 0
        self.rocket_state = []
        self.pid_state = []
        self.hoverslam = False

    def _recording_enabled(self):
        """Return True when per-step PID terms should be stored in ``pid_state``."""
        if self.record_pid is not None:
            return bool(self.record_pid)
        # Backward compatible default: follow the notebook's "Debug" checkbox,
        # but do not fail when the widget cell has not been executed.
        toggle = globals().get("checkbox")
        return bool(getattr(toggle, "value", False))

    def pid(self, state, params):
        """Compute the action for one observation.

        Args:
            state: observation indexable up to ``state[9]`` (see class docstring).
            params: six gains ``(kp_alt, ki_alt, kd_alt, kp_ang, ki_ang, kd_ang)``.
                They are replaced by ``self.hoverslam_params`` once the
                hover-slam phase is active.

        Returns:
            numpy array with three components, each clipped to ``[-1, 1]``
            (``[0, -1, 0]`` as soon as a leg-contact flag is set).
        """
        kp_alt, ki_alt, kd_alt, kp_ang, ki_ang, kd_ang = params
        ki_max = 1.17  # upper bound of the integral contribution

        # The hover-slam phase latches the first time state[1] exceeds 0.6 and
        # stays active until reset().  The first phase targets 0.8, which lies
        # above that threshold.
        if state[1] > 0.6:
            self.hoverslam = True

        # Set-points (target values)
        if self.hoverslam:
            kp_alt, ki_alt, kd_alt, kp_ang, ki_ang, kd_ang = self.hoverslam_params
            alt_tgt = -1.45  # a bit more (lower target) for the hover slam
            ang_tgt = (.02 * np.pi) * (state[0] + state[7])
        else:
            alt_tgt = 0.8
            ang_tgt = 0

        # Error values
        alt_error = alt_tgt - state[1]
        ang_error = ang_tgt - state[2]
        # The gain is folded into the accumulator; 60 is presumably the
        # simulation rate in steps per second -- TODO confirm against the env.
        self.errint_alt += ki_alt * alt_error / 60
        alt_int = np.clip(self.errint_alt, 0, ki_max)

        # PID adjustments (the derivative terms use the measured velocities)
        alt_adj = kp_alt * alt_error + kd_alt * state[8] - alt_int
        ang_adj = kp_ang * ang_error + kd_ang * state[9]

        if self._recording_enabled():
            self.pid_state.append([alt_error, kp_alt * alt_error, kd_alt * state[8], alt_int, alt_adj,
                                   ang_error, ang_adj])

        # NOTE(review): the action layout looks like (gimbal, throttle, side
        # thruster) -- confirm against the environment's step().
        if self.hoverslam:
            a = np.array([-0.5 * ang_adj, alt_adj, ang_adj])
        else:
            a = np.array([ang_adj, alt_adj, 0])
        a = np.clip(a, -1, +1)

        # If the legs are on the ground we made it: kill the engines.
        # (Original author's note: this does not really work because of a delay.)
        if state[3] or state[4]:
            a = np.array([0.0, -1.0, 0.0])

        return a

    def run(self, params, verbose=False):
        """Run one episode with the given PID gains and return the summed reward.

        Args:
            params: six gains, forwarded to :meth:`pid`.
            verbose: when true, render every frame and sleep 5 ms after it.

        Returns:
            Sum of the rewards returned by ``env.step`` over the episode.
        """
        done = False
        self.reset()
        state = self.env.reset()
        if verbose:
            self.env.render()
            sleep(.005)
        self.rocket_state.append(state)
        total = 0
        while not done:
            a = self.pid(state, params)
            state, reward, done, _ = self.env.step(a)
            # Inject a disturbance: three candidate step numbers in [50, 400)
            # are redrawn on every step, and when the current step number is
            # among them the lander gets a random angular velocity.
            if self.env.stepnumber in np.random.randint(50, 400, 3):
                self.env.lander.angularVelocity = (0.3) * np.random.uniform(-1, 1)
            total += reward
            if verbose:
                self.env.render()
                sleep(.005)
            self.rocket_state.append(state)
        return total

    def optimize(self, params, current_score, step):
        """Run one step of randomized hill climbing.

        Args:
            params: current gains (any sequence of six numbers).
            current_score: score the candidate has to beat.
            step: 1-based iteration number; the noise standard deviation is
                ``5.0 / step``, so it must be positive.

        Returns:
            ``(candidate, average_score)`` when the candidate's 5-episode
            average beats ``current_score``, otherwise the unchanged
            ``(params, current_score)``.
        """
        # Accept plain lists as well as arrays.
        base = np.asarray(params, dtype=float)

        # add gaussian noise (less noise as the step number increases)
        test_params = base + np.random.normal(0, 5.0 / step, size=base.shape)

        # test params over 5 trial avg
        scores = [self.run(test_params) for _ in range(5)]
        avg = np.mean(scores)

        # update params if improved
        if avg > current_score:
            return test_params, avg
        return params, current_score

    def debug(self):
        """Plot the traces recorded during the last :meth:`run`.

        Shows one line chart with the rocket state and, when PID terms were
        recorded, a second one with those terms.  Empty traces are skipped,
        because reshaping an empty array with an inferred dimension raises.
        """
        if self.rocket_state:
            states = np.array(self.rocket_state).reshape(len(self.rocket_state), -1)
            f = pd.DataFrame({'x': states[:, 0],
                              'y': states[:, 1],
                              'theta': states[:, 2],
                              'throtole': states[:, 5],
                              'gimbal': states[:, 6],
                              'vx': states[:, 7],
                              'vy': states[:, 8],
                              'vtheta': states[:, 9]})
            fig = px.line(f)
            fig.show()

        if not self.pid_state:
            print("No PID trace recorded; enable recording before running to plot it.")
            return

        states = np.array(self.pid_state).reshape(len(self.pid_state), -1)
        f = pd.DataFrame({'alt_err': states[:, 0],
                          'alt_kp': states[:, 1],
                          'alt_kd': states[:, 2],
                          'alt_ki': states[:, 3],
                          'alt_adj': states[:, 4],
                          'ang_err': states[:, 5],
                          'ang_adj': states[:, 6]
                          })
        fig = px.line(f)
        fig.show()
        return

In [4]:
# Gain sets are ordered (Kp, Ki, Kd) for altitude, then (Kp, Ki, Kd) for angle.
# Best set found so far for the landing phase (per the original note), kept for reference:
#   [4.23279606, -0.2, -5.25, 39.30850588, 0, -7.83525488]
# Set used for the flying phase; these are the initial slider values.
params = [
    4.23279606, 0, -4.75,  # altitude Kp, Ki, Kd
    29.1, 0, 52.4,         # angle Kp, Ki, Kd
]

def simulate(button):
    """Button callback: fly one rendered episode with the gains set on the sliders.

    Args:
        button: the widget that triggered the callback (unused; required by
            the ``on_click`` callback signature).

    Prints the episode score and, when the Debug checkbox is ticked, plots the
    recorded traces.
    """
    # Slider order matches Control.pid: altitude Kp/Ki/Kd, then angle Kp/Ki/Kd.
    params = np.array([slider.value for slider in widget_params[:6]])
    cnt = Control(RocketLander())
    cnt.env.maxstep = 2000
    cnt.env.seed()
    cnt.env.ypos = 0.1
    try:
        score = cnt.run(params, True)
    finally:
        # Always release the render window, even if the episode raised or the
        # kernel was interrupted.
        cnt.env.close()
    print ("Score:",score)
    if checkbox.value:
        cnt.debug()
        
# One slider per PID gain, initialised from `params`:
# indices 0-2 are the altitude gains, indices 3-5 the angle gains.
widget_params = [
    widgets.FloatSlider(
        value=initial,
        min=-40,
        max=60,
        step=0.1,
        description=name,
        disabled=False,
        continuous_update=False,
        orientation='horizontal',
        readout=True,
        readout_format='.2f'
    )
    for name, initial in zip(['Kp', 'Ki', 'Kd', 'Kp', 'Ki', 'Kd'], params)
]

# Starts a simulation with the current slider values.
button = widgets.Button(
    description='Start',
    disabled=False,
    button_style='',
    tooltip='Click me',
    icon='check'
)

# When ticked, the PID terms are recorded during the run and plotted afterwards.
checkbox = widgets.Checkbox(
    value=False,
    description='Debug',
    disabled=False,
    indent=False
)
button.on_click(simulate)

# Layout: altitude gains | angle gains | simulation controls.
center_layout = widgets.Layout(display='flex', flex_flow='column', align_items='center')
alt_box = widgets.VBox([widgets.Label("Altitude", layout=center_layout), *widget_params[0:3]])
ang_box = widgets.VBox([widgets.Label("Angle", layout=center_layout), *widget_params[3:6]])
cnt_box = widgets.VBox([widgets.Label("Simulation"), button, checkbox])
widgets.HBox([alt_box, ang_box, cnt_box])

HBox(children=(VBox(children=(Label(value='Altitude', layout=Layout(align_items='center', display='flex', flex…

### Evristika za odbiranje na parametrite

In [None]:
# Starting point for the search: altitude (Kp, Ki, Kd) followed by angle (Kp, Ki, Kd).
params = np.array([4.23279606 ,-0.2, -5.1 ,  39.30850588 ,0, -7.83525488])
cnt = Control(RocketLander())
cnt.env.maxstep = 900
cnt.env.seed()
# Baseline a candidate has to beat before it replaces `params`.
score = 1
try:
    # Randomized hill climbing; optimize() shrinks the noise as the step number grows.
    for steps in range(51):
        params,score = cnt.optimize(params,score,steps+1)
        if steps%10 == 0:
            print( "Step:",steps,"Score:",score,"Params:",params)

    # Get data for final run (rendered)
    scores = []
    for trial in range(1):
        score = cnt.run(params,True)
        scores.append(score)
finally:
    # Release the render window even when the (long) search is interrupted.
    cnt.env.close()
print ("Average Score:",np.mean(scores))
# The PID-term plot needs a trace, which is only recorded while the Debug checkbox is ticked.
cnt.debug()