In [1]:
import numpy as np
import random
import pandas as pd

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Activation
from tensorflow.keras.optimizers import Adam

# Generate random wind following bronian mouvement 

In [2]:
class Wind:
    def __init__(self, A=np.array([[0.9,0.2],[1,1.1]]), sigma=5, _V0=np.zeros(2)):
        '''Parameters:
        dV = - A * Vt * dt + sigma * dW
        A is a 2x2 matrix
        V0 with shape of (2,)
        '''
        self.A = A
        self.sigma = sigma
        self.V0 = _V0
        
    def generate(self, n=100, T=10):
        '''Parameters:
        Generating the wind variable
        
        return an array of (100,2), which represents V1 and V2 from t=0 to t=T.
        '''
        dt = T/n
        list_t = np.arange(n+1) * dt
        Vt = np.zeros((100,2))
        Vt[0,0] = self.V0[0]
        Vt[0,1] = self.V0[1]
        W1 = np.random.normal(scale=np.sqrt(dt),size=100)
        W2 = np.random.normal(scale=np.sqrt(dt),size=100)
        for i in range(1,n):
            V_tmp = self.A.dot(Vt[i-1])
            Vt[i,0] = Vt[i-1,0] - V_tmp[0] * dt + self.sigma * W1[i-1]
            Vt[i,1] = Vt[i-1,1] - V_tmp[1] * dt + self.sigma * W2[i-1]
        return Vt

In [3]:
X0 = np.array([0, 0], dtype=np.float32)
velocity_init = np.array([20, 20], dtype=np.float32)
delta_T = 0.1
g = np.array([0, -4]) # gravity acceleration
resistance = 0.01 # coefficient lambda
total_time = 10
D = 200

# Build model

In [17]:
class AngryBirdEnv:
    
    def __init__(self,X0 = np.array([0, 0], dtype=np.float32),
                 g=np.array([0, -4]),resistance=0.01,
                 D=200,deltaT=0.1,T_total=10,v0=np.array([20, 20], dtype=np.float32)):
        
        xavier=tf.keras.initializers.GlorotUniform()
        self.model = tf.keras.Sequential(name="Reseau_de_neurones")
        self.model.add(layers.Dense(units=24,input_shape=(3,),kernel_initializer="glorot_uniform"))
        self.model.add(layers.ReLU())
        self.model.add(layers.Dense(units=12,kernel_initializer="glorot_uniform"))
        self.model.add(layers.ReLU())
        self.model.add(layers.Dense(units=6,kernel_initializer="glorot_uniform"))
        self.model.add(layers.ReLU())
        self.model.add(layers.Dense(units = 2, kernel_initializer="glorot_uniform"))
        self.train_op = tf.keras.optimizers.Adam(learning_rate=1e-5)
        
        self.X0 = X0
        self.g = g # gravity acceleration
        self.resistance = resistance # coefficient lambda
        self.D = D
        self.deltaT = deltaT
        self.T_total = T_total
        # the initial velocity
        self.v0 = v0
        # generate wind
        self.wind_generator = Wind()
    
    # Running the model and give the control
    def run(self,time, X):
        #observation = np.array([time, X[0], X[1]]).reshape(1,-1)
        
        flatten_X = np.array(X).flatten()
        data_in = np.concatenate((flatten_X, [time])).reshape(1,-1)

        return self.model(data_in)

    
    # using the physics and give the next position
    def next_step(self,current_time,current_position,control,wind):
        '''
        current_time = eg. 1.2 1.3 ..
        control = np.array(2,)
        wind : wind variable at current time, np.array(2,)
        '''
        
        deltaT = self.deltaT
        g = self.g
        resistance = self.resistance
        v0 = self.v0
        
        deltaX = v0 * deltaT + g * ((current_time+deltaT)**2 - current_time**2) / 2 - resistance * current_position * deltaT + wind * deltaT + control * deltaT
        
        return current_position + deltaX
        
    def process(self,wind):
        X = self.X0
        loss_u = 0
        for i in range(100):
            if i%10 == 0:
                control = self.run(i/10,X)[0]
                loss_u += np.linalg.norm(control,ord=2)**2
            X = self.next_step(i/10,X,control,wind[i])
        loss = loss_u + self.loss_position(X)
        
        return loss,X,loss_u

    
    def loss_position(self, position):
        """
        Fonction de coût terminale

        Parameters
        ----------
        position: arr (2,)

        Returns
        -------
        float
        """
        D = self.D
        u1 = ((position[0] - D) - position[1]) / np.sqrt(2)
        u2 = ((position[0] - D) + position[1]) / np.sqrt(2)
        u3 = position[0] + position[1] - (D-15)
#         return tf.math.square(u1 + u1 * (u1>0)) + tf.math.square(u2) + tf.math.square(u3*(u3<0))a
        
        return (u1 + 3 * np.maximum(u1, 0))**2 + u2**2 + 5*(np.maximum(-u3, 0))**2
    
      
    # get gradients
    def get_grad(self, wind):
        with tf.GradientTape() as tape:
            tape.watch(self.model.variables)
            L,_,_ = self.process(wind)
            gradient = tape.gradient(L, self.model.trainable_variables)
        return gradient
    

      
    # perform gradient descent
    def network_learn(self, epochs):
        loss_train = []
        for epoch in range(epochs):
            wind = self.wind_generator.generate()
            loss,_,_ = self.process(wind)
            gradient = self.get_grad(wind)
            self.train_op.apply_gradients(zip(gradient, self.model.trainable_variables))
            loss_train.append(loss)
            print("{} epoch finished".format(epoch))
            print("---------------------------------")
        self.loss_train = loss_train
            
    def predict(self,num=1000):
        loss_total = []
        loss_u_total = 0
        X_average = np.zeros(shape=(2,))
        for _ in range(num):
            wind = self.wind_generator.generate()
            loss,X,loss_u = self.process(wind)
        #print(loss_control)
        #print(X)
        #print(self.loss_position(float(X)))
        #print(type(X[0]))
            loss_total.append(loss)
            X_average += X
            loss_u_total += loss_u
        
        self.loss = np.mean(loss_total)
        print('Loss average is :',self.loss)
        print('Semi-larger for IC :',1.96*np.var(loss_total)/np.sqrt(num))
        print('Loss control average is:', loss_u_total/num)
        print('Final position average is:',X_average/num)
            

In [18]:
env = AngryBirdEnv()

In [19]:
env.network_learn(epochs=800)

0 epoch finished
---------------------------------
1 epoch finished
---------------------------------
2 epoch finished
---------------------------------
3 epoch finished
---------------------------------
4 epoch finished
---------------------------------
5 epoch finished
---------------------------------
6 epoch finished
---------------------------------
7 epoch finished
---------------------------------
8 epoch finished
---------------------------------
9 epoch finished
---------------------------------
10 epoch finished
---------------------------------
11 epoch finished
---------------------------------
12 epoch finished
---------------------------------
13 epoch finished
---------------------------------
14 epoch finished
---------------------------------
15 epoch finished
---------------------------------
16 epoch finished
---------------------------------
17 epoch finished
---------------------------------
18 epoch finished
---------------------------------
19 epoch finished
----

158 epoch finished
---------------------------------
159 epoch finished
---------------------------------
160 epoch finished
---------------------------------
161 epoch finished
---------------------------------
162 epoch finished
---------------------------------
163 epoch finished
---------------------------------
164 epoch finished
---------------------------------
165 epoch finished
---------------------------------
166 epoch finished
---------------------------------
167 epoch finished
---------------------------------
168 epoch finished
---------------------------------
169 epoch finished
---------------------------------
170 epoch finished
---------------------------------
171 epoch finished
---------------------------------
172 epoch finished
---------------------------------
173 epoch finished
---------------------------------
174 epoch finished
---------------------------------
175 epoch finished
---------------------------------
176 epoch finished
---------------------------

314 epoch finished
---------------------------------
315 epoch finished
---------------------------------
316 epoch finished
---------------------------------
317 epoch finished
---------------------------------
318 epoch finished
---------------------------------
319 epoch finished
---------------------------------
320 epoch finished
---------------------------------
321 epoch finished
---------------------------------
322 epoch finished
---------------------------------
323 epoch finished
---------------------------------
324 epoch finished
---------------------------------
325 epoch finished
---------------------------------
326 epoch finished
---------------------------------
327 epoch finished
---------------------------------
328 epoch finished
---------------------------------
329 epoch finished
---------------------------------
330 epoch finished
---------------------------------
331 epoch finished
---------------------------------
332 epoch finished
---------------------------

470 epoch finished
---------------------------------
471 epoch finished
---------------------------------
472 epoch finished
---------------------------------
473 epoch finished
---------------------------------
474 epoch finished
---------------------------------
475 epoch finished
---------------------------------
476 epoch finished
---------------------------------
477 epoch finished
---------------------------------
478 epoch finished
---------------------------------
479 epoch finished
---------------------------------
480 epoch finished
---------------------------------
481 epoch finished
---------------------------------
482 epoch finished
---------------------------------
483 epoch finished
---------------------------------
484 epoch finished
---------------------------------
485 epoch finished
---------------------------------
486 epoch finished
---------------------------------
487 epoch finished
---------------------------------
488 epoch finished
---------------------------

626 epoch finished
---------------------------------
627 epoch finished
---------------------------------
628 epoch finished
---------------------------------
629 epoch finished
---------------------------------
630 epoch finished
---------------------------------
631 epoch finished
---------------------------------
632 epoch finished
---------------------------------
633 epoch finished
---------------------------------
634 epoch finished
---------------------------------
635 epoch finished
---------------------------------
636 epoch finished
---------------------------------
637 epoch finished
---------------------------------
638 epoch finished
---------------------------------
639 epoch finished
---------------------------------
640 epoch finished
---------------------------------
641 epoch finished
---------------------------------
642 epoch finished
---------------------------------
643 epoch finished
---------------------------------
644 epoch finished
---------------------------

781 epoch finished
---------------------------------
782 epoch finished
---------------------------------
783 epoch finished
---------------------------------
784 epoch finished
---------------------------------
785 epoch finished
---------------------------------
786 epoch finished
---------------------------------
787 epoch finished
---------------------------------
788 epoch finished
---------------------------------
789 epoch finished
---------------------------------
790 epoch finished
---------------------------------
791 epoch finished
---------------------------------
792 epoch finished
---------------------------------
793 epoch finished
---------------------------------
794 epoch finished
---------------------------------
795 epoch finished
---------------------------------
796 epoch finished
---------------------------------
797 epoch finished
---------------------------------
798 epoch finished
---------------------------------
799 epoch finished
---------------------------

In [20]:
env.predict(num=1000)

Loss average is : 6267.208
Semi-larger for IC : 7673535.217242098
Loss control average is: 233.04297474880505
Final position average is: tf.Tensor([206.79648   -7.993649], shape=(2,), dtype=float32)


# Export the model

In [89]:
path_to_dir = "savedModel"
env.model.save(path_to_dir, save_format='h5')



In [90]:
model = tf.keras.models.load_model(path_to_dir)



In [93]:
env.model(np.array([4, 40.5, 10.8]).reshape(1,-1))[0]

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 3.0025268, -0.5165669], dtype=float32)>

In [94]:
#compare with above result
model(np.array([4, 40.5, 10.8]).reshape(1,-1))[0]

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 3.0025268, -0.5165669], dtype=float32)>

## Test the performance of loaded model

In [None]:
X0 = np.array([0, 0], dtype=np.float32)
v0 = np.array([20, 20], dtype=np.float32)
deltaT = 0.1
g = np.array([0, -4]) # gravity acceleration
resistance = 0.01 # coefficient lambda
total_time = 10
D = 200

In [95]:
def process_perf(model, wind):
    X = X0
    loss_u = 0
    for i in range(100):
        if i%10 == 0:
            control = model(np.array([X[0], X[1], i/10]).reshape(1,-1))[0]
            loss_u += np.linalg.norm(control,ord=2)**2
        X = next_step(i/10,X,control,wind[i])
    loss = loss_u + loss_position_perf(X)

    return loss,X,loss_u

In [81]:
def next_step(current_time,current_position,control,wind):
    '''
    current_time = eg. 1.2 1.3 ..
    control = np.array(2,)
    wind : wind variable at current time, np.array(2,)
    '''

    deltaX = v0 * deltaT + g * ((current_time+deltaT)**2 - current_time**2) / 2 - resistance * current_position * deltaT + wind * deltaT + control * deltaT

    return current_position + deltaX

In [82]:
def loss_position_perf(position):
    """
    Fonction de coût terminale

    Parameters
    ----------
    position: arr (2,)

    Returns
    -------
    float
    """
    u1 = ((position[0] - D) - position[1]) / np.sqrt(2)
    u2 = ((position[0] - D) + position[1]) / np.sqrt(2)
    u3 = position[0] + position[1] - (D-15)

    return (u1 + np.maximum(u1, 0))**2 + u2**2 + (np.maximum(-u3, 0))**2

In [96]:
wind_generator = Wind()
num = 600
loss_total = 0
loss_u_total = 0
X_average = np.zeros(shape=(2,))
for _ in range(num):
    wind = wind_generator.generate()
    loss,X,loss_u = process_perf(model3, wind)
    loss_total += loss
    X_average += X
    loss_u_total += loss_u

print('Loss average is :',loss_total/num)
print('Loss control average is:', loss_u_total/num)
print('Final position average is:',X_average/num)

Loss average is : tf.Tensor(735.63446, shape=(), dtype=float32)
Loss control average is: 183.53340372393131
Final position average is: tf.Tensor([195.1349     7.984869], shape=(2,), dtype=float32)
