import gymnasium as gym
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Define the PPO agent
class PPOAgent(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(PPOAgent, self).__init__()
        self.state_dim = state_dim
        self.action_dim = action_dim
        # Policy head: maps a flattened state to one logit per discrete action
        self.policy_network = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, action_dim)
        )
        # Value head: maps a flattened state to a scalar state-value estimate
        self.value_network = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    def forward(self, state):
        policy_output = self.policy_network(state)
        value_output = self.value_network(state)
        return policy_output, value_output

# Define the priority network
class PriorityNetwork(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(PriorityNetwork, self).__init__()
        # Input is a flattened transition:
        # state + one-hot action + reward + next_state + done flag
        self.priority_network = nn.Sequential(
            nn.Linear(state_dim + action_dim + 1 + state_dim + 1, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    def forward(self, experience):
        priority_output = self.priority_network(experience)
        return priority_output

# Define the PPO trainer
class PPOTrainer:

    def __init__(self, agent, priority_network, gamma, lambda_, epsilon, c1, c2):
        self.agent = agent
        self.priority_network = priority_network
        self.gamma = gamma
        self.lambda_ = lambda_
        self.epsilon = epsilon
        self.c1 = c1
        self.c2 = c2
        # Create the optimizers once so Adam's moment estimates persist across epochs
        self.priority_optimizer = optim.Adam(self.priority_network.parameters(), lr=0.001)
        self.policy_optimizer = optim.Adam(self.agent.policy_network.parameters(), lr=0.001)
        self.value_optimizer = optim.Adam(self.agent.value_network.parameters(), lr=0.001)
        self.priority_loss_fn = nn.MSELoss()

    def train(self, batch_size, epochs):
        for epoch in range(epochs):
            # Sample a batch of experiences from the replay buffer
            batch_experiences = self.sample_batch(batch_size)

            # Compute the TD error of each experience; it is only used as a
            # regression target for the priority network, so no gradients are needed
            td_errors = []
            with torch.no_grad():
                for state, action, reward, next_state, done in batch_experiences:
                    td_error = (reward
                                + self.gamma * (1.0 - done) * self.agent.value_network(next_state)
                                - self.agent.value_network(state))
                    td_errors.append(td_error)

            # Train the priority network to predict the TD error of a transition
            self.priority_network.train()
            for experience, td_error in zip(batch_experiences, td_errors):
                state, action, reward, next_state, done = experience
                # Flatten the transition into the vector the priority network expects
                experience_vector = torch.cat([
                    state,
                    F.one_hot(action, num_classes=self.agent.action_dim).float(),
                    reward.view(1),
                    next_state,
                    done.view(1),
                ])
                self.priority_optimizer.zero_grad()
                priority_output = self.priority_network(experience_vector)
                loss = self.priority_loss_fn(priority_output, td_error)
                loss.backward()
                self.priority_optimizer.step()

            # Update the policy and value heads. Note that this is a simplified
            # policy-gradient step: the clipped PPO surrogate (epsilon), the loss
            # coefficients c1/c2, and the GAE parameter lambda_ are not used here.
            self.agent.train()
            for state, action, reward, next_state, done in batch_experiences:
                self.policy_optimizer.zero_grad()
                self.value_optimizer.zero_grad()
                policy_output, value_output = self.agent(state)
                log_probs = torch.log_softmax(policy_output, dim=-1)
                policy_loss = -log_probs[action] * reward
                value_loss = (value_output.squeeze(-1) - reward) ** 2
                loss = policy_loss + value_loss
                loss.backward()
                self.policy_optimizer.step()
                self.value_optimizer.step()

    def sample_batch(self, batch_size):
        # Placeholder for sampling from a prioritized replay buffer: it returns
        # randomly generated transitions with the correct shapes so the loop runs
        batch_experiences = []
        for _ in range(batch_size):
            state = torch.rand(self.agent.state_dim)
            action = torch.randint(self.agent.action_dim, ())
            reward = torch.rand(())
            next_state = torch.rand(self.agent.state_dim)
            done = torch.zeros(())
            batch_experiences.append((state, action, reward, next_state, done))
        return batch_experiences

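# The placeholder in sample_batch stands in for a real prioritized replay
# buffer. Below is a minimal sketch of one possible buffer; the class name
# PrioritizedReplayBuffer and its methods are illustrative assumptions rather
# than part of the original design.
class PrioritizedReplayBuffer:
    def __init__(self, capacity):
        self.capacity = capacity
        self.experiences = []
        self.priorities = []

    def add(self, experience, priority=1.0):
        # Drop the oldest transition once the buffer is full
        if len(self.experiences) >= self.capacity:
            self.experiences.pop(0)
            self.priorities.pop(0)
        self.experiences.append(experience)
        self.priorities.append(float(priority))

    def sample(self, batch_size):
        # Sample transitions with probability proportional to their priority
        probs = np.array(self.priorities) / np.sum(self.priorities)
        indices = np.random.choice(len(self.experiences), size=batch_size, p=probs)
        return [self.experiences[i] for i in indices]

    def update_priority(self, index, priority):
        # Priorities could be refreshed from the priority network's predictions
        self.priorities[index] = float(priority)
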
# Create the CarRacing environment with the discrete action set, so the policy
# head can output one logit per action
env = gym.make("CarRacing-v2", continuous=False)

# The observation is a 96x96 RGB image; flatten it to a vector for the
# fully connected networks used here
state_dim = int(np.prod(env.observation_space.shape))
action_dim = env.action_space.n

# Create the PPO agent and priority network
agent = PPOAgent(state_dim=state_dim, action_dim=action_dim)
priority_network = PriorityNetwork(state_dim=state_dim, action_dim=action_dim)

# Create the PPO trainer
trainer = PPOTrainer(agent, priority_network, gamma=0.99, lambda_=0.95, epsilon=0.1, c1=0.5, c2=0.01)

# Train the PPO agent
trainer.train(batch_size=32, epochs=1000)
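
# The training call above runs on placeholder data only; the environment is
# never stepped. The sketch below shows one way transitions could be collected
# with the current policy. The flattening, the Categorical sampling, and the
# collect_episode helper are illustrative assumptions, not original code.
def collect_episode(env, agent, max_steps=1000):
    transitions = []
    observation, info = env.reset()
    state = torch.as_tensor(observation, dtype=torch.float32).flatten()
    for _ in range(max_steps):
        with torch.no_grad():
            logits, _ = agent(state)
            action = torch.distributions.Categorical(logits=logits).sample()
        observation, reward, terminated, truncated, info = env.step(int(action))
        next_state = torch.as_tensor(observation, dtype=torch.float32).flatten()
        done = terminated or truncated
        transitions.append((state, action, torch.tensor(float(reward)),
                            next_state, torch.tensor(float(done))))
        state = next_state
        if done:
            break
    return transitions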