Soal 1

Buatlah sebuah program sederhana (boleh dalam Python, Java, atau bahasa lain) yang merepresentasikan dua agen dengan karakteristik berikut:

• Agen A memiliki tujuan untuk mengirim pesan ke Agen B.

• Agen B harus bisa menerima pesan dan merespons balik.

• Implementasikan konsep otonomi (agen dapat berjalan tanpa intervensi manual setelah diinisialisasi).

Tuliskan kode dan jelaskan bagian mana yang menunjukkan sifat agen (autonomous, reactive, social, proactive)!

In [None]:
import time
import random

# --- Agent Class Definition ---
class SimpleAgent:
    """
    A minimal message-passing agent for the two-agent demo.

    Agents communicate by calling each other's ``_receive_message`` directly,
    so delivery is synchronous: any reply is handled before the original
    ``_send_message`` call returns.

    Attributes:
        name: Identifier shown in logs; ``run`` also selects behaviour from it.
        state: Either "idle" or "running".
        conversation_log: Chronological list of sent/received entries.
    """

    # Seconds an agent pretends to work on a request before answering.
    # Kept as a class attribute so it can be overridden per instance.
    processing_delay = 0.5

    def __init__(self, name):
        self.name = name
        # Autonomous: The agent has its own state and identity.
        self.state = "idle"
        self.conversation_log = []

    def _send_message(self, target_agent, content, msg_type="inform"):
        """
        Deliver a message to ``target_agent``.

        Social: this is how an agent communicates with other agents.

        Args:
            target_agent: The receiving agent object.
            content: Message text.
            msg_type: "inform" or "request"; other values are reported as
                unknown by the receiver.
        """
        message = {
            'sender': self.name,
            # Carry the sender object itself so the receiver can reply
            # without relying on global variables or name lookups.
            'sender_agent': self,
            'receiver': target_agent.name,
            'content': content,
            'type': msg_type,
            'timestamp': time.time()
        }
        # Log BEFORE delivering: delivery is a direct call, so a reply would
        # otherwise be logged ahead of the message that triggered it.
        self.conversation_log.append(f"Sent to {target_agent.name}: {content}")
        # Simulate sending the message by calling the target's receive method
        target_agent._receive_message(message)

    def _receive_message(self, message):
        """
        Handle an incoming message.

        Reactive: triggered by an external event (a message arriving).
        Social: dispatches on the message type to produce a response.

        Args:
            message: Dict built by ``_send_message``.
        """
        self.conversation_log.append(f"Received from {message['sender']}: {message['content']}")
        print(f"[{self.name}] Received message: '{message['content']}' from {message['sender']}")

        # Social & Reactive: Respond based on the received message.
        if message['type'] == 'request':
            self._handle_request(message)
        elif message['type'] == 'inform':
            self._handle_inform(message)
        else:
            print(f"[{self.name}] Received unknown message type: {message['type']}")

    def _handle_request(self, message):
        """Process a 'request' message and send an 'inform' reply to its sender."""
        print(f"[{self.name}] Processing request: '{message['content']}'")
        # Simulate some processing time
        time.sleep(self.processing_delay)
        response_content = f"Request '{message['content']}' processed. Result: Success."

        # Reply to the object that sent the request (attached by _send_message).
        sender_agent = message.get('sender_agent')
        if sender_agent is None:
            print(f"[{self.name}] Error: Could not find sender agent for '{message['sender']}'.")
            return

        self._send_message(target_agent=sender_agent, content=response_content, msg_type="inform")

    def _handle_inform(self, message):
        """Acknowledge an 'inform' message; no further action is taken."""
        print(f"[{self.name}] Acknowledged information: '{message['content']}'")

    def run(self, peer=None):
        """
        Run one autonomous operation cycle.

        Autonomous: once started, the agent needs no further external input.
        Proactive: "Agent A" initiates the conversation on its own.

        Args:
            peer: Agent that "Agent A" should contact. When omitted, the
                module-level name ``agent_b`` is used, as in the original
                script.
        """
        print(f"[{self.name}] is starting its autonomous operation.")
        self.state = "running"

        # --- Agent A specific behavior ---
        if self.name == "Agent A":
            # Backward-compatible fallback to the global defined by the script.
            target = peer if peer is not None else agent_b

            # Proactive: Agent A decides to initiate contact.
            print(f"[{self.name}] is proactively deciding to send an initial message.")
            time.sleep(1)  # Simulate a short delay before sending
            self._send_message(target_agent=target, content="Hello Agent B, this is a message from A.", msg_type="inform")

            # Wait a bit, then follow up with a request that expects a reply.
            time.sleep(2)
            self._send_message(target_agent=target, content="How are you?", msg_type="request")

        # --- Agent B specific behavior ---
        elif self.name == "Agent B":
            # Agent B is primarily reactive; its work happens inside
            # _receive_message when another agent calls it.
            print(f"[{self.name}] is ready to receive messages.")
            # Stand-in for an event loop: simply idle for a while.
            time.sleep(5)

        # Simulate the agent running for a short period
        time.sleep(1)
        self.state = "idle"
        print(f"[{self.name}] is finishing its current operation cycle.")

# --- Main Execution ---
if __name__ == "__main__":
    # Bind the agents to these exact module-level names: SimpleAgent.run()
    # looks up `agent_b` as a module global when contacting its peer.
    agent_a, agent_b = SimpleAgent("Agent A"), SimpleAgent("Agent B")

    print("--- Starting Multi-Agent Simulation ---")

    # Autonomous: after construction each agent drives its own cycle; the
    # script only starts them. The calls are sequential (no threads/asyncio),
    # so Agent B's replies happen inside Agent A's run() call.
    for participant in (agent_a, agent_b):
        participant.run()

    # Dump what each agent recorded, in the order it was recorded.
    log_sections = (
        ("\n--- Conversation Log for Agent A ---", agent_a),
        ("\n--- Conversation Log for Agent B ---", agent_b),
    )
    for heading, participant in log_sections:
        print(heading)
        for line in participant.conversation_log:
            print(line)

    print("\n--- Simulation Ended ---")

--- Starting Multi-Agent Simulation ---
[Agent A] is starting its autonomous operation.
[Agent A] is proactively deciding to send an initial message.
[Agent B] Received message: 'Hello Agent B, this is a message from A.' from Agent A
[Agent B] Acknowledged information: 'Hello Agent B, this is a message from A.'
[Agent B] Received message: 'How are you?' from Agent A
[Agent B] Processing request: 'How are you?'
[Agent A] Received message: 'Request 'How are you?' processed. Result: Success.' from Agent B
[Agent A] Acknowledged information: 'Request 'How are you?' processed. Result: Success.'
[Agent A] is finishing its current operation cycle.
[Agent B] is starting its autonomous operation.
[Agent B] is ready to receive messages.
[Agent B] is finishing its current operation cycle.

--- Conversation Log for Agent A ---
Sent to Agent B: Hello Agent B, this is a message from A.
Received from Agent B: Request 'How are you?' processed. Result: Success.
Sent to Agent B: How are you?

--- Conver

Soal 2

Simulasikan:

• Tiga agen (A, B, C) harus menyepakati siapa yang akan menjadi koordinator tugas.

• Gunakan protokol kesepakatan statis (misalnya voting dengan aturan mayoritas).

• Bandingkan dengan kesepakatan dinamis (misalnya rotasi per giliran).

Tambahkan implementasi sederhana optimasi terdistribusi:

• Agen A, B, C masing-masing memiliki kapasitas energi yang berbeda.

• Buat algoritma agar ketiga agen dapat membagi beban tugas secara adil dengan saling bertukar informasi kapasitas.

In [None]:
import time
import random

# --- Agent Class Definition ---
class SimpleAgent:
    """
    An agent that takes part in coordinator selection and workload sharing.

    Provides a static agreement protocol (voting), a dynamic one (round-robin
    rotation), and a capacity-proportional workload split. Messages are
    delivered synchronously by calling the receiver's ``_receive_message``.

    Attributes:
        name: Identifier used in logs, votes and the rotation order.
        energy_capacity: Capacity announced to other agents.
        current_energy: Initialised to the capacity; not modified by this class.
        assigned_workload: Result of the last ``calculate_fair_workload`` call.
        state: Either "idle" or "running".
        conversation_log: Chronological list of sent/received entries.
        known_energies: ``{agent_name: capacity}`` learned from messages.
        turn_order: Fixed rotation order used by the dynamic protocol.
        is_coordinator: Whether this agent currently holds the role.
    """

    # Prefix of the announcement sent by share_energy_info(); receivers use
    # it to recognise capacity messages.
    _ENERGY_PREFIX = "My energy capacity is"

    def __init__(self, name, initial_energy):
        self.name = name
        # Autonomous: The agent has its own state and identity.
        self.energy_capacity = initial_energy
        self.current_energy = initial_energy  # Assuming energy starts full
        self.assigned_workload = 0.0
        self.state = "idle"
        self.conversation_log = []
        # For distributed optimization
        self.known_energies = {}  # Dictionary to store {agent_name: energy}
        # For turn-based rotation
        self.turn_order = ["Agent A", "Agent B", "Agent C"]  # Define the order
        self.is_coordinator = False

    def _send_message(self, target_agent, content, msg_type="inform"):
        """
        Deliver a message to ``target_agent``.

        Social: this is how an agent communicates with other agents.

        Args:
            target_agent: The receiving agent object.
            content: Message text.
            msg_type: "inform", "request" or "vote".
        """
        message = {
            'sender': self.name,
            # Carry the sender object so the receiver can reply directly
            # instead of resolving the name (previously done with eval()).
            'sender_agent': self,
            'receiver': target_agent.name,
            'content': content,
            'type': msg_type,
            'timestamp': time.time()
        }
        # Log before delivering: delivery is a direct call, so a reply would
        # otherwise be logged ahead of the message that triggered it.
        self.conversation_log.append(f"Sent to {target_agent.name}: {content}")
        # Simulate sending the message by calling the target's receive method
        target_agent._receive_message(message)

    def _receive_message(self, message):
        """
        Handle an incoming message.

        Reactive: triggered by an external event (a message arriving).
        Social: dispatches on the message type and content.

        Args:
            message: Dict built by ``_send_message``.
        """
        sender = message['sender']
        content = message['content']
        self.conversation_log.append(f"Received from {sender}: {content}")
        print(f"[{self.name}] Received message: '{content}' from {sender}")

        msg_type = message['type']
        if msg_type == 'request':
            # Only requests asking for energy information are answered.
            if "energy_info" in content:
                self._reply_with_energy(message)
        elif msg_type == 'inform':
            # Capacity announcements use the "My energy capacity is X." form;
            # the "energy_info" keyword is still accepted for compatibility.
            if content.startswith(self._ENERGY_PREFIX) or "energy_info" in content:
                self._record_energy(sender, content)
            elif "is_coordinator" in content:
                # Only trust messages from agents in the rotation order.
                if sender in self.turn_order:
                    # NOTE(review): compares own name with the sender's name,
                    # so a message from another agent always clears the flag
                    # - confirm this is the intended semantics.
                    self.is_coordinator = (self.name == sender)
                    print(f"[{self.name}] Coordinator status updated: {'I am coordinator' if self.is_coordinator else 'I am not coordinator'}")
        elif msg_type == 'vote':
            # Handle incoming votes during static protocol
            self._handle_vote(message)
        else:
            print(f"[{self.name}] Received unknown message type: {msg_type}")

    def _reply_with_energy(self, message):
        """Answer an energy-info request with this agent's capacity."""
        requester = message.get('sender_agent')
        if requester is None:
            print(f"[{self.name}] Error: Could not find sender agent for '{message['sender']}'.")
            return
        response_content = f"{self._ENERGY_PREFIX} {self.energy_capacity}."
        self._send_message(target_agent=requester, content=response_content, msg_type="inform")

    def _record_energy(self, sender, content):
        """Parse a capacity announcement and store it in ``known_energies``."""
        try:
            # The number is the last word; drop the sentence-ending period
            # ("100.0." is not a valid float literal).
            energy_val = float(content.split()[-1].rstrip('.'))
        except (ValueError, IndexError):
            print(f"[{self.name}] Could not parse energy info from {sender}: {content}")
            return
        self.known_energies[sender] = energy_val
        print(f"[{self.name}] Updated known energy for {sender} to {energy_val}")

    def _handle_vote(self, message):
        """Log an incoming 'vote' message (content format: "Votes for X")."""
        vote_target = message['content'].split()[-1]
        print(f"[{self.name}] Received vote for {vote_target} from {message['sender']}")
        # Votes are only logged here; tallying happens in conduct_static_voting.

    # --- Static Agreement Protocol: Majority Voting ---
    def conduct_static_voting(self, all_agents):
        """
        Run one voting round over ``all_agents`` and return the winner's name.

        Proactive: the calling agent initiates the process. Each agent votes
        for the highest capacity it knows of (its own included); exact ties
        are broken randomly. The tally is computed centrally in this call
        rather than by exchanging vote messages.

        Args:
            all_agents: Participating agents.

        Returns:
            Name of the agent with the most votes (first in ``all_agents``
            order when several share the top count), or None if
            ``all_agents`` is empty.
        """
        print("\n--- Starting Static Voting Protocol for Coordinator Selection ---")
        if not all_agents:
            print("Static Voting Result: no agents to vote.")
            return None

        votes = {agent.name: 0 for agent in all_agents}

        for agent in all_agents:
            target_to_vote = agent.name  # Default vote for self
            highest_energy = agent.energy_capacity

            for other_name, other_energy in agent.known_energies.items():
                if other_energy > highest_energy:
                    target_to_vote = other_name
                    highest_energy = other_energy
                elif other_energy == highest_energy and other_name != agent.name:
                    # Tie-breaker: random choice between tied candidates
                    if random.choice([True, False]):
                        target_to_vote = other_name
                        highest_energy = other_energy

            votes[target_to_vote] += 1
            print(f"[{agent.name}] votes for {target_to_vote}")

        # Determine winner
        winner = max(votes, key=votes.get)
        print(f"\nStatic Voting Result: {winner} is the coordinator (received {votes[winner]} votes).")
        if votes[winner] * 2 <= len(all_agents):
            print(f"Note: {winner} leads without a strict majority "
                  f"({votes[winner]} of {len(all_agents)} votes); ties are resolved by agent order.")

        # Update coordinator status
        for agent in all_agents:
            agent.is_coordinator = (agent.name == winner)

        return winner

    # --- Dynamic Agreement Protocol: Turn-Based Rotation ---
    def conduct_dynamic_rotation(self, all_agents, round_num):
        """
        Pick the coordinator by rotating through ``self.turn_order``.

        Args:
            all_agents: Agents whose ``is_coordinator`` flag is updated.
            round_num: Round counter; the coordinator is
                ``turn_order[round_num % len(turn_order)]``.

        Returns:
            Name of the coordinator for this round.
        """
        print(f"\n--- Starting Dynamic Rotation Protocol (Round {round_num}) ---")
        coordinator_name = self.turn_order[round_num % len(self.turn_order)]

        print(f"Dynamic Rotation Result (Round {round_num}): {coordinator_name} is the coordinator.")

        # Update coordinator status for all agents
        for agent in all_agents:
            agent.is_coordinator = (agent.name == coordinator_name)

        return coordinator_name

    # --- Distributed Optimization: Fair Workload Division ---
    def share_energy_info(self, all_agents):
        """
        Announce this agent's capacity to every other agent.

        Proactive: the agent volunteers the information unprompted.
        """
        print(f"\n--- Agent {self.name} Sharing Energy Info ---")
        content = f"{self._ENERGY_PREFIX} {self.energy_capacity}."
        for agent in all_agents:
            if agent is not self:
                self._send_message(target_agent=agent, content=content, msg_type="inform")

    def calculate_fair_workload(self, total_workload, all_agents):
        """
        Compute and store this agent's capacity-proportional workload share.

        Capacities are read directly from ``all_agents`` (not from
        ``known_energies``).

        Args:
            total_workload: Amount of work to split.
            all_agents: All agents sharing the work, including this one.

        Returns:
            This agent's share, or 0.0 when the total capacity is zero.
        """
        print("\n--- Calculating Fair Workload Division ---")
        total_capacity = sum(agent.energy_capacity for agent in all_agents)

        if total_capacity == 0:
            print("Error: Total energy capacity is zero, cannot divide workload.")
            return 0.0

        # Calculate proportional share based on capacity
        self_proportion = self.energy_capacity / total_capacity
        calculated_workload = self_proportion * total_workload

        self.assigned_workload = calculated_workload
        print(f"[{self.name}] Total workload: {total_workload}, Total capacity: {total_capacity}")
        print(f"[{self.name}] My capacity: {self.energy_capacity}, Proportion: {self_proportion:.2f}")
        print(f"[{self.name}] My assigned workload: {calculated_workload:.2f}")

        return calculated_workload

    def run(self, all_agents, total_workload, round_num):
        """
        Run one autonomous cycle: share capacity, vote, rotate, split workload.

        The dynamic rotation runs after the static vote, so when this method
        returns ``is_coordinator`` reflects the rotation result.

        Args:
            all_agents: All participating agents, including this one.
            total_workload: Amount of work to split.
            round_num: Round counter passed to the rotation protocol.
        """
        print(f"\n[{self.name}] is starting its autonomous operation for round {round_num}.")
        self.state = "running"

        # --- Step 1: Share energy info for distributed optimization ---
        self.share_energy_info(all_agents)

        # Simulate time for messages to propagate
        time.sleep(0.5)

        # --- Step 2: Decide coordinator using Static Protocol ---
        self.conduct_static_voting(all_agents)

        # Reset coordinator status for dynamic protocol simulation
        for agent in all_agents:
            agent.is_coordinator = False

        # --- Step 3: Decide coordinator using Dynamic Protocol ---
        self.conduct_dynamic_rotation(all_agents, round_num)

        # --- Step 4: Perform distributed optimization calculation ---
        self.calculate_fair_workload(total_workload, all_agents)

        self.state = "idle"
        print(f"[{self.name}] has finished its operation cycle for round {round_num}.\n")


# --- Main Execution ---
if __name__ == "__main__":
    # Create instances of the agents with different energy capacities
    agent_a = SimpleAgent("Agent A", initial_energy=100.0)
    agent_b = SimpleAgent("Agent B", initial_energy=60.0)
    agent_c = SimpleAgent("Agent C", initial_energy=40.0)
    all_agents = [agent_a, agent_b, agent_c]

    # Assume total workload to be divided
    total_workload = 100.0

    print("--- Starting Multi-Agent Simulation ---")
    print(f"Initial Agent Energies: A={agent_a.energy_capacity}, B={agent_b.energy_capacity}, C={agent_c.energy_capacity}")

    # Simulate multiple rounds to show dynamic rotation
    for round_num in range(3):
        # Headers are 1-based for readability; run() receives the 0-based
        # counter because the rotation index is round_num % number of agents.
        print(f"\n--- SIMULATION ROUND {round_num + 1} ---")

        # All agents run their autonomous processes (sequentially here; a
        # real system might run them concurrently).
        for agent in all_agents:
            # Agents share info, decide coordinator (static), decide coordinator (dynamic), optimize workload
            agent.run(all_agents, total_workload, round_num)

        # Print summary for the round.
        # run() clears the static-vote flags and applies the dynamic rotation
        # last, so is_coordinator holds the ROTATION result at this point;
        # the label says so (it previously claimed to be the static result).
        print(f"\n--- Summary for Round {round_num + 1} ---")
        for agent in all_agents:
            print(f"  {agent.name}: Energy={agent.energy_capacity}, Workload={agent.assigned_workload:.2f}, Is_Coordinator_Dynamic={agent.is_coordinator} (Dynamic Rotation Result)")

        # Reset coordinator status for the next round simulation
        for agent in all_agents:
            agent.is_coordinator = False

    print("\n--- Simulation Ended ---")

    # Print final conversation logs
    for agent in all_agents:
        print(f"\n--- Conversation Log for {agent.name} ---")
        for entry in agent.conversation_log:
            print(f"  {entry}")

--- Starting Multi-Agent Simulation ---
Initial Agent Energies: A=100.0, B=60.0, C=40.0

--- SIMULATION ROUND 1 ---

[Agent A] is starting its autonomous operation for round 0.

--- Agent Agent A Sharing Energy Info ---
[Agent B] Received message: 'My energy capacity is 100.0.' from Agent A
[Agent C] Received message: 'My energy capacity is 100.0.' from Agent A

--- Starting Static Voting Protocol for Coordinator Selection ---
[Agent A] votes for Agent A
[Agent B] votes for Agent B
[Agent C] votes for Agent C

Static Voting Result: Agent A is the coordinator (received 1 votes).

--- Starting Dynamic Rotation Protocol (Round 0) ---
Dynamic Rotation Result (Round 0): Agent A is the coordinator.

--- Calculating Fair Workload Division ---
[Agent A] Total workload: 100.0, Total capacity: 200.0
[Agent A] My capacity: 100.0, Proportion: 0.50
[Agent A] My assigned workload: 50.00
[Agent A] has finished its operation cycle for round 0.


[Agent B] is starting its autonomous operation for round

Soal 3

Studi kasus pasar online multi agen:

• Setiap agen pembeli ingin belajar memilih penjual terbaik berdasarkan harga dan reputasi.

• Implementasikan algoritma sederhana multi-agent learning (misalnya Q-Learning atau rule-based update) di mana agen pembeli memperbarui strateginya setiap kali bertransaksi.

• Tambahkan mekanisme trust/reputation: setelah transaksi, pembeli memberi skor reputasi ke penjual. Agen pembeli berikutnya menggunakan reputasi ini dalam pengambilan keputusan.

Tugas:

1. Buatlah simulasi sederhana untuk 2 pembeli dan 2 penjual!
2. Jelaskan bagaimana learning memengaruhi strategi agen pembeli!
3. Jelaskan bagaimana trust/reputation mencegah pembeli memilih penjual yang menipu!

In [None]:
import random
import numpy as np

# --- Seller Agent Class ---
class SellerAgent:
    """
    A seller with a fixed price and a feedback-driven reputation score.

    Attributes:
        name: Identifier used in logs and as the buyers' Q-table key.
        fixed_price: Price charged for every transaction.
        is_fraudulent: Whether transactions with this seller count as fraud.
        reputation_score: 0.5 until the first feedback arrives, afterwards the
            fraction of feedback that was positive.
        total_transactions: Number of feedback scores received.
        positive_feedback_count: Number of those scores that were > 0.
    """

    def __init__(self, name, fixed_price, is_fraudulent=False):
        self.name = name
        self.fixed_price = fixed_price
        self.is_fraudulent = is_fraudulent
        # Reputation is initialized and updated based on buyer feedback
        self.reputation_score = 0.5  # Start with neutral reputation
        self.total_transactions = 0
        self.positive_feedback_count = 0

    def update_reputation(self, feedback_score):
        """
        Record one buyer feedback and recompute the reputation.

        Args:
            feedback_score: Any value > 0 counts as positive feedback;
                everything else counts as negative.
        """
        self.total_transactions += 1
        if feedback_score > 0:
            self.positive_feedback_count += 1

        # total_transactions is at least 1 here, so the division is always
        # safe (the former "no transactions yet" fallback was unreachable).
        # The neutral 0.5 prior is replaced by the observed positive ratio.
        self.reputation_score = self.positive_feedback_count / self.total_transactions

    def __repr__(self):
        return f"Seller(name={self.name}, price={self.fixed_price}, fraud={self.is_fraudulent}, rep={self.reputation_score:.2f})"

# --- Buyer Agent Class ---
class BuyerAgent:
    """
    A buyer that learns which seller to pick with single-state Q-learning.

    Attributes:
        name: Identifier used in logs.
        q_table: ``{state: {seller_name: q_value}}``; only the state
            "default_state" is used.
        learning_rate: Step size (alpha) of the Q update.
        discount_factor: Weight (gamma) of the best next-state value.
        exploration_rate: Probability of picking a random seller.
    """

    def __init__(self, name, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.2):
        self.name = name
        # Autonomous: The agent has its own state (Q-table, strategy parameters)
        self.q_table = {}  # State -> {Seller: Q-value}
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate  # Probability of choosing randomly (exploration)

    def _ensure_state(self, state, sellers):
        """Return the Q-values for ``state``, adding a 0.0 entry for any seller not yet present."""
        q_values = self.q_table.setdefault(state, {})
        for seller in sellers:
            q_values.setdefault(seller.name, 0.0)
        return q_values

    def choose_seller(self, sellers):
        """
        Pick a seller with an epsilon-greedy policy.

        Proactive: the agent decides on its own which seller to approach.

        Args:
            sellers: Non-empty list of candidate sellers.

        Returns:
            A random seller with probability ``exploration_rate``; otherwise
            the offered seller with the highest Q-value (the first one in
            ``sellers`` on ties).
        """
        # A single constant state is used; prices and reputations influence
        # the choice only through the rewards learned so far.
        state = "default_state"
        q_values = self._ensure_state(state, sellers)

        # Exploration: Choose a random seller
        if random.uniform(0, 1) < self.exploration_rate:
            chosen_seller = random.choice(sellers)
            print(f"  [{self.name}] (Exploring) Chose seller: {chosen_seller.name}")
            return chosen_seller

        # Exploitation: best-valued seller among those actually offered, so a
        # stale Q-table entry for an absent seller can never be selected.
        chosen_seller = max(sellers, key=lambda seller: q_values[seller.name])
        print(f"  [{self.name}] (Exploiting) Chose seller: {chosen_seller.name} (Q-value: {q_values[chosen_seller.name]:.2f})")
        return chosen_seller

    def evaluate_transaction_and_update_q(self, chosen_seller, all_sellers):
        """
        Score a finished transaction and update the Q-value of the choice.

        Reactive: the strategy changes in response to the transaction outcome.

        Args:
            chosen_seller: Seller the transaction was made with.
            all_sellers: All sellers available as actions.

        Returns:
            Feedback for the seller's reputation: 0 for a fraudulent
            transaction, 1 otherwise.
        """
        state = "default_state"
        action = chosen_seller.name  # The action is choosing a specific seller

        # Make sure the Q-table has entries BEFORE reading them; this method
        # may be called without a preceding choose_seller() call.
        q_values = self._ensure_state(state, all_sellers)
        q_values.setdefault(action, 0.0)

        if chosen_seller.is_fraudulent:
            # Fraud is penalised heavily and reported as negative feedback.
            reward = -10.0
            feedback_score = 0
        else:
            # Lower price and higher reputation both increase the reward.
            normalized_price_reward = 1.0 / (1 + chosen_seller.fixed_price)
            normalized_reputation_reward = chosen_seller.reputation_score
            reward = normalized_price_reward + normalized_reputation_reward
            feedback_score = 1

        print(f"  [{self.name}] Transaction with {chosen_seller.name}: Fraud={chosen_seller.is_fraudulent}, Price={chosen_seller.fixed_price}, Rep={chosen_seller.reputation_score:.2f}")
        print(f"  [{self.name}] Calculated reward: {reward:.2f}")

        # Q(s, a) <- Q(s, a) + alpha * [reward + gamma * max(Q(s', a')) - Q(s, a)]
        # The next state s' is the same single state, so the max is taken
        # over the same row of the table.
        current_q = q_values[action]
        max_next_q = max(q_values.values())

        new_q = current_q + self.learning_rate * (reward + self.discount_factor * max_next_q - current_q)
        q_values[action] = new_q
        print(f"  [{self.name}] Updated Q-value for choosing {action}: {new_q:.2f}")

        # Return the feedback score for the seller's reputation update
        return feedback_score

    def __repr__(self):
        return f"Buyer(name={self.name})"


# --- Simulation Setup ---
def run_marketplace_simulation(num_rounds=6):
    """
    Run the two-buyer / two-seller marketplace simulation and print a trace.

    In every round each buyer picks a seller, evaluates the transaction
    (updating its Q-table) and the chosen seller's reputation is updated
    with the buyer's feedback.

    Args:
        num_rounds: Number of rounds to simulate (default 6).
    """
    print("--- Initializing Multi-Agent Marketplace Simulation ---")

    # Two sellers: an honest one and a cheaper, fraudulent one.
    seller_a = SellerAgent(name="Seller A", fixed_price=50.0, is_fraudulent=False)
    seller_b = SellerAgent(name="Seller B", fixed_price=30.0, is_fraudulent=True)
    sellers = [seller_a, seller_b]

    # Two buyers that differ only in how often they explore.
    buyer_x = BuyerAgent(name="Buyer X", exploration_rate=0.3)
    buyer_y = BuyerAgent(name="Buyer Y", exploration_rate=0.1)
    buyers = [buyer_x, buyer_y]

    print("\nInitial Seller States:")
    for seller in sellers:
        print(f"  {seller}")

    print("\nInitial Buyer Q-Tables (before any transactions):")
    for buyer in buyers:
        print(f"  {buyer.name}: {buyer.q_table}")

    print(f"\n--- Starting Simulation for {num_rounds} Rounds ---")

    for round_num in range(1, num_rounds + 1):
        print(f"\n--- Round {round_num} ---")

        # Each buyer makes a transaction in this round
        for buyer in buyers:
            print(f"\n{buyer.name}'s turn:")

            # Buyer chooses a seller based on current strategy (Q-table and exploration)
            chosen_seller = buyer.choose_seller(sellers)

            # Buyer evaluates the outcome and updates its Q-table
            feedback_score = buyer.evaluate_transaction_and_update_q(chosen_seller, sellers)

            # The reputation is updated right away, so the next buyer in the
            # same round already sees the new score.
            chosen_seller.update_reputation(feedback_score)
            print(f"  [{chosen_seller.name}] Reputation updated to: {chosen_seller.reputation_score:.2f}")

    print("\n--- Simulation Ended ---")

    print("\nFinal Seller States:")
    for seller in sellers:
        print(f"  {seller}")

    # f-string so the actual round count is shown (the original lacked the
    # `f` prefix and printed the literal text "{num_rounds}").
    print(f"\nFinal Buyer Q-Tables (after {num_rounds} rounds):")
    for buyer in buyers:
        print(f"  {buyer.name}: {buyer.q_table}")

# --- Run the Simulation ---
# Entry-point guard: the simulation starts only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    run_marketplace_simulation()


--- Initializing Multi-Agent Marketplace Simulation ---

Initial Seller States:
  Seller(name=Seller A, price=50.0, fraud=False, rep=0.50)
  Seller(name=Seller B, price=30.0, fraud=True, rep=0.50)

Initial Buyer Q-Tables (before any transactions):
  Buyer X: {}
  Buyer Y: {}

--- Starting Simulation for 6 Rounds ---

--- Round 1 ---

Buyer X's turn:
  [Buyer X] (Exploiting) Chose seller: Seller A (Q-value: 0.00)
  [Buyer X] Transaction with Seller A: Fraud=False, Price=50.0, Rep=0.50
  [Buyer X] Calculated reward: 0.52
  [Buyer X] Updated Q-value for choosing Seller A: 0.05
  [Seller A] Reputation updated to: 1.00

Buyer Y's turn:
  [Buyer Y] (Exploiting) Chose seller: Seller A (Q-value: 0.00)
  [Buyer Y] Transaction with Seller A: Fraud=False, Price=50.0, Rep=1.00
  [Buyer Y] Calculated reward: 1.02
  [Buyer Y] Updated Q-value for choosing Seller A: 0.10
  [Seller A] Reputation updated to: 1.00

--- Round 2 ---

Buyer X's turn:
  [Buyer X] (Exploiting) Chose seller: Seller A (Q-value: