In [1]:
import numpy as np
import random

class OnlineMatchingModel:
    def __init__(self, N, T, E, rewards, usage_duration_distributions):
        # Initialize the model parameters
        self.N = N  # Number of offline vertices (resources)
        self.T = T  # Number of online vertices (requests)
        self.E = E  # Edges in the bipartite graph
        self.rewards = rewards  # Reward for each resource
        self.usage_duration_distributions = usage_duration_distributions  # Usage duration distributions

    def greedy_algorithm(self):
        total_reward = 0
        resource_availability = [0] * self.N  # Track when resources become available
        for t in range(self.T):
            available_resources = [i for i in range(self.N) if t >= resource_availability[i]]
            # Find the highest reward resource that can be matched
            best_resource = None
            best_reward = 0
            for i in available_resources:
                if (i, t) in self.E and self.rewards[i] > best_reward:
                    best_reward = self.rewards[i]
                    best_resource = i
            if best_resource is not None:
                total_reward += best_reward
                # Sample the usage duration
                duration = np.random.choice(self.usage_duration_distributions[best_resource])
                resource_availability[best_resource] = t + duration
        return total_reward

    def clairvoyant_opt(self, arrival_sequence):
        total_reward = 0
        resource_availability = [0] * self.N
        for t in range(self.T):
            current_request = arrival_sequence[t]
            # Check all resources that can be matched with the current request
            for i in range(self.N):
                if (i, current_request) in self.E and resource_availability[i] <= t:
                    total_reward += self.rewards[i]
                    duration = np.random.choice(self.usage_duration_distributions[i])
                    resource_availability[i] = t + duration
                    break  # Assuming one match per request
        return total_reward


# Example usage
N = 5  # Number of resources
T = 10  # Number of requests
E = [(i, j) for i in range(N) for j in range(T) if random.random() > 0.5]  # Randomly generate edges
rewards = np.random.randint(1, 10, N)
usage_duration_distributions = [np.random.randint(1, 5, 10) for _ in range(N)]

model = OnlineMatchingModel(N, T, E, rewards, usage_duration_distributions)
greedy_reward = model.greedy_algorithm()
arrival_sequence = np.random.permutation(T)  # Random arrival sequence for OPT
opt_reward = model.clairvoyant_opt(arrival_sequence)

print(f"Greedy Reward: {greedy_reward}")
print(f"OPT Reward: {opt_reward}")


Greedy Reward: 49
OPT Reward: 49


#### Main differences between Inventory Balancing Algorithm and Rank Based Allocation Algorithm:
* Goals:
    * IBA: Balance the cost of holding inventory with the risk of being out of stock, optimizing inventory levels by adjusting replenishment points and order volumes.
    * RBA: Maximize long-term rewards or utility while considering the reuse of resources. By prioritizing the highest-ranking available units, the algorithm attempts to optimize allocation decisions while keeping resources efficiently recycled.
* Scenario:
    * IBA: Inventory management and demand matching
    * RBA: Address the reusability of resources and dynamically reprioritize resource units

#### Inventory Balancing Algorithm (with unreusable resources)

In [2]:
class OnlineMatchingModel:
    def __init__(self, N, T, E, rewards):
        self.N = N  # Number of offline vertices (resources)
        self.T = T  # Number of online vertices (requests)
        self.E = E  # Edges in the bipartite graph
        self.rewards = rewards  # Reward for each resource
        
        self.inventory = {}
        for n in range(N):
            self.inventory[n] = 1 # Each resource is initially set to available

        
    def g(self, x):
        return np.exp(-x)
    # Used to adjust the selected weight or priority of each resource
    # When the ratio of the resource's remaining inventory x is high, the value of e^(-x) is smaller, 
    # which means that the priority of resources being selected is correspondingly higher, 
    # because we are more inclined to use resources with more remaining inventory
    
    def allocate_resource(self, t):
        scores = {}
        for n in self.E[t]:
            if self.inventory[n] > 0:  # Allocation is considered only when resources are available
                score = (1 - self.g(self.inventory[n])) * self.rewards[n]
                scores[n] = score

        if scores:
            selected_resource = max(scores, key=scores.get)
            self.inventory[selected_resource] = 0  # After allocation, the resource is marked as unavailable
            return selected_resource
        return None  # If no resources are available, return None
    
# sample
N = 5
T = 10
E = {}
for t in range(T):
    E[t] = range(N)

rewards = np.random.rand(N)

model = OnlineMatchingModel(N, T, E, rewards)

allocations = []
for t in range(T):
    allocation = model.allocate_resource(t)
    allocations.append(allocation)

print(allocations)

[0, 3, 1, 4, 2, None, None, None, None, None]


#### Inventory Balancing Algorithm (reusable)

In [3]:
import numpy as np

class OnlineMatchingModel:
    def __init__(self, N, T, E, rewards, usage_duration_distributions):
        self.N = N  # Number of offline vertices (resources)
        self.T = T  # Number of online vertices (requests)
        self.E = E  # Edges in the bipartite graph
        self.rewards = rewards  # Reward for each resource
        self.usage_duration_distributions = usage_duration_distributions  # Usage duration distributions

        self.inventory = {}
        for n in range(N):
            self.inventory[n] = 1  # Each resource is initially set to available
        
        self.return_times = {}
        for n in range(N):
            self.return_times[n] = []  # Initializes the return time list for each resource
    
    def g(self, x):
        return np.exp(-x)
    # Used to adjust the selected weight or priority of each resource
    # When the ratio of the resource's remaining inventory x is high, the value of e^(-x) is smaller, 
    # which means that the priority of resources being selected is correspondingly higher, 
    # because we are more inclined to use resources with more remaining inventory

    
    def update_inventory(self, current_time):
        for n in range(self.N):
            updated_return_times = []
            for return_time in self.return_times[n]:
                if return_time > current_time:
                    updated_return_times.append(return_time)
            self.return_times[n] = updated_return_times
            
            if not updated_return_times:  # If there is no resource waiting to be returned, it is set to available
                self.inventory[n] = 1
            else:
                self.inventory[n] = 0
    
    def allocate_resource(self, t):
        self.update_inventory(t)
        
        scores = {}
        for n in self.E[t]:
            score = (1 - self.g(self.inventory[n])) * self.rewards[n]
            scores[n] = score
        
        selected_resource = max(scores, key=scores.get)
        
        duration = np.random.choice(self.usage_duration_distributions[selected_resource])
        self.return_times[selected_resource].append(t + duration)
        
        self.inventory[selected_resource] = 0
        
        return selected_resource

# sample
N = 5
T = 10
E = {}
for t in range(T):
    E[t] = range(N)

rewards = np.random.rand(N)

usage_duration_distributions = {}
for n in range(N):
    usage_duration_distributions[n] = [1, 2, 3]

model = OnlineMatchingModel(N, T, E, rewards, usage_duration_distributions)

allocations = []
for t in range(T):
    allocation = model.allocate_resource(t)
    allocations.append(allocation)

print(allocations)

[4, 3, 3, 4, 4, 3, 4, 4, 4, 3]


#### Rank Based Allocation Algorithm

In [53]:
import numpy as np

class OnlineMatchingModel:
    def __init__(self, N, T, E, rewards, usage_duration_distributions):
        self.N = N  # Number of resources
        self.T = T  # Number of time steps or requests
        self.E = E  # Edges representing possible matches between resources and requests
        self.rewards = rewards  # Rewards for allocating each resource
        self.usage_duration_distributions = usage_duration_distributions  # Usage durations for each resource
        
        # Initialize resource units' availability, rank, and return time
        # self.unit_availability = {n: [True for _ in range(len(usage_duration_distributions[n]))] for n in range(N)}
        # self.unit_rank = {n: list(range(len(usage_duration_distributions[n]))) for n in range(N)}
        # self.unit_return_time = {n: [-1 for _ in range(len(usage_duration_distributions[n]))] for n in range(N)}
        
        # Initialize resource units' availability
        # For each resource, create a list indicating whether each unit is available
        self.unit_availability = {}
        for n in range(self.N):  # Iterate over each resource
            availability_list = []  # Initialize an empty list for storing availability status of each unit
            for _ in range(len(self.usage_duration_distributions[n])):  # Iterate over the number of units for each resource
                availability_list.append(True)  # Initially every unit is available
            self.unit_availability[n] = availability_list  # Assign the list to the corresponding resource in the dictionary
        
        # Initialize resource units' rank
        # For each resource, create a list indicating the rank of each unit
        self.unit_rank = {}
        for n in range(self.N):  # Iterate over each resource
            rank_list = []  # Initialize an empty list for storing rank of each unit
            for rank in range(len(self.usage_duration_distributions[n])):  # Iterate over the number of units for each resource
                rank_list.append(rank)  # Assign a rank to each unit
            self.unit_rank[n] = rank_list  # Assign the list to the corresponding resource in the dictionary
        
        # Initialize resource units' return time
        # For each resource, create a list indicating the return time of each unit
        self.unit_return_time = {}
        for n in range(self.N):  # Iterate over each resource
            return_time_list = []  # Initialize an empty list for storing return time of each unit
            for _ in range(len(self.usage_duration_distributions[n])):  # Iterate over the number of units for each resource
                return_time_list.append(-1)  # Initially, there is no return time (-1 indicates not in use)
            self.unit_return_time[n] = return_time_list  # Assign the list to the corresponding resource in the dictionary


    def g(self, x):
        return np.exp(-x)
    
    def update_availability(self, t):
        # Iterate over each resource to update its units' availability
        for i in range(self.N):
            # Iterate over each unit of the resource
            for k in range(len(self.unit_availability[i])):
                # Check if the current time is greater than or equal to the return time of the unit
                if t >= self.unit_return_time[i][k]:
                    # If so, make the unit available again
                    self.unit_availability[i][k] = True

    def allocate_resource(self, t):
        # First, update the availability of all resources at time t
        self.update_availability(t)
        
        # Initialize a dictionary to store the scores for each available resource
        scores = {}
        # Iterate over the resources that are available at time t
        for i in self.E[t]:
            # Find all available units of resource i
            available_units = [k for k, available in enumerate(self.unit_availability[i]) if available]
            # If there are available units, calculate the score for the resource
            if available_units:
                # Find the highest ranked available unit of resource i
                highest_ranked_unit = max(available_units, key=lambda x: self.unit_rank[i][x])
                # Calculate the score based on the reward and the rank of the unit
                scores[i] = self.rewards[i] * (1 - self.g(self.unit_rank[i][highest_ranked_unit] / len(self.unit_rank[i])))

        # If there are any scores calculated, proceed to allocate a resource
        if scores:
            # Select the resource with the highest score
            selected_resource = max(scores, key=scores.get)
            # Find the highest ranked available unit of the selected resource
            selected_unit = max([k for k, available in enumerate(self.unit_availability[selected_resource]) if available], 
                                key=lambda x: self.unit_rank[selected_resource][x])
            # Mark the selected unit as unavailable
            self.unit_availability[selected_resource][selected_unit] = False
            # Set the return time for the unit based on the selected usage duration
            duration = np.random.choice(self.usage_duration_distributions[selected_resource])
            self.unit_return_time[selected_resource][selected_unit] = t + duration
            # Return the selected resource and unit
            return selected_resource, selected_unit
        else:
            # If no resources are available, return None
            return None, None


# Example usage
N = 4  # Number of resources
T = 20  # Number of time steps or requests
E = {t: range(N) for t in range(T)}  # Possible matches between resources and requests
rewards = np.random.rand(N)  # Random rewards for each resource

# Generate usage duration distributions randomly
usage_duration_distributions = {}
for n in range(N):
    usage_duration_distributions[n] = np.random.choice(range(3, 6), size=2, replace=False).tolist()

print("Usage Duration Distributions:", usage_duration_distributions)

model = OnlineMatchingModel(N, T, E, rewards, usage_duration_distributions)

for t in range(T):
    resource, unit = model.allocate_resource(t)
    if resource is not None:
        print(f"At time {t}, resource {resource} unit {unit} is allocated")
    else:
        print(f"At time {t}, no resource is allocated")



Usage Duration Distributions: {0: [5, 4], 1: [4, 5], 2: [3, 4], 3: [5, 3]}
At time 0, resource 1 unit 1 is allocated
At time 1, resource 3 unit 1 is allocated
At time 2, resource 2 unit 1 is allocated
At time 3, resource 0 unit 1 is allocated
At time 4, resource 0 unit 0 is allocated
At time 5, resource 1 unit 1 is allocated
At time 6, resource 3 unit 1 is allocated
At time 7, resource 2 unit 1 is allocated
At time 8, resource 0 unit 1 is allocated
At time 9, resource 1 unit 1 is allocated
At time 10, resource 3 unit 1 is allocated
At time 11, resource 2 unit 1 is allocated
At time 12, resource 0 unit 0 is allocated
At time 13, resource 1 unit 1 is allocated
At time 14, resource 3 unit 1 is allocated
At time 15, resource 2 unit 1 is allocated
At time 16, resource 0 unit 1 is allocated
At time 17, resource 3 unit 1 is allocated
At time 18, resource 1 unit 1 is allocated
At time 19, resource 2 unit 1 is allocated
