In [1]:
import dyna_env_drifttype
import numpy as np
import pandas as pd
import ast
from collections import deque
from dyna_env_drifttype import TaskEnv_driftype

In [2]:
env = TaskEnv_driftype()

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  frequencies[label][action] = ast.literal_eval(frequencies[label][action]) #判断需要计算的内容是不是合法的Python类型，如果是则执行，否则就报错


In [67]:
actions = env.observation_space.index.to_list()

In [3]:
env.observation_space['pp']['geen']

{'Tau': 0.42410714285714285,
 'pp': 0.40625,
 'va': 0.09598214285714286,
 'sib': 0.05133928571428571,
 'po': 0.022321428571428572}

In [5]:
env.set_flag()
env.drift(change_at_states=['pp'],drift_dis_type='inverse',intensity=0.7)
env.observation_space['pp']['geen']

drift happen
change_frequencies function running


{'Tau': 0.15483630952380953,
 'pp': 0.18589583333333332,
 'va': 0.19063839285714285,
 'sib': 0.21928720238095237,
 'po': 0.24934226190476189}

In [57]:

    def perturb_probs(prob_list,intensity=0.5, ranking=True):
        #intensity 0.5-1
 
    # Convert to numpy array and normalize to ensure it sums to 1
        probs = np.array(prob_list, dtype=float)
        probs = probs / probs.sum()
        
        n = len(probs)
    
    # If intensity is 0, return original distribution
        if intensity == 0:
            return probs
        
        # Get the sorted indices to understand ranking
        sorted_indices = np.argsort(probs)
        
        # Create target distribution based on ranking preference
        if ranking:
            # Maintain original ranking: highest prob gets most concentration
            target_weights = np.zeros(n)
            for i, idx in enumerate(sorted_indices):
                # Linear weighting: smallest gets 1, largest gets n
                target_weights[idx] = i + 1
        else:
            # Reverse ranking: lowest prob gets most concentration  
            target_weights = np.zeros(n)
            for i, idx in enumerate(sorted_indices):
                # Reverse linear weighting: largest gets 1, smallest gets n
                target_weights[idx] = n - i
        
        # Normalize target weights to create target distribution
        target_dist = target_weights / np.sum(target_weights)
        
        # Interpolate between original and target based on intensity
        modified_probs = (1-intensity )* probs + (intensity) * target_dist
        
        # Ensure final normalization
        modified_probs = modified_probs / np.sum(modified_probs)
        
        return np.round(modified_probs,2)

In [32]:
valuelist = []
for a in actions:
    problist = list(env.observation_space['pp'][a].values())
    
    new_prob = perturb_probs(problist,0.7)
    valuelist.append(new_prob)
    print(new_prob)

[0.29607701 0.37763866 0.17113893 0.10290522 0.05224017]
[0.29037858 0.38840259 0.16487535 0.05386888 0.10247461]
[0.36056548 0.30854167 0.16879464 0.10873512 0.0533631 ]
[0.37189989 0.05400455 0.30765643 0.10374289 0.16269625]
[0.29570874 0.38346762 0.09977917 0.16846911 0.05257535]
[0.36761693 0.31317537 0.05421942 0.16121437 0.10377391]
[0.17417722 0.30581224 0.35342827 0.11469409 0.05188819]


In [58]:
np.random.seed(2)
example = np.random.dirichlet(np.ones(3), size=1)[0]
example = np.round(example,2)
preserve_rank = perturb_probs(example,0.7,True)
inverse_rank = perturb_probs(example,0.7,False)
example, preserve_rank,inverse_rank

(array([0.41, 0.02, 0.57]),
 array([0.36, 0.12, 0.52]),
 array([0.36, 0.36, 0.29]))

In [64]:

example = np.random.dirichlet(np.ones(3), size=1)[0]
example = np.round(example,2)
example


array([0.39, 0.34, 0.27])

In [30]:
keylists = []
for a in actions:
    keylist = list(env.observation_space['pp'][a].keys())
    print(keylist)
    keylists.append(keylist)
    

['pp', 'Tau', 'va', 'po', 'sib']
['pp', 'Tau', 'va', 'po', 'sib']
['Tau', 'pp', 'va', 'sib', 'po']
['Tau', 'po', 'pp', 'sib', 'va']
['pp', 'Tau', 'po', 'va', 'sib']
['Tau', 'pp', 'po', 'va', 'sib']
['va', 'pp', 'Tau', 'po', 'sib']


In [37]:
for i in range(7):
    result_dict = dict(zip(keylists[i], valuelist[i]))
    print(result_dict)


{'pp': 0.2960770059235326, 'Tau': 0.3776386645126548, 'va': 0.1711389337641357, 'po': 0.10290522347872913, 'sib': 0.05224017232094776}
{'pp': 0.2903785780240074, 'Tau': 0.38840258541089573, 'va': 0.16487534626038783, 'po': 0.05386888273314867, 'sib': 0.10247460757156049}
{'Tau': 0.3605654761904762, 'pp': 0.30854166666666666, 'va': 0.16879464285714285, 'sib': 0.10873511904761904, 'po': 0.05336309523809524}
{'Tau': 0.3718998862343572, 'po': 0.05400455062571103, 'pp': 0.30765642775881685, 'sib': 0.1037428896473265, 'va': 0.16269624573378838}
{'pp': 0.2957087436586094, 'Tau': 0.3834676216054909, 'po': 0.09977917039689645, 'va': 0.16846911369740375, 'sib': 0.052575350641599516}
{'Tau': 0.36761693200049367, 'pp': 0.31317536714796995, 'po': 0.05421942490435641, 'va': 0.16121436504998152, 'sib': 0.10377391089719858}
{'va': 0.17417721518987342, 'pp': 0.30581223628691984, 'Tau': 0.3534282700421941, 'po': 0.11469409282700423, 'sib': 0.05188818565400844}


In [41]:
valuelist = []
for a in actions:
    problist = list(env.observation_space['pp'][a].values())
    
    new_prob = perturb_probs(problist,0.95,False)
    valuelist.append(new_prob)
    print(new_prob)

[0.14490172 0.08738422 0.19518982 0.25492865 0.31759558]
[0.14395199 0.08917821 0.19414589 0.31786704 0.25485688]
[0.08453869 0.14697917 0.19479911 0.2559003  0.31778274]
[0.08642776 0.31788965 0.14683163 0.25506826 0.19378271]
[0.14484035 0.08835571 0.25440764 0.19474485 0.31765145]
[0.08571393 0.14775145 0.31792546 0.19353573 0.25507343]
[0.1956962  0.14652426 0.08334916 0.25689346 0.31753692]


In [42]:
for i in range(7):
    result_dict = dict(zip(keylists[i], valuelist[i]))
    print(result_dict)


{'pp': 0.14490172320947764, 'Tau': 0.08738422186322026, 'va': 0.19518982229402262, 'po': 0.25492864835756596, 'sib': 0.3175955842757135}
{'pp': 0.14395198522622346, 'Tau': 0.08917820867959374, 'va': 0.19414589104339797, 'po': 0.3178670360110803, 'sib': 0.2548568790397045}
{'Tau': 0.08453869047619049, 'pp': 0.14697916666666666, 'va': 0.19479910714285714, 'sib': 0.2559002976190476, 'po': 0.3177827380952381}
{'Tau': 0.08642775881683733, 'po': 0.3178896473265074, 'pp': 0.1468316268486917, 'sib': 0.2550682593856655, 'va': 0.19378270762229807}
{'pp': 0.1448403461653238, 'Tau': 0.08835571471202627, 'po': 0.2544076395105938, 'va': 0.19474485228290064, 'sib': 0.31765144732915546}
{'Tau': 0.08571393311119341, 'pp': 0.14775145008021723, 'po': 0.31792545970628167, 'va': 0.19353572750833029, 'sib': 0.25507342959397755}
{'va': 0.19569620253164557, 'pp': 0.14652426160337553, 'Tau': 0.08334915611814346, 'po': 0.2568934599156118, 'sib': 0.3175369198312236}
