In [10]:
import numpy as np
import pandas as pd
import random
from collections import defaultdict
from sklearn.preprocessing import MinMaxScaler

In [5]:
def introduce_missingness(df, missing_rate, random_state=None):
    """Return a copy of ``df`` with a random fraction of its cells set to NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. It is never modified.
    missing_rate : float
        Fraction of all cells to blank out, in ``[0, 1]``. The number of
        blanked cells is ``int(missing_rate * df.size)`` (rounded down).
    random_state : int, optional
        Seed for a dedicated ``numpy.random.RandomState``. When ``None``
        (the default) NumPy's global random state is used, so a prior
        ``np.random.seed(...)`` call still controls the result.

    Returns
    -------
    pandas.DataFrame
        A new frame of the same shape with the selected cells missing.

    Raises
    ------
    ValueError
        If ``missing_rate`` is outside ``[0, 1]``.
    """
    if not 0 <= missing_rate <= 1:
        raise ValueError(f"missing_rate must be between 0 and 1, got {missing_rate}")

    # Total number of cells and how many of them to blank out
    total_elements = df.size
    num_missing = int(missing_rate * total_elements)
    if num_missing == 0:
        return df.copy()

    # Draw distinct flat cell indices
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    missing_indices = rng.choice(total_elements, num_missing, replace=False)

    # Build an explicit boolean mask instead of assigning through ``df.values``:
    # that array can be a temporary copy (mixed dtypes), read-only
    # (copy-on-write), or an integer array that cannot hold NaN.
    mask = np.zeros(total_elements, dtype=bool)
    mask[missing_indices] = True

    # ``DataFrame.mask`` returns a new frame and upcasts dtypes where needed.
    return df.mask(mask.reshape(df.shape))

In [6]:
# Relative path to the toy dataset CSV.
data_path = "data/toy_dataset.csv"
# Parse "?" entries as missing values (NaN) while reading the file.
df = pd.read_csv(data_path, na_values='?')

In [7]:
# Ten evenly spaced missingness levels, from 0.1 up to and including 1.0.
missingness_rates = np.linspace(0.1, 1, 10)

# For each level, corrupt a fresh copy of the data and preview its first rows.
for miss_rate in missingness_rates:
    df_with_missing = introduce_missingness(df, miss_rate)
    print(f'Missing Rate: {miss_rate}', df_with_missing.head(), sep='\n')

Missing Rate: 0.1
   Col 1  Col 2  Col 3  Col 4
0   0.17   0.26   0.57   1.00
1   0.50   0.53   0.00   0.83
2   0.83   0.00   0.57   0.33
3    NaN   0.39   0.87   0.50
4   1.00   0.53   0.14   0.67
Missing Rate: 0.2
   Col 1  Col 2  Col 3  Col 4
0   0.17   0.26   0.57    NaN
1    NaN   0.53   0.00   0.83
2   0.83   0.00   0.57   0.33
3   0.17   0.39    NaN   0.50
4    NaN   0.53   0.14    NaN
Missing Rate: 0.30000000000000004
   Col 1  Col 2  Col 3  Col 4
0   0.17    NaN   0.57   1.00
1   0.50   0.53   0.00   0.83
2    NaN   0.00    NaN   0.33
3    NaN   0.39   0.87   0.50
4   1.00   0.53   0.14   0.67
Missing Rate: 0.4
   Col 1  Col 2  Col 3  Col 4
0   0.17    NaN   0.57    NaN
1    NaN   0.53   0.00    NaN
2   0.83   0.00    NaN    NaN
3   0.17    NaN    NaN    NaN
4   1.00    NaN   0.14   0.67
Missing Rate: 0.5
   Col 1  Col 2  Col 3  Col 4
0    NaN    NaN    NaN   1.00
1    NaN   0.53    NaN   0.83
2   0.83   0.00    NaN    NaN
3    NaN   0.39    NaN   0.50
4    NaN   0.53   0.14  

In [60]:
import numpy as np
import pandas as pd
import random
from collections import defaultdict
from sklearn.preprocessing import MinMaxScaler

class ImputationEnvironment:
    """Environment in which an agent fills the missing cells of a DataFrame.

    The working frame (``state``) starts as a copy of ``incomplete_data``.
    Each step writes one value into one cell and is rewarded by how close
    that value is to the corresponding cell of ``complete_data``.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, incomplete_data, complete_data):
        """Store both frames and locate the missing cells.

        Parameters
        ----------
        incomplete_data : pandas.DataFrame
            Frame containing NaN cells to be imputed.
        complete_data : pandas.DataFrame
            Reference frame used for rewards and candidate values; it is
            indexed with the same (row, col) positions as ``incomplete_data``.
        """
        self.incomplete_data = incomplete_data
        self.complete_data = complete_data
        # Work on a copy so the caller's incomplete frame keeps its NaNs.
        self.state = incomplete_data.copy()
        # One (row, col) integer pair per NaN cell of the incomplete frame.
        self.missing_indices = np.argwhere(pd.isna(incomplete_data.values))

    def reset(self):
        """Restore ``state`` to a fresh copy of the incomplete data and return it."""
        self.state = self.incomplete_data.copy()
        return self.state

    def step(self, action, position):
        """Write ``action`` into the cell at ``position`` and score it.

        Parameters
        ----------
        action : scalar
            Value to place in the cell.
        position : sequence of two ints
            ``(row, col)`` integer location of the cell.

        Returns
        -------
        tuple
            ``(state, reward, done)`` where ``state`` is the environment's own
            working frame (modified in place, not a copy), ``reward`` is the
            negative absolute difference to the reference value, and ``done``
            is True once ``state`` contains no NaN.
        """
        row, col = position
        self.state.iat[row, col] = action

        reward = -abs(self.complete_data.iat[row, col] - action)
        done = not pd.isna(self.state.values).any()
        return self.state, reward, done

    def get_possible_actions(self, col):
        """Return the distinct non-missing values of a column of the complete data.

        Parameters
        ----------
        col : int, numpy integer, or str
            Column position or column name.

        Returns
        -------
        numpy.ndarray
            Unique non-NaN values of that column in ``complete_data``.

        Raises
        ------
        KeyError
            If the index is out of range or the name is not a column.
        TypeError
            If ``col`` is neither an integer nor a string.
        """
        # Accept every NumPy integer width, not only np.int64: positions taken
        # from index arrays may use a different integer dtype.
        if isinstance(col, (int, np.integer)):
            col = int(col)
            if not 0 <= col < len(self.complete_data.columns):
                raise KeyError(f"Column index {col} out of range")
            col_name = self.complete_data.columns[col]
        elif isinstance(col, str):
            if col not in self.complete_data.columns:
                raise KeyError(f"Column name '{col}' not found in DataFrame")
            col_name = col
        else:
            raise TypeError("Column must be either an integer index or a string column name")

        return self.complete_data[col_name].dropna().unique()

class QLearningAgent:
    """Tabular Q-learning agent that chooses values for missing cells.

    The agent expects ``env`` to provide ``reset()``, ``step(action, position)``,
    ``get_possible_actions(col)`` and a ``missing_indices`` sequence of
    ``(row, col)`` pairs.

    Q-values live in ``q_table``, a nested mapping
    ``state_key -> action -> value`` where ``state_key`` is produced by
    ``_state_key``.
    """

    def __init__(self, env, alpha=0.1, gamma=0.9, epsilon=0.1):
        """Store the environment and the learning hyper-parameters.

        Parameters
        ----------
        env : object
            Environment to interact with (see class docstring).
        alpha : float
            Learning rate used in the Q-value update.
        gamma : float
            Discount factor applied to the best next-state value.
        epsilon : float
            Probability of picking a random action instead of the greedy one.
        """
        self.env = env
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = defaultdict(lambda: defaultdict(float))

    @staticmethod
    def _state_key(state, position):
        """Build a hashable, NaN-safe key for a (frame, cell position) pair.

        NaN never compares equal to NaN, so a tuple holding NaN objects would
        never match an earlier key and every incomplete state would look new.
        Missing cells are therefore encoded as ``None``.
        """
        flat = state.values.flatten()
        missing = pd.isna(flat)
        cells = tuple(
            None if is_missing else value
            for value, is_missing in zip(flat.tolist(), missing.tolist())
        )
        return cells, tuple(int(p) for p in position)

    def choose_action(self, state, position):
        """Pick a value for the cell at ``position`` with an epsilon-greedy rule.

        Parameters
        ----------
        state : pandas.DataFrame
            Current working frame.
        position : sequence of two ints
            ``(row, col)`` of the cell to fill.

        Returns
        -------
        scalar
            One of ``env.get_possible_actions(col)``. Ties between equal
            Q-values resolve to the first candidate.
        """
        actions = self.env.get_possible_actions(position[1])

        if random.uniform(0, 1) < self.epsilon:
            # Index explicitly rather than calling random.choice on the array:
            # random.choice's emptiness check raises on multi-element NumPy
            # arrays in some Python releases.
            return actions[random.randrange(len(actions))]

        # Read without inserting, so unseen states do not grow the table.
        q_row = self.q_table.get(self._state_key(state, position), {})
        return max(actions, key=lambda a: q_row.get(a, 0.0))

    def learn(self, state, action, reward, next_state, position):
        """Apply one Q-learning update for the transition just observed.

        Parameters
        ----------
        state : pandas.DataFrame
            Frame as it was *before* the action was applied.
        action : scalar
            Value that was written.
        reward : float
            Reward returned by the environment.
        next_state : pandas.DataFrame
            Frame after the action was applied.
        position : sequence of two ints
            ``(row, col)`` of the cell that was filled.
        """
        state_key = self._state_key(state, position)
        # NOTE(review): the next-state key reuses the current position because
        # the next cell is not known at update time — confirm this is intended.
        next_state_key = self._state_key(next_state, position)

        # Look up without creating an empty row for never-visited states.
        next_row = self.q_table.get(next_state_key)
        best_next = max(next_row.values(), default=0) if next_row else 0

        q_predict = self.q_table[state_key][action]
        q_target = reward + self.gamma * best_next
        self.q_table[state_key][action] = q_predict + self.alpha * (q_target - q_predict)

    def train(self, episodes=1000):
        """Run ``episodes`` episodes of interaction and learning.

        Each episode resets the environment and keeps filling randomly chosen
        cells from ``env.missing_indices`` until the environment reports done.
        Training stops immediately when there are no missing cells.
        """
        for _ in range(episodes):
            state = self.env.reset()
            missing = self.env.missing_indices
            if len(missing) == 0:
                # Nothing to impute; avoid sampling from an empty sequence.
                break

            done = False
            while not done:
                position = missing[random.randrange(len(missing))]
                # Snapshot the frame first: the environment may modify and
                # return the very same object, which would otherwise make the
                # pre-action and post-action states indistinguishable.
                previous_state = state.copy()
                action = self.choose_action(state, position)
                next_state, reward, done = self.env.step(action, position)
                self.learn(previous_state, action, reward, next_state, position)
                state = next_state

In [61]:
# Load the incomplete dataset and its complete counterpart
incomplete_data_path = 'data/toy_dataset_missing.csv'
complete_data_path = 'data/toy_dataset.csv'

# Parse the "?" placeholder as NaN at read time (same as the earlier read of the
# toy dataset) so affected columns are read as numbers instead of strings.
# The complete dataset is not expected to contain "?"; it is parsed the same
# way purely as a safeguard.
incomplete_data = pd.read_csv(incomplete_data_path, na_values='?')
complete_data = pd.read_csv(complete_data_path, na_values='?')

In [62]:
# Show the column labels of the complete dataset.
print(complete_data.columns)

Index(['Col 1', 'Col 2', 'Col 3', 'Col 4'], dtype='object')


In [63]:

# Seed Python's `random` module, which the agent uses for exploration and for
# picking cells, so that a training run can be reproduced.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Optional: scale the data. The scaler is fitted on the incomplete data only and
# the same transformation is then applied to the complete data.
scaler = MinMaxScaler()
incomplete_data = pd.DataFrame(scaler.fit_transform(incomplete_data), columns=incomplete_data.columns)
complete_data = pd.DataFrame(scaler.transform(complete_data), columns=complete_data.columns)

env = ImputationEnvironment(incomplete_data, complete_data)
agent = QLearningAgent(env)

agent.train(episodes=1000)

In [64]:
# # Example calls
# col_index = 3  # Example of integer index
# col_name = 'Col 4'  # Example of column name

# actions_by_index = env.get_possible_actions(col_index)
# actions_by_name = env.get_possible_actions(col_name)
# print(actions_by_index)
# print(actions_by_name)

[1.   0.83 0.33 0.5  0.67 0.   0.7 ]
[1.   0.83 0.33 0.5  0.67 0.   0.7 ]


In [65]:
# Imputed data: the environment's working frame as left by the last training
# episode. It is copied so that later env.step calls, which write into
# env.state in place, cannot alter the reported result.
# Note: values chosen during that episode may include random exploratory picks.
imputed_data = env.state.copy()
print(imputed_data)

   Col 1  Col 2  Col 3  Col 4
0   0.17   0.26   0.57   1.00
1   0.50   0.53   0.00   0.83
2   0.83   0.00   0.57   0.00
3   0.17   0.26   0.87   0.50
4   1.00   0.53   0.57   0.67
5   0.17   0.84   0.86   0.00
6   0.85   0.26   0.17   0.83
7   0.03   1.00   0.71   1.00
8   0.17   0.13   0.86   0.83
9   0.00   0.26   1.00   0.70
