In [None]:
import pandas as pd

In [None]:
# Read the agent table (pickle) and the state lookup (headerless two-column CSV).
# NOTE: states.csv is rewritten further down in this notebook, so the lookup
# read here is whatever the previous run (or the repository) left on disk.
agents = pd.read_pickle("../../../data/agent_df_base_res_national_load_adjusted.pkl").reset_index(drop=False)
states = pd.read_csv("../../../state_input_csvs/states.csv", header=None, names=['state_abbr', 'state_name'])

# Attach the state name to every agent.
# validate='many_to_one' makes pandas raise MergeError if a state_abbr appears
# more than once in the lookup; without it, duplicated lookup rows would
# silently duplicate agents and inflate the per-state counts computed below.
agents = agents.merge(states, on='state_abbr', how='left', validate='many_to_one')

# Count agents per state and order states from most to fewest agents.
# Agents whose state_abbr had no match in the lookup carry a NaN state_name
# after the left join and are left out of the result, because groupby drops
# NaN group keys by default.
# NOTE: the name `sorted` shadows Python's built-in; it is kept because the
# following cell references it.
sorted = (
    agents
    .groupby(['state_abbr', 'state_name'], as_index=False)
    .agg(agent_count=('agent_id', 'count'))
    .sort_values(by='agent_count', ascending=False)
)

In [None]:
# Split states into size tiers by agent count.
# Tier bounds: large > 1000, mid-large (500, 1000], mid (100, 500], small <= 100.
large_states = sorted[sorted['agent_count'] > 1000]
mid_large_states = sorted[(sorted['agent_count'] > 500) & (sorted['agent_count'] <= 1000)]
mid_states = sorted[(sorted['agent_count'] > 100) & (sorted['agent_count'] <= 500)]
small_states = sorted[sorted['agent_count'] <= 100]

state_csv_dir = "../../../state_input_csvs"
state_columns = ['state_abbr', 'state_name']

# (output file stem, source frame, number of states in the "_test" sample)
state_outputs = [
    ('large_states', large_states, 4),
    ('mid_large_states', mid_large_states, 12),
    ('mid_states', mid_states, 27),
    ('small_states', small_states, 5),
    ('states', sorted, 10),  # overall list of states, all tiers together
]

for file_stem, frame, test_n in state_outputs:
    subset = frame[state_columns]

    # Full list for this tier. Written without a header row, which is the
    # layout the states.csv reader in the earlier cell expects (header=None).
    subset.to_csv(f"{state_csv_dir}/{file_stem}.csv", index=False, header=False)

    # Reproducible test sample. The sample size is capped at the number of
    # rows available so a tier with fewer states than requested no longer
    # raises ValueError. Every test file, including states_test.csv, is
    # written headerless so all state CSVs share one format.
    test_sample = subset.sample(n=min(test_n, len(subset)), random_state=42)
    test_sample.to_csv(f"{state_csv_dir}/{file_stem}_test.csv", index=False, header=False)


In [None]:
# Upload to GCE
!gsutil cp ../../../state_input_csvs/large_states.csv gs://dgen-assets/large_states.csv
!gsutil cp ../../../state_input_csvs/mid_large_states.csv gs://dgen-assets/mid_large_states.csv
!gsutil cp ../../../state_input_csvs/mid_states.csv gs://dgen-assets/mid_states.csv
!gsutil cp ../../../state_input_csvs/small_states.csv gs://dgen-assets/small_states.csv

!gsutil cp ../../../state_input_csvs/large_states_test.csv gs://dgen-assets/large_states_test.csv
!gsutil cp ../../../state_input_csvs/mid_large_states_test.csv gs://dgen-assets/mid_large_states_test.csv
!gsutil cp ../../../state_input_csvs/mid_states_test.csv gs://dgen-assets/mid_states_test.csv
!gsutil cp ../../../state_input_csvs/small_states_test.csv gs://dgen-assets/small_states_test.csv

!gsutil cp ../../../state_input_csvs/states.csv gs://dgen-assets/states.csv
!gsutil cp ../../../state_input_csvs/states_test.csv gs://dgen-assets/states_test.csv
