In [17]:
import matplotlib as plt
import pandas as pd
import numpy as np
from datetime import datetime

In [18]:
# Load only the columns needed to build the SWF trace.
columns_to_read = [
    'QUEUED_TIMESTAMP', 'RUNTIME_SECONDS', 'WALLTIME_SECONDS', 'NODES_REQUESTED',
    'QUEUE_NAME', 'QUEUED_WAIT_SECONDS', 'COBALT_JOBID',
]

# Build the working frame as one chain so no intermediate frame is mutated in place:
#   1. keep only the jobs whose QUEUE_NAME is 'default',
#   2. order them by COBALT_JOBID (ascending),
#   3. parse QUEUED_TIMESTAMP into datetimes so timestamps can be subtracted.
df = (
    pd.read_csv('ANL-ALCF-DJC-THETA_20180101_20181231.csv', usecols=columns_to_read)
    .loc[lambda jobs: jobs['QUEUE_NAME'].isin(['default'])]
    .sort_values(by='COBALT_JOBID', ascending=True)
    .assign(QUEUED_TIMESTAMP=lambda jobs: pd.to_datetime(jobs['QUEUED_TIMESTAMP']))
)

# Fail early with a readable message rather than an IndexError on the lookup below.
if df.empty:
    raise ValueError("No jobs left after filtering on QUEUE_NAME == 'default'")

# Reference time for the submit offsets: the queue time of the first row after sorting.
# `.iloc` is purely positional and rejects a column label, so select the column
# by name first and then take position 0.
# NOTE(review): rows are ordered by COBALT_JOBID, not by QUEUED_TIMESTAMP; if job
# IDs are not monotonic in queue time, some submit offsets will be negative — confirm.
first_queued_timestamp = df['QUEUED_TIMESTAMP'].iloc[0]

# Constant values that close every record (positions 10-19 of each output row).
trailing_fields = [-1, 0, -1, -1, -1, -1, -1, -1, -1, 0]

# Build one record per job by walking the needed columns in lockstep.
# Record layout: row label, submit offset in whole seconds relative to
# first_queued_timestamp, QUEUED_WAIT_SECONDS, RUNTIME_SECONDS, NODES_REQUESTED,
# -1, -1, NODES_REQUESTED again, WALLTIME_SECONDS, then the constant tail.
# NOTE(review): the first field is the DataFrame row label carried over from the
# unfiltered CSV, so it is neither contiguous nor ordered after the sort — confirm
# that the consumer does not need a 1-based running job counter here.
# NOTE(review): each record has 19 fields, whereas the Standard Workload Format
# defines 18 — confirm the extra trailing column is expected downstream.
formatted_rows = [
    [
        row_label,
        int((queued_at - first_queued_timestamp).total_seconds()),
        wait_seconds,
        runtime_seconds,
        nodes,
        -1, -1,
        nodes,
        walltime_seconds,
        *trailing_fields,
    ]
    for row_label, queued_at, wait_seconds, runtime_seconds, nodes, walltime_seconds in zip(
        df.index,
        df['QUEUED_TIMESTAMP'],
        df['QUEUED_WAIT_SECONDS'],
        df['RUNTIME_SECONDS'],
        df['NODES_REQUESTED'],
        df['WALLTIME_SECONDS'],
    )
]

# Materialise the records as an integer-typed frame, one row per job.
formatted_df = pd.DataFrame(formatted_rows, dtype=int)

# Destination of the generated trace.
output_file_path = 'theta_2018.swf'

# Comment lines placed at the top of the trace; each one starts with ';'.
# NOTE(review): these values are hard-coded. UnixStartTime presumably matches the
# queue time of the first job in this particular input — re-check it (and the
# node/processor counts) if the input file or the queue filter changes.
header_lines = [
    "; UnixStartTime: 1514764835",
    "; MaxNodes: 4360",
    "; MaxProcs: 4360"
]

# Write the header first (this also truncates any previous output) ...
with open(output_file_path, 'w') as f:
    f.write('\n'.join(header_lines) + '\n')

# ... then append the space-separated job records below it. Appending avoids
# writing the whole table, reading it back, and rewriting it just to prepend
# three header lines.
formatted_df.to_csv(output_file_path, mode='a', header=False, index=False, sep=' ')
