In [1]:
import random
import pandas as pd
import numpy as np

# Define maintenance activity types

In [2]:
# Catalogue of maintenance activity types, one row per type: (type number, type name, estimated duration).
# Types 3, 4, and 8 are major maintenance activities; all other types are routine maintenance activities.
_maintenance_activity_rows = (
    (1, "Gage correction", 20),
    (2, "Bolt fasteners oiling", 15),
    (3, "Tamping", 50),
    (4, "Grouped turnout replacement", 60),
    (5, "Fence repairs", 40),
    (6, "Precise track alignment", 40),
    (7, "Replacement of rail components", 30),
    (8, "Rail and turnout grinding with track maintenance machines", 50),
    (9, "Ballast adjustment", 20),
    (10, "Replacement of faulty equipment in color light signal", 15),
    (11, "Internal maintenance of electric turnout switch devices", 30),
    (12, "Section track circuit insulation device maintenance", 40),
    (13, "Internal operations of electro-hydraulic turnout switch devices", 30),
    (14, "Balise equipment maintenance", 20),
    (15, "Antenna equipment maintenance", 30),
    (16, "Cleaning and maintenance of overhead contact system insulation components", 30),
    (17, "Replacement of overhead contact system components", 50),
)

# Maintenance activity type information: the key is the type number, and the value is the list
# [type name, estimated duration].
maintenance_activity_type_info = {
    type_number: [type_name, estimated_duration]
    for type_number, type_name, estimated_duration in _maintenance_activity_rows
}

# Randomly generate maintenance needs

In [3]:
# Major maintenance activity types (type numbers 3, 4, and 8).
# Every type starts with the flag 0; the generation loop sets the flag to 1 once the type has been selected.
major_activity_types = dict.fromkeys((3, 4, 8), 0)

In [4]:
# Seed both random number generators used in this notebook (`random` and `np.random`) so that
# "Restart Kernel -> Run All" regenerates exactly the same instance.
# Set RANDOM_SEED to None to draw a fresh random instance on every run.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

num_of_sections = 6
# Number of sections that receive a major maintenance activity. Each such section is later given a
# distinct major activity type, so this must not exceed the number of major activity types (3).
num_of_major_maintenance_sections = 3
# Randomly select 3 sections from 1 to 6 as sections with major maintenance activities
major_maintenance_sections = random.sample(range(1, num_of_sections + 1), num_of_major_maintenance_sections)

The following cells use a relatively balanced distribution of maintenance volume (the same number of activities in every section) as an example.

In [5]:
# Store randomly generated maintenance activities for each section
# Store maintenance activities numbers and information under maintenance volume of 180
balanced_180_maintenance_activities_with_id = {}
# Store maintenance activities numbers and information under maintenance volume of 240
balanced_240_maintenance_activities_with_id = {}
# Under the maintenance volume of 180, a total of 30 maintenance activities are generated in three days in one section.
num_of_activities_180 = 30
# Under the maintenance volume of 240, a total of 40 maintenance activities are generated in three days in one section.
num_of_activities_240 = 40
# Record the number of each type of maintenance activity generated in each section
section_maintenance_activities_num_reporter = {}
# Total maintenance activity counter under maintenance volume of 180
total_180_maintenance_needs = 0
# Total maintenance activity counter under maintenance volume of 240
total_240_maintenance_needs = 0
# A section receives at most this many routine maintenance activities of the same type
max_activities_per_routine_type = 3
# Routine maintenance activity type numbers, i.e. every type that is not a major type (3, 4, 8).
# The list does not change during generation, so it is built once instead of once per draw.
available_routine_activity_types = [key for key in maintenance_activity_type_info.keys() if key not in major_activity_types]

# Reset the "already selected" flags of the major activity types. Without this reset, running this cell a
# second time finds no unused major type and random.choice() fails on an empty list.
for major_activity_type in major_activity_types:
    major_activity_types[major_activity_type] = 0

# Randomly generate maintenance needs for each section. The needs are filled gradually: the first 30
# activities of a section belong to both the 180 and the 240 volume, the remaining ones only to the 240 volume.
# Each maintenance activity is assigned a number and a record containing, in this order: the located section,
# activity type, start date of the flexible scheduling range, end date of the flexible scheduling range,
# fixed scheduling date, estimated duration, maintenance type (1 for major maintenance, 2 for routine
# maintenance based on flexible scheduling range, 3 for routine maintenance based on maintenance threshold),
# and importance level (1 for normal, 2 for important, 3 for very important).
# 1. Assign fixed scheduling date. For routine maintenance activities, the fixed scheduling date is drawn
#    uniformly at random from 1, 2, and 3. For each major maintenance activity, since it is required to be
#    scheduled every day within the planning horizon, its fixed scheduling date is set to 0, indicating that
#    there is no fixed scheduling date.
# 2. Assign importance level. There is a 50% probability of 1, a 30% probability of 2, and a 20% probability
#    of 3. For major maintenance activities, the importance level can only be 3.
# 3. According to the fixed scheduling date and importance level, allocate the flexible scheduling range of
#    routine maintenance activities:
#      - importance level 1: the range is always 1 to 3;
#      - importance level 2: fixed date 1 -> 1 to 2; fixed date 2 -> 1 to 2 or 2 to 3 (equal probability);
#        fixed date 3 -> 2 to 3;
#      - importance level 3: the range is only the fixed scheduling date itself.
#    For major maintenance activities, the range is 1 to 3, that is, the entire planning horizon.
# 4. Assign maintenance type. For major maintenance activities with types 3, 4, and 8, the maintenance type
#    can only be 1. For routine maintenance activities, if the flexible scheduling range is only 1, 1 to 2,
#    or 1 to 3, the maintenance type has a 20% probability of being 2 and an 80% probability of being 3.
#    Otherwise, the maintenance type can only be 2.
for section_id in range(1, num_of_sections + 1):
    section_maintenance_activities_num_reporter[section_id] = {}
    # Section maintenance activity counter
    section_maintenance_needs = 0
    # If the section is a section with major maintenance activity, a type number is randomly selected from 3, 4 or 8 as the activity type.
    if section_id in major_maintenance_sections:
        # Randomly select a major maintenance type number that no other section has been given yet
        available_major_activity_types = [key for key, value in major_activity_types.items() if value == 0]
        selected_major_activity_type = random.choice(available_major_activity_types)
        major_activity_types[selected_major_activity_type] = 1  # Set the value of the selected major maintenance activity type number to 1, indicating that it has been set.
        # Set maintenance activity information
        flexible_schedule_start_date = 1
        flexible_schedule_end_date = 3
        fixed_schedule_date = 0
        estimated_duration = maintenance_activity_type_info[selected_major_activity_type][1]
        maintenance_type = 1
        importance_level = 3
        major_activity_record = [section_id, selected_major_activity_type, flexible_schedule_start_date, flexible_schedule_end_date, fixed_schedule_date, estimated_duration, maintenance_type, importance_level]
        total_180_maintenance_needs += 1
        total_240_maintenance_needs += 1
        section_maintenance_needs += 1
        # Each dictionary gets its own list object so that editing one volume never changes the other
        balanced_180_maintenance_activities_with_id[total_180_maintenance_needs] = list(major_activity_record)
        balanced_240_maintenance_activities_with_id[total_240_maintenance_needs] = list(major_activity_record)
        section_maintenance_activities_num_reporter[section_id][selected_major_activity_type] = 1

    # The sampling loop below only terminates if the section can actually hold the requested number of
    # activities; fail loudly instead of looping forever on an impossible configuration.
    section_capacity = section_maintenance_needs + max_activities_per_routine_type * len(available_routine_activity_types)
    if num_of_activities_240 > section_capacity:
        raise ValueError(
            f"Section {section_id} can hold at most {section_capacity} maintenance activities, "
            f"but {num_of_activities_240} were requested."
        )

    while section_maintenance_needs < num_of_activities_240:
        # Randomly select a routine maintenance activity type number
        activity_type = random.choice(available_routine_activity_types)
        # Reject the draw if this section already has the maximum number of activities of this type
        activities_of_this_type = section_maintenance_activities_num_reporter[section_id].get(activity_type, 0)
        if activities_of_this_type >= max_activities_per_routine_type:
            continue
        section_maintenance_activities_num_reporter[section_id][activity_type] = activities_of_this_type + 1
        section_maintenance_needs += 1

        # Generate the information of the newly added routine maintenance activity
        # 1. Randomly select a fixed scheduling date from 1, 2, or 3
        fixed_schedule_date = random.choice([1, 2, 3])

        # 2. Randomly determine the importance level
        random_value = random.random()
        if random_value < 0.5:  # 50% probability of normal
            importance_level = 1
        elif random_value < 0.8:  # 30% probability of important
            importance_level = 2
        else:  # 20% probability of very important
            importance_level = 3

        # 3. According to the fixed scheduling date and importance level, determine the earliest and latest dates of the flexible scheduling range
        if importance_level == 1:
            # Normal activities may be scheduled on any day of the planning horizon
            flexible_schedule_range = [1, 3]
        elif importance_level == 3:
            # Very important activities can only be scheduled on their fixed scheduling date
            flexible_schedule_range = [fixed_schedule_date, fixed_schedule_date]
        elif fixed_schedule_date == 1:
            flexible_schedule_range = [1, 2]
        elif fixed_schedule_date == 3:
            flexible_schedule_range = [2, 3]
        elif random.choice([True, False]):  # fixed_schedule_date == 2: both two-day windows are equally likely
            flexible_schedule_range = [1, 2]
        else:
            flexible_schedule_range = [2, 3]
        flexible_schedule_start_date = flexible_schedule_range[0]  # The start date of the flexible scheduling range
        flexible_schedule_end_date = flexible_schedule_range[1]  # The end date of the flexible scheduling range

        # 4. According to the flexible scheduling range, determine the maintenance type
        if flexible_schedule_range in ([1, 1], [1, 2], [1, 3]):
            # 20% probability of being routine maintenance based on flexible scheduling range, 80% probability of being routine maintenance based on maintenance threshold
            if random.choice([True, False, False, False, False]):
                maintenance_type = 2  # Routine maintenance based on flexible scheduling range
            else:
                maintenance_type = 3  # Routine maintenance based on maintenance threshold
        else:
            maintenance_type = 2  # Routine maintenance based on flexible scheduling range

        activity_record = [section_id, activity_type, flexible_schedule_start_date, flexible_schedule_end_date, fixed_schedule_date, maintenance_activity_type_info[activity_type][1], maintenance_type, importance_level]
        # Every activity belongs to the 240 volume
        total_240_maintenance_needs += 1
        balanced_240_maintenance_activities_with_id[total_240_maintenance_needs] = activity_record
        # Only the first `num_of_activities_180` activities of a section also belong to the 180 volume
        if section_maintenance_needs <= num_of_activities_180:
            total_180_maintenance_needs += 1
            balanced_180_maintenance_activities_with_id[total_180_maintenance_needs] = list(activity_record)

# Sanity checks: every section contributes exactly the configured number of activities to each volume
assert len(balanced_240_maintenance_activities_with_id) == num_of_sections * num_of_activities_240
assert len(balanced_180_maintenance_activities_with_id) == num_of_sections * min(num_of_activities_180, num_of_activities_240)

# Save maintenance activity information as .csv files

### The maintenance volume of 180

In [6]:
# Column names of one activity record, in the order in which the values are stored in each list
maintenance_volume_180_columns = [
    'Section',
    'Activity Type',
    'Start Date',
    'End Date',
    'Fixed Scheduling Date',
    'Estimated Duration',
    'Maintenance Type',
    'Importance Level',
]
# One row per maintenance activity; the dictionary keys (activity numbers) become the index
balanced_180_maintenance_needs_df = pd.DataFrame.from_dict(
    balanced_180_maintenance_activities_with_id,
    orient='index',
    columns=maintenance_volume_180_columns,
)
# Write the table to disk, labelling the index column as the activity ID
balanced_180_maintenance_needs_df.to_csv('balanced_180_maintenance_volume.csv', index_label='Activity ID')

### The maintenance volume of 240

In [7]:
# Column names of one activity record, in the order in which the values are stored in each list
maintenance_volume_240_columns = [
    'Section',
    'Activity Type',
    'Start Date',
    'End Date',
    'Fixed Scheduling Date',
    'Estimated Duration',
    'Maintenance Type',
    'Importance Level',
]
# One row per maintenance activity; the dictionary keys (activity numbers) become the index
balanced_240_maintenance_needs_df = pd.DataFrame.from_dict(
    balanced_240_maintenance_activities_with_id,
    orient='index',
    columns=maintenance_volume_240_columns,
)
# Write the table to disk, labelling the index column as the activity ID
balanced_240_maintenance_needs_df.to_csv('balanced_240_maintenance_volume.csv', index_label='Activity ID')

# Generate data for different trains

In [8]:
# Passenger train departure time list (one entry per passenger train)
passenger_train_departure_times = [30, 90, 150, 210, 615, 645, 675, 705]  # min
# Number of passenger trains per day. Derived from the departure list so that the two can never disagree:
# the train data cell indexes `passenger_train_departure_times` with every train number up to this count.
num_of_passenger_trains_per_day = len(passenger_train_departure_times)
# Passenger train dwell time list (5 entries; presumably one per station in station order - TODO confirm)
passenger_train_stop_times = [0, 0, 8, 0, 15]  # min

In [9]:
# There are three train volumes, 30, 40 and 50 respectively. After deducting the number of passenger trains that must operate every day (8), the number of freight trains per day is 22, 32 and 42 respectively.
# First, define the freight train information under train volume of 50, and the train information under other train volumes can be deleted from them
# Then randomly generate the stop plan for 42 freight trains

def generate_freight_train_stop_matrix(num_trains=42, num_stations=5, min_stops=1, excluded_stations=(2, 4), dwell_time=3):
    """Randomly generate the stop plan of the freight trains.

    For every train, a random number of stops between `min_stops` and the number of
    available stations (inclusive) is drawn, and that many distinct available stations
    are chosen. Randomness comes from the global `np.random` state, so seed it
    beforehand for reproducible results.

    Parameters
    ----------
    num_trains : int
        Number of freight trains (rows of the matrix).
    num_stations : int
        Number of stations (columns of the matrix), indexed from 0.
    min_stops : int
        Minimum number of stops per train; must lie between 0 and the number of
        available stations.
    excluded_stations : iterable of int
        0-based station indices at which freight trains never stop. The default
        (2, 4) matches the indices with a non-zero passenger dwell time.
    dwell_time : int or float
        Value written into the matrix at every stop (presumably minutes - TODO confirm).

    Returns
    -------
    numpy.ndarray
        Float array of shape (num_trains, num_stations); an entry is `dwell_time` where
        the train stops and 0 elsewhere.

    Raises
    ------
    ValueError
        If `min_stops` is negative or larger than the number of available stations.
    """
    excluded = set(excluded_stations)
    # Integer dtype is forced so that the array stays usable as an index even when it is empty
    available_stations = np.array([i for i in range(num_stations) if i not in excluded], dtype=int)
    if not 0 <= min_stops <= len(available_stations):
        raise ValueError(
            f"min_stops must be between 0 and {len(available_stations)} "
            f"(the number of available stations), got {min_stops}"
        )

    stop_matrix = np.zeros((num_trains, num_stations))

    for train in range(num_trains):
        # The upper bound of randint is exclusive, hence the +1 to allow stopping at every available station
        num_stops = np.random.randint(min_stops, len(available_stations) + 1)
        stop_stations = np.random.choice(available_stations, size=num_stops, replace=False)
        stop_matrix[train, stop_stations] = dwell_time

    return stop_matrix

freight_train_stop_matrix = generate_freight_train_stop_matrix()

In [10]:
# Generate a train data dictionary, where the key is the train number and the value is a list containing:
#   - the train type (1 for passenger train, 2 for freight train),
#   - the train speed type (1 for 160 km/h, 2 for 120 km/h),
#   - the departure time window [earliest, latest] (for passenger trains both bounds are the corresponding
#     number in 'passenger_train_departure_times'),
#   - the stop plan (for a passenger train a copy of 'passenger_train_stop_times'; for a freight train the
#     corresponding row of 'freight_train_stop_matrix' as a list, taken in order).
# In the train data dictionary, the first 8 trains are passenger trains, and the rest are freight trains.
# Departure time windows of the freight trains (time 0 corresponds to 18:00):
#   - freight trains 1 to 11 (a total of 11 trains): 18:00-21:00,
#   - freight trains 12 to 22 (a total of 11 trains): 21:00-0:00(+1),
#   - freight trains 23 to 34 (a total of 12 trains): 0:00(+1)-3:00(+1),
#   - freight trains 35 to 42 (a total of 8 trains): 3:00(+1)-6:00(+1).
train_data = {}
# Add passenger train data
for i in range(1, num_of_passenger_trains_per_day + 1):
    departure_time = passenger_train_departure_times[i - 1]
    # Every train gets its own copy of the stop plan, so editing one train's plan cannot affect the others
    train_data[i] = [1, 1, [departure_time, departure_time], list(passenger_train_stop_times)]
# Add freight train data
# The number of freight trains is taken from the stop matrix instead of repeating the literal 42
num_of_freight_trains = len(freight_train_stop_matrix)
for i in range(num_of_passenger_trains_per_day + 1, num_of_passenger_trains_per_day + 1 + num_of_freight_trains):
    # 1-based position of this train among the freight trains
    freight_train_number = i - num_of_passenger_trains_per_day
    # Departure time range for freight trains
    if freight_train_number <= 11:
        departure_time_range = [0, 180]
    elif freight_train_number <= 22:
        departure_time_range = [180, 360]
    elif freight_train_number <= 34:
        departure_time_range = [360, 540]
    else:
        departure_time_range = [540, 720]
    # For freight trains, randomly select a speed type, where 1 represents a speed of 160 km/h and 2 represents a speed of 120 km/h. The probability of selecting speed type 1 is 75%, and the probability of selecting speed type 2 is 25%.
    speed_type = 1 if random.random() < 0.75 else 2
    train_data[i] = [2, speed_type, departure_time_range, freight_train_stop_matrix[freight_train_number - 1].tolist()]

In [11]:
# Column names of one train record, in the order in which the values are stored in each list
train_data_columns = ['Train Type', 'Speed Type', 'Departure Time Range', 'Stop Scheme']
# One row per train; the dictionary keys (train numbers) become the index
train_df = pd.DataFrame.from_dict(
    train_data,
    orient='index',
    columns=train_data_columns,
)
# Write the table to disk, labelling the index column as the train ID
train_df.to_csv('reduced_train_data_50.csv', index_label='Train ID')