In [1]:
import csv
import random
import os
from datetime import datetime

# Configuration
from datetime import timezone  # imported here so this cell stays runnable on its own

MAX_OUTPUT_ROWS = 1000000  # Total number of rows to generate
MAX_PASSENGERS_PER_BOOKING = 5  # Largest group size drawn for a single booking
PASSENGER_FILE = "level_1/Passenger.csv"
BOOKING_FILE = "level_3/Booking.csv"
OUTPUT_FILE = "level_4/BookingPassenger.csv"

# Load Booking & Passenger data
with open(BOOKING_FILE, newline='') as f:
    bookings = list(csv.DictReader(f))

with open(PASSENGER_FILE, newline='') as f:
    passengers = list(csv.DictReader(f))

# Fail early with a clear message instead of an opaque error from random.randint/sample.
if bookings and not passengers:
    raise ValueError(f"{PASSENGER_FILE} contains no passengers; cannot populate bookings")

# random.sample raises ValueError when asked for more items than the population holds,
# so the group size is capped by the number of passengers actually loaded.
largest_group = min(MAX_PASSENGERS_PER_BOOKING, len(passengers))

# Generate BookingPassenger rows
booking_passenger_records = []
total_generated = 0

for booking in bookings:
    remaining = MAX_OUTPUT_ROWS - total_generated
    if remaining <= 0:
        break

    booking_id = booking["BookingID"]
    # Clamp the group to the rows still allowed, so the primary contact is always
    # chosen among passengers that are really written (the last booking is never
    # left without a primary contact by the row cap).
    num_passengers = min(random.randint(1, largest_group), remaining)

    selected_passengers = random.sample(passengers, num_passengers)
    primary_contact = random.choice(selected_passengers)["PassengerID"]

    for passenger in selected_passengers:
        booking_passenger_records.append({
            "BookingID": booking_id,
            "PassengerID": passenger["PassengerID"],
            "IsPrimaryContact": "TRUE" if passenger["PassengerID"] == primary_contact else "FALSE",
            # Aware UTC "now": datetime.utcnow() is naive, and .timestamp() on a naive
            # value treats it as local time, shifting the epoch by the UTC offset.
            "CreatedAt": int(datetime.now(timezone.utc).timestamp())
        })
        total_generated += 1

# Write the CSV file
os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)
with open(OUTPUT_FILE, mode="w", newline='') as f:
    writer = csv.DictWriter(f, fieldnames=["BookingID", "PassengerID", "IsPrimaryContact", "CreatedAt"])
    writer.writeheader()
    writer.writerows(booking_passenger_records)

print(f"✅ Đã ghi {len(booking_passenger_records)} dòng vào {OUTPUT_FILE}")


  "CreatedAt": int(datetime.utcnow().timestamp())


✅ Đã ghi 1000000 dòng vào level_4/BookingPassenger.csv


In [4]:
import csv

input_file = "level_3/OperationalFlightLeg_full.csv"
output_file = "level_3/LegID_OperatingAirlineID.csv"

# Columns copied from the full leg file into the slimmed-down file.
kept_columns = ['LegID', 'OperatingAirlineID']

# Stream rows from the input to the output, keeping only the columns above.
with open(input_file, newline='', encoding='utf-8') as src:
    with open(output_file, mode='w', newline='', encoding='utf-8') as dst:
        projector = csv.DictWriter(dst, fieldnames=kept_columns)
        projector.writeheader()
        # The generator is consumed lazily, so the input is never held in memory.
        projector.writerows(
            {column: record[column] for column in kept_columns}
            for record in csv.DictReader(src)
        )

print(f"✅ Đã tạo file mới: {output_file}")


✅ Đã tạo file mới: level_3/LegID_OperatingAirlineID.csv


In [6]:
import csv

input_file = "level_2/crewMem.csv"
output_file = "level_2/short_crewMem.csv"

# Columns copied from the full crew file into the slimmed-down file.
kept_columns = ['CrewMemberID', 'Role', 'AirlineID']

# Stream rows from the input to the output, keeping only the columns above.
with open(input_file, newline='', encoding='utf-8') as src:
    with open(output_file, mode='w', newline='', encoding='utf-8') as dst:
        projector = csv.DictWriter(dst, fieldnames=kept_columns)
        projector.writeheader()
        # The generator is consumed lazily, so the input is never held in memory.
        projector.writerows(
            {column: record[column] for column in kept_columns}
            for record in csv.DictReader(src)
        )

print(f"✅ Đã tạo file mới: {output_file}")

✅ Đã tạo file mới: level_2/short_crewMem.csv


In [15]:
import csv
import random
import os
from datetime import datetime

# ==== CONFIGURATION ====
MAX_OUTPUT_ROWS = 1_000_000  # bound checked against the number of generated assignment rows
CHUNK_SIZE = 500  # number of rows written to each output CSV file
OUTPUT_DIR = "level_4/split_FlightCrewAssignment"  # directory that receives the chunk files

# ==== ĐỌC DỮ LIỆU ====
def read_csv_to_dictlist(path):
    """Read the UTF-8 CSV file at ``path`` and return its rows as a list of dicts.

    The first line of the file supplies the dictionary keys (csv.DictReader
    behavior); every value is returned as a string.
    """
    with open(path, newline='', encoding='utf-8') as handle:
        rows = [record for record in csv.DictReader(handle)]
    return rows

from datetime import timezone  # imported here so this cell stays runnable on its own

legs = read_csv_to_dictlist("level_3/LegID_OperatingAirlineID.csv")
crew = read_csv_to_dictlist("level_2/short_crewMem.csv")

# ==== GROUP CREW BY ROLE ====
roles = ['Captain', 'First Officer', 'Purser', 'Flight Attendant']
crew_by_role = {role_name: [] for role_name in roles}
for member in crew:
    role = member['Role']
    # Crew whose Role is not one of the four known roles are ignored.
    if role in crew_by_role:
        crew_by_role[role].append(member)

# Index crew by (role, airline) once, so each leg only inspects the candidates of
# its own airline instead of rescanning the whole crew list. Candidate order within
# a bucket is the same as in crew_by_role, so random.choice sees the same sequence.
crew_by_role_airline = {}
for role, members in crew_by_role.items():
    for member in members:
        crew_by_role_airline.setdefault((role, member['AirlineID']), []).append(member)


def _pick_crew_id(role, airline_id, used_crew_ids):
    """Randomly pick an unused crew member of ``role`` from ``airline_id``.

    The chosen CrewMemberID is added to ``used_crew_ids`` and returned; returns
    None when no candidate is available.
    """
    available = [
        c for c in crew_by_role_airline.get((role, airline_id), ())
        if c['CrewMemberID'] not in used_crew_ids
    ]
    if not available:
        return None
    crew_id = random.choice(available)['CrewMemberID']
    used_crew_ids.add(crew_id)
    return crew_id


def _assignment_row(leg_id, crew_id, role):
    """Build one FlightCrewAssignment row stamped with the current UTC epoch second."""
    return {
        'OperationalFlightLegID': leg_id,
        'CrewMemberID': crew_id,
        'AssignedRole': role,
        # Aware UTC "now": datetime.utcnow() is naive, and .timestamp() on a naive
        # value treats it as local time, shifting the epoch by the UTC offset.
        'CreatedAt': int(datetime.now(timezone.utc).timestamp())
    }


# ==== ASSIGN CREW ====
assignments = []
pilot_roles = ['Captain', 'First Officer']
cabin_roles = ['Purser', 'Flight Attendant']

for leg in legs:
    remaining = MAX_OUTPUT_ROWS - len(assignments)
    if remaining <= 0:
        break

    leg_id = leg['LegID']
    airline_id = leg['OperatingAirlineID']
    used_crew_ids = set()
    leg_assignments = []

    # Assign exactly one pilot: a Captain if the airline has one, otherwise a First Officer.
    pilot_assigned = False
    for role in pilot_roles:
        crew_id = _pick_crew_id(role, airline_id, used_crew_ids)
        if crew_id is not None:
            leg_assignments.append(_assignment_row(leg_id, crew_id, role))
            pilot_assigned = True
            break  # one pilot is enough

    # Legs whose operating airline has no pilot available produce no rows at all.
    if not pilot_assigned:
        continue

    # Add at most one member per cabin role when the airline has one.
    for role in cabin_roles:
        crew_id = _pick_crew_id(role, airline_id, used_crew_ids)
        if crew_id is not None:
            leg_assignments.append(_assignment_row(leg_id, crew_id, role))

    # Slice so the total never exceeds MAX_OUTPUT_ROWS; the pilot row comes first,
    # so a truncated leg still keeps its pilot.
    assignments.extend(leg_assignments[:remaining])

# ==== WRITE CSV FILES, CHUNK_SIZE ROWS PER FILE ====
os.makedirs(OUTPUT_DIR, exist_ok=True)

total_written = 0
output_columns = ['OperationalFlightLegID', 'CrewMemberID', 'AssignedRole', 'CreatedAt']

# Files are numbered from 1 and zero-padded to at least three digits.
for chunk_number, start in enumerate(range(0, len(assignments), CHUNK_SIZE), start=1):
    chunk = assignments[start:start + CHUNK_SIZE]
    filename = os.path.join(OUTPUT_DIR, f"FlightCrewAssignment_{chunk_number:03d}.csv")

    with open(filename, mode='w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=output_columns)
        writer.writeheader()
        writer.writerows(chunk)

    total_written += len(chunk)
    print(f"✅ Ghi {len(chunk)} dòng vào {filename} — 📦 Tổng cộng đã sinh: {total_written} dòng")


  'CreatedAt': int(datetime.utcnow().timestamp())
  'CreatedAt': int(datetime.utcnow().timestamp())


✅ Ghi 500 dòng vào level_4/split_FlightCrewAssignment\FlightCrewAssignment_001.csv — 📦 Tổng cộng đã sinh: 500 dòng
✅ Ghi 500 dòng vào level_4/split_FlightCrewAssignment\FlightCrewAssignment_002.csv — 📦 Tổng cộng đã sinh: 1000 dòng
✅ Ghi 500 dòng vào level_4/split_FlightCrewAssignment\FlightCrewAssignment_003.csv — 📦 Tổng cộng đã sinh: 1500 dòng
✅ Ghi 500 dòng vào level_4/split_FlightCrewAssignment\FlightCrewAssignment_004.csv — 📦 Tổng cộng đã sinh: 2000 dòng
✅ Ghi 500 dòng vào level_4/split_FlightCrewAssignment\FlightCrewAssignment_005.csv — 📦 Tổng cộng đã sinh: 2500 dòng
✅ Ghi 500 dòng vào level_4/split_FlightCrewAssignment\FlightCrewAssignment_006.csv — 📦 Tổng cộng đã sinh: 3000 dòng
✅ Ghi 500 dòng vào level_4/split_FlightCrewAssignment\FlightCrewAssignment_007.csv — 📦 Tổng cộng đã sinh: 3500 dòng
✅ Ghi 500 dòng vào level_4/split_FlightCrewAssignment\FlightCrewAssignment_008.csv — 📦 Tổng cộng đã sinh: 4000 dòng
✅ Ghi 500 dòng vào level_4/split_FlightCrewAssignment\FlightCrewAssignmen