In [None]:

# Aircraft Table
def generate_aircraft(num_records):
    """Build ``num_records`` fake aircraft rows and return them as a list of dicts.

    Every row gets a registration number that is unique within
    ``unique_registration``, plus a random capacity, status and configuration
    code. The ICAO type code, model and manufacturer are drawn together from a
    single table so that one row always describes one aircraft type.
    ``OwnerAirlineID`` is left as ``None``.
    """
    # (ICAOTypeCode, Model, Manufacturer) kept side by side: picking the three
    # columns independently could pair e.g. 'A321' with 'Boeing 787'.
    aircraft_types = [
        ('A321', 'Airbus A321', 'Airbus'),
        ('B789', 'Boeing 787', 'Boeing'),
        ('B737', 'Boeing 737', 'Boeing'),
        ('A380', 'Airbus A380', 'Airbus'),
    ]

    aircraft = []
    for _ in range(num_records):
        registration = unique_value(unique_registration, lambda: faker.bothify(text='??-####').upper())
        type_code, model, manufacturer = random.choice(aircraft_types)
        aircraft.append({
            'RegistrationNumber': registration,
            'ICAOTypeCode': type_code,
            'Model': model,
            'Manufacturer': manufacturer,
            'Capacity': random.randint(100, 400),
            'Status': random.choice(['Active', 'Maintenance']),
            'OwnerAirlineID': None,  # Placeholder for FK to Airline
            'DefaultConfigurationCode': faker.bothify(text='CFG####').upper(),
            'CreatedAt': generate_timestamp(),
            'UpdatedAt': generate_timestamp()
        })
    return aircraft

# Passenger Table
def generate_passenger(num_records):
    """Build ``num_records`` fake passenger rows and return them as a list of dicts.

    Personal details come from ``faker``; the e-mail address is a Faker user
    name joined to a random entry of ``domains`` and is kept unique within
    ``unique_email``. Gender and passenger type are picked at random.
    """
    passengers = []
    for _ in range(num_records):
        # Keys are filled in the order they appear in the output columns.
        row = {}
        row['FirstName'] = faker.first_name()
        row['LastName'] = faker.last_name()
        row['DateOfBirth'] = faker.date_of_birth().strftime('%Y-%m-%d')
        row['Gender'] = random.choice(['Male', 'Female'])
        row['Email'] = unique_value(
            unique_email,
            lambda: faker.user_name() + '@' + random.choice(domains),
        )
        row['PhoneNumber'] = faker.phone_number()
        row['PassportNumber'] = faker.bothify(text='??######').upper()
        row['Nationality'] = faker.country()
        row['PassengerType'] = random.choice(['ADT', 'CHD', 'INF'])
        row['CreatedAt'] = generate_timestamp()
        row['UpdatedAt'] = generate_timestamp()
        passengers.append(row)
    return passengers

# CrewMember Table
def generate_crew_member(num_records):
    """Build ``num_records`` fake crew-member rows and return them as a list of dicts.

    The employee number is made unique through ``unique_value``; note that it
    is tracked in the ``unique_registration`` set. ``AirlineID`` is left as
    ``None``.
    """
    crew = []
    for _ in range(num_records):
        row = {}
        row['EmployeeNumber'] = unique_value(unique_registration, lambda: faker.bothify(text='EMP####'))
        row['FirstName'] = faker.first_name()
        row['LastName'] = faker.last_name()
        row['Role'] = random.choice(['Captain', 'First Officer', 'Flight Attendant'])
        row['AirlineID'] = None  # Placeholder for FK to Airline
        row['LicenseNumber'] = faker.bothify(text='LIC#####')
        row['IsActive'] = random.choice([True, False])
        row['CreatedAt'] = generate_timestamp()
        row['UpdatedAt'] = generate_timestamp()
        crew.append(row)
    return crew

# Main function to generate data
if __name__ == '__main__':
    num_records = int(input('Enter the number of records to generate: '))

    # (output file, generator) pairs, processed in this order.
    outputs = [
        ('airports.csv', generate_airport),
        ('airlines.csv', generate_airline),
        ('aircrafts.csv', generate_aircraft),
        ('passengers.csv', generate_passenger),
        ('crew_members.csv', generate_crew_member),
    ]
    for filename, generate in outputs:
        records = generate(num_records)
        # Take the column names from the rows being written. A sample row is
        # generated only when no rows were requested, so a normal run does not
        # spend an extra set of unique values just to learn the header.
        fieldnames = (records or generate(1))[0].keys()
        write_to_csv(filename, fieldnames, records)


In [26]:
from faker import Faker
import random
import time
import csv
import pandas as pd
import string 



In [27]:
faker = Faker()
# Load data from CSV
def load_csv(filename):
    """Read the CSV file ``filename`` with ``pandas.read_csv`` (default options) and return the DataFrame."""
    frame = pd.read_csv(filename)
    return frame
# Generate Unix timestamp
def generate_timestamp():
    """Return the current Unix time as an ``int`` (fractional seconds truncated)."""
    seconds_since_epoch = time.time()
    return int(seconds_since_epoch)
# Write data to CSV
def write_to_csv(filename, fieldnames, data):
    """Append the dict rows in ``data`` to the CSV file ``filename``.

    The file is opened in append mode as UTF-8. A header row built from
    ``fieldnames`` is written only when the file is empty at that point, so
    repeated calls on the same file keep adding rows under a single header.
    """
    with open(filename, mode='a', newline='', encoding='utf-8') as handle:
        csv_out = csv.DictWriter(handle, fieldnames=fieldnames)
        # Position 0 right after opening for append means nothing has been written yet.
        file_is_empty = handle.tell() == 0
        if file_is_empty:
            csv_out.writeheader()
        for row in data:
            csv_out.writerow(row)
# Function to generate nullable values
def nullable_value(generator, null_probability=0.2):
    """Return ``None`` with probability ``null_probability``, otherwise ``generator()``.

    One ``random.random()`` draw decides the outcome; ``generator`` is only
    called when the draw does not select ``None``.
    """
    roll = random.random()
    return None if roll < null_probability else generator()
# Unique value generator
def unique_value(existing_set, generator, max_attempts=100_000):
    """Return a value from ``generator()`` that is not yet in ``existing_set``.

    The accepted value is added to ``existing_set`` before it is returned, so
    later calls sharing the same set never produce it again.

    Args:
        existing_set: Set of values already handed out; updated in place.
        generator: Zero-argument callable producing candidate values.
        max_attempts: Maximum number of candidates to try before giving up.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        RuntimeError: If every attempt produced a value that is already
            taken. This replaces an endless loop when the generator can no
            longer produce anything new (for example a two-letter code once
            all combinations are used).
    """
    if max_attempts < 1:
        raise ValueError('max_attempts must be at least 1')

    for _ in range(max_attempts):
        candidate = generator()
        if candidate not in existing_set:
            existing_set.add(candidate)
            return candidate

    raise RuntimeError(
        f'Could not generate a new unique value after {max_attempts} attempts '
        f'({len(existing_set)} values already in use)'
    )

In [51]:

# In-memory registries of values already handed out; passed to unique_value().
# Each name is bound to its own, initially empty, set.

# Codes
unique_iata, unique_icao = set(), set()

# Primary-key style IDs
unique_airport_id, unique_airline_id, unique_aircraft_id = set(), set(), set()
unique_passenger_id, unique_crew_member_id = set(), set()

# Other values that must not repeat
unique_registration, unique_email = set(), set()

In [4]:

# Generate AirportID 
def generate_airport_id():
    """Return a random integer from 1 to 99,999,999 as a zero-padded 8-digit string."""
    number = random.randint(1, 99999999)
    return str(number).zfill(8)

# Generate Airport data
def generate_airport(num_records):
    """Build ``num_records`` fake airport rows and return them as a list of dicts.

    ``AirportID`` and ``IATACode`` are always present and unique within their
    respective sets. ``ICAOCode`` is unique when present. Every column wrapped
    in ``nullable_value`` may instead be ``None``.
    """
    airports = []
    for _ in range(num_records):
        row = {
            'AirportID': unique_value(unique_airport_id, generate_airport_id),
            'IATACode': unique_value(
                unique_iata,
                lambda: faker.bothify(text='???').upper(),
            ),
            # A unique code is only drawn (and reserved) when the column is not nulled.
            'ICAOCode': nullable_value(
                lambda: unique_value(
                    unique_icao,
                    lambda: faker.bothify(text='????').upper(),
                )
            ),
            'Name': faker.company() + ' Airport',
            'City': nullable_value(lambda: faker.city()),
            'Country': nullable_value(lambda: faker.country()),
            'TimeZone': nullable_value(lambda: faker.timezone()),
            'CreatedAt': nullable_value(lambda: generate_timestamp()),
            'UpdatedAt': nullable_value(lambda: generate_timestamp()),
        }
        airports.append(row)
    return airports
# Ask how many airport rows to create, generate them once, and append them to airports.csv.
num_records = int(input('Enter the number of records to generate: '))
airport_records = generate_airport(num_records)
# The header comes from the generated rows themselves. A sample row is built
# only when no rows were requested, so a normal run does not use up an extra
# set of unique IDs/codes just to learn the column names.
airport_fieldnames = (airport_records or generate_airport(1))[0].keys()
write_to_csv('airports.csv', airport_fieldnames, airport_records)

In [29]:
# Generate Airlines
def generate_airline_id():
    """Return a random 8-character ID made of uppercase ASCII letters and digits."""
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=8)
    return ''.join(picked)
def generate_airline(num_records):
    """Build ``num_records`` fake airline rows and return them as a list of dicts.

    ``AirlineID`` and ``IATACode`` are always present and unique within their
    respective sets. ``ICAOCode`` is unique when present. Every column wrapped
    in ``nullable_value`` may instead be ``None``.

    NOTE(review): the IATA code has only two characters, so the number of
    distinct codes is small (26 * 26 if bothify's '?' yields ASCII letters --
    confirm); requesting more airlines than codes remain cannot succeed.
    """
    airlines = []
    for _ in range(num_records):
        row = {
            'AirlineID': unique_value(unique_airline_id, generate_airline_id),
            'IATACode': unique_value(
                unique_iata,
                lambda: faker.bothify(text='??').upper(),
            ),
            # A unique code is only drawn (and reserved) when the column is not nulled.
            'ICAOCode': nullable_value(
                lambda: unique_value(
                    unique_icao,
                    lambda: faker.bothify(text='???').upper(),
                )
            ),
            'Name': faker.company() + ' Airlines',
            'Country': nullable_value(lambda: faker.country()),
            'CreatedAt': nullable_value(lambda: generate_timestamp()),
            'UpdatedAt': nullable_value(lambda: generate_timestamp()),
        }
        airlines.append(row)
    return airlines
# Ask how many airline rows to create, generate them once, and append them to airlines.csv.
num_records = int(input('Enter the number of records to generate: '))
airline_records = generate_airline(num_records)
# The header comes from the generated rows themselves. A sample row is built
# only when no rows were requested, so a normal run does not use up an extra
# airline ID and two-letter IATA code just to learn the column names.
airline_fieldnames = (airline_records or generate_airline(1))[0].keys()
write_to_csv('airlines.csv', airline_fieldnames, airline_records)

In [20]:
# Provided list of ICAO type codes with their model names (the manufacturer is
# the leading part of the model name).
# Format: one "<ICAO code>, <model name>" entry per line. A model name may
# itself contain ", " (see the A30B entry).
aircraft_data = """
A124, Antonov An-124 Ruslan
A140, Antonov An-140
A148, Antonov An-148
A158, Antonov An-158
A19N, Airbus A319neo
A20N, Airbus A320neo
A21N, Airbus A321neo
A225, Antonov An-225 Mriya
A306, Airbus A300-600
A30B, Airbus A300B2, A300B4 and A300C4
A310, Airbus A310-200
A318, Airbus A318
A319, Airbus A319
A320, Airbus A320
A321, Airbus A321
A332, Airbus A330-200
A333, Airbus A330-300
A337, Airbus A330-700 "BelugaXL"
A338, Airbus A330-800
A339, Airbus A330-900
A342, Airbus A340-200
A343, Airbus A340-300
A345, Airbus A340-500
A346, Airbus A340-600
A359, Airbus A350-900
A35K, Airbus A350-1000
A388, Airbus A380-800
A3ST, Airbus A300-600ST "Super Transporter" / "Beluga"
A400, Airbus A400M Atlas
A748, Hawker Siddeley HS 748
AC90, Gulfstream/Rockwell (Aero) Turbo Commander 690
AJ27, Comac ARJ21-700 / C909
AN12, Antonov An-12
AN24, Antonov An-24
AN26, Antonov An-26
AN28, Antonov An-28
AN30, Antonov An-30
AN32, Antonov An-32
AN72, Antonov An-72 / An-74
AT43, Aerospatiale/Alenia ATR 42-300 / 320
AT45, Aerospatiale/Alenia ATR 42-500
AT46, Aerospatiale/Alenia ATR 42-600
AT72, Aerospatiale/Alenia ATR 72-201/-202
AT73, Aerospatiale/Alenia ATR 72-211/-212
AT75, Aerospatiale/Alenia ATR 72-212A (500)
AT76, Aerospatiale/Alenia ATR 72-212A (600)
ATP, British Aerospace ATP
B190, Beechcraft 1900
B37M, Boeing 737 MAX 7
B38M, Boeing 737 MAX 8
B39M, Boeing 737 MAX 9
B3XM, Boeing 737 MAX 10
"""

import csv
import random

# Generate a seat capacity based on the manufacturer
def generate_capacity(manufacturer):
    """Return a random seat capacity whose range depends on ``manufacturer``.

    ``manufacturer`` may be a single name ('Airbus'), a combined name such as
    'Gulfstream/Rockwell', or several words; it is split on '/' and
    whitespace and the first capacity class containing any of the resulting
    tokens is used. Unknown manufacturers (including ``None``) fall back to
    the 20-200 range.

    Exactly one ``random.randint`` draw is made per call.
    """
    # (manufacturer names, (min seats, max seats)), checked in this order.
    capacity_classes = (
        (('Airbus', 'Boeing'), (150, 400)),
        (('Antonov', 'Lockheed'), (50, 250)),
        # 'Aerospatiale' / 'Alenia' are the names under which the ATR rows
        # appear in aircraft_data, so they share the ATR range.
        (('Comac', 'ATR', 'Aerospatiale', 'Alenia', 'Beechcraft', 'Hawker'), (30, 100)),
        (('Gulfstream', 'Dassault', 'Cessna'), (10, 30)),
    )

    # 'Gulfstream/Rockwell' -> ['Gulfstream', 'Rockwell']
    tokens = str(manufacturer).replace('/', ' ').split()

    for names, (low, high) in capacity_classes:
        if any(token in names for token in tokens):
            return random.randint(low, high)
    return random.randint(20, 200)

# Lookup table: ICAO code -> (model, manufacturer, capacity)
aircraft_lookup = {}

# Parse every "<ICAO code>, <model name>" line into the lookup table
for line in aircraft_data.strip().split("\n"):
    # Split on the FIRST ", " only: a model name can contain ", " itself
    # ("A30B, Airbus A300B2, A300B4 and A300C4"), and splitting on every
    # occurrence made such a line fail the two-part check and get dropped.
    parts = line.split(", ", 1)
    if len(parts) != 2:
        continue  # blank or malformed line
    icao = parts[0].strip()
    model = parts[1].strip()
    if not icao or not model:
        continue  # nothing usable on one side of the separator
    manufacturer = model.split()[0]  # The first word is taken as the manufacturer name
    capacity = generate_capacity(manufacturer)  # Random capacity for that manufacturer
    aircraft_lookup[icao] = (model, manufacturer, capacity)

# Write the lookup table to a CSV file (overwriting any previous version)
csv_filename = "aircraft_lookup.csv"
with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["ICAO", "Model", "Manufacturer", "Capacity"])
    # One row per ICAO code: the code followed by its (model, manufacturer, capacity) tuple.
    writer.writerows(
        [code, details[0], details[1], details[2]]
        for code, details in aircraft_lookup.items()
    )

# Last expression of the cell: shows the name of the file just written
csv_filename



'aircraft_lookup.csv'

In [24]:
# Read the aircraft lookup table back from aircraft_lookup.csv
aircraft_data = load_csv('aircraft_lookup.csv')

# Flatten it into a list of plain (ICAO, Model, Manufacturer, Capacity) tuples
aircraft_lookup = list(
    aircraft_data
    .loc[:, ['ICAO', 'Model', 'Manufacturer', 'Capacity']]
    .itertuples(index=False, name=None)
)

In [42]:
# Read the generated airlines back from airlines.csv
airlinesID_data = load_csv('airlines.csv')

# Flatten the AirlineID column into a list of one-element tuples: (AirlineID,)
airlines_lookup = list(
    airlinesID_data
    .loc[:, ['AirlineID']]
    .itertuples(index=False, name=None)
)

In [None]:
# Aircraft Table
def generate_aircraft(num_records):
    """Build ``num_records`` fake aircraft rows and return them as a list of dicts.

    The type code, model, manufacturer and capacity of a row all come from
    one randomly chosen ``aircraft_lookup`` entry, and ``OwnerAirlineID`` is
    the first field of a randomly chosen ``airlines_lookup`` entry.
    ``AircraftID`` and ``RegistrationNumber`` are unique within their sets;
    the two timestamp columns may be ``None``.
    """
    fleet = []
    for _ in range(num_records):
        aircraft_id = unique_value(unique_aircraft_id, lambda: faker.bothify(text='AC######'))
        registration = unique_value(unique_registration, lambda: faker.bothify(text='??-####').upper())
        # One lookup entry supplies all four type-related columns.
        aircraft_type = random.choice(aircraft_lookup)
        status = random.choice(['Active', 'Maintenance'])
        owner_airline_id = random.choice(airlines_lookup)[0]
        fleet.append({
            'AircraftID': aircraft_id,
            'RegistrationNumber': registration,
            'ICAOTypeCode': aircraft_type[0],
            'Model': aircraft_type[1],
            'Manufacturer': aircraft_type[2],
            'Capacity': aircraft_type[3],
            'Status': status,
            'OwnerAirlineID': owner_airline_id,
            'DefaultConfigurationCode': faker.bothify(text='CFG####').upper(),
            'CreatedAt': nullable_value(lambda: generate_timestamp()),
            'UpdatedAt': nullable_value(lambda: generate_timestamp()),
        })
    return fleet
# Ask how many aircraft rows to create, generate them once, and append them to aircraft.csv.
num_records = int(input('Enter the number of records to generate: '))
aircraft_records = generate_aircraft(num_records)
# The header comes from the generated rows themselves. A sample row is built
# only when no rows were requested, so a normal run does not use up an extra
# aircraft ID and registration number just to learn the column names.
aircraft_fieldnames = (aircraft_records or generate_aircraft(1))[0].keys()
write_to_csv('aircraft.csv', aircraft_fieldnames, aircraft_records)
# Passenger Table

In [48]:
# E-mail domains from which generate_passenger picks one at random per address
domains = [
    'gmail.com',
    'yahoo.com',
    'outlook.com',
    'hotmail.com',
    'icloud.com',
]
def generate_passenger(num_records):
    """Build ``num_records`` fake passenger rows and return them as a list of dicts.

    ``PassengerID`` and ``Email`` are always present and unique within their
    sets; first and last name are always present. Every column wrapped in
    ``nullable_value`` may instead be ``None``.
    """
    passengers = []
    for _ in range(num_records):
        # Keys are filled in the order they appear in the output columns.
        row = {}
        row['PassengerID'] = unique_value(unique_passenger_id, lambda: faker.bothify(text='PAX##########'))
        row['FirstName'] = faker.first_name()
        row['LastName'] = faker.last_name()
        row['DateOfBirth'] = nullable_value(lambda: faker.date_of_birth().strftime('%Y-%m-%d'))
        row['Gender'] = nullable_value(lambda: random.choice(['Female', 'Male']))
        row['Email'] = unique_value(
            unique_email,
            lambda: faker.user_name() + '@' + random.choice(domains),
        )
        row['PhoneNumber'] = nullable_value(lambda: faker.phone_number())
        row['PassportNumber'] = nullable_value(lambda: faker.bothify(text='??########'))
        row['Nationality'] = nullable_value(lambda: faker.country())
        row['CreatedAt'] = nullable_value(lambda: generate_timestamp())
        row['UpdatedAt'] = nullable_value(lambda: generate_timestamp())
        passengers.append(row)
    return passengers
# Ask how many passenger rows to create, generate them once, and append them to passenger.csv.
num_records = int(input('Enter the number of records to generate: '))
passenger_records = generate_passenger(num_records)
# The header comes from the generated rows themselves. A sample row is built
# only when no rows were requested, so a normal run does not use up an extra
# passenger ID and e-mail address just to learn the column names.
passenger_fieldnames = (passenger_records or generate_passenger(1))[0].keys()
write_to_csv('passenger.csv', passenger_fieldnames, passenger_records)


In [52]:
def generate_crewMember(num_records):
    """Build ``num_records`` fake crew-member rows and return them as a list of dicts.

    ``CrewMemberID`` is unique within ``unique_crew_member_id``. The role is
    drawn with weights 1:2:3:4 in the order Captain, First Officer, Purser,
    Flight Attendant. ``AirlineID`` is the first field of a randomly chosen
    ``airlines_lookup`` entry. The two timestamp columns may be ``None``.
    """
    roles = ['Captain', 'First Officer', 'Purser', 'Flight Attendant']
    role_weights = [1, 2, 3, 4]  # relative likelihood of each role, same order as `roles`

    crew = []
    for _ in range(num_records):
        member = {
            'CrewMemberID': unique_value(unique_crew_member_id, lambda: faker.bothify(text='CM########')),
            'FirstName': faker.first_name(),
            'LastName': faker.last_name(),
            # random.choices returns a list; k=1 asks for a single pick, taken with [0].
            'Role': random.choices(roles, weights=role_weights, k=1)[0],
            'AirlineID': random.choice(airlines_lookup)[0],
            'LicenseNumber': faker.bothify(text='LIC#########'),
            'IsActive': random.choice([True, False]),
            'CreatedAt': nullable_value(lambda: generate_timestamp()),
            'UpdatedAt': nullable_value(lambda: generate_timestamp()),
        }
        crew.append(member)
    return crew
# Ask how many crew-member rows to create, generate them once, and append them to crewMem.csv.
num_records = int(input('Enter the number of records to generate: '))
crew_records = generate_crewMember(num_records)
# The header comes from the generated rows themselves. A sample row is built
# only when no rows were requested, so a normal run does not use up an extra
# crew-member ID just to learn the column names.
crew_fieldnames = (crew_records or generate_crewMember(1))[0].keys()
write_to_csv('crewMem.csv', crew_fieldnames, crew_records)