In [None]:
# Generate Unix timestamp
def generate_timestamp():
    """Return the current Unix time as an integer number of seconds.

    The fractional part of ``time.time()`` is dropped by ``int``.
    """
    seconds_since_epoch = time.time()
    return int(seconds_since_epoch)

# Mail domains that generated passenger e-mail addresses are drawn from.
domains = [
    'gmail.com',
    'yahoo.com',
    'outlook.com',
    'hotmail.com',
    'icloud.com',
]

# Write data to CSV
def write_to_csv(filename, fieldnames, data):
    """Append dictionary rows to a UTF-8 CSV file.

    Args:
        filename: Path of the CSV file; it is opened in append mode, so an
            existing file is extended rather than overwritten.
        fieldnames: Column names handed to ``csv.DictWriter``.
        data: Iterable of dictionaries, one per row.

    The header row is written only when the stream position right after
    opening is 0, i.e. when the file is new or empty.
    """
    with open(filename, mode='a', newline='', encoding='utf-8') as handle:
        csv_writer = csv.DictWriter(handle, fieldnames=fieldnames)
        starts_empty = handle.tell() == 0
        if starts_empty:
            csv_writer.writeheader()
        csv_writer.writerows(data)

# Load data from CSV
def load_csv(filename, **read_csv_kwargs):
    """Load a CSV file into a pandas DataFrame.

    Called with only ``filename`` this is exactly ``pd.read_csv(filename)``,
    so pandas infers column types and missing-value markers. That inference
    is lossy for code-like text: zero-padded digits such as ``00000123`` are
    read as the integer 123, and a two-letter code that happens to be ``NA``
    is read as a missing value. Pass, for example,
    ``dtype=str, keep_default_na=False`` to read such columns unchanged.

    Args:
        filename: Path of the CSV file to read.
        **read_csv_kwargs: Extra keyword arguments forwarded unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(filename, **read_csv_kwargs)

# Set to ensure uniqueness
# Five independent, initially empty registries; unique_value() adds every
# value it hands out to the registry it was given.
unique_iata, unique_icao, unique_registration, unique_email, unique_id = (
    set() for _ in range(5)
)

# Unique value generator
def unique_value(existing_set, generator, max_attempts=100_000):
    """Return a value from ``generator`` that is not yet in ``existing_set``.

    The generator is called repeatedly until it yields a value that has not
    been seen; that value is added to ``existing_set`` and returned.

    Args:
        existing_set: Set of values already handed out; mutated in place.
        generator: Zero-argument callable producing candidate values.
        max_attempts: Upper bound on generator calls. Without a bound the
            search never ends once every value the generator can produce is
            already in ``existing_set`` (for example a two-letter code has
            only 676 upper-case combinations).

    Returns:
        The newly registered value.

    Raises:
        RuntimeError: If no unused value was produced within
            ``max_attempts`` calls.
    """
    for _ in range(max_attempts):
        candidate = generator()
        if candidate not in existing_set:
            existing_set.add(candidate)
            return candidate
    raise RuntimeError(
        f'No unused value found after {max_attempts} attempts; '
        f'{len(existing_set)} values are already taken. '
        'The generator has probably run out of distinct values.'
    )

# Airport Table
def generate_airport(num_records):
    """Build ``num_records`` fake airport rows.

    Relies on the module-level ``faker`` instance, ``unique_value`` and the
    ``unique_id`` / ``unique_iata`` / ``unique_icao`` sets, which are updated
    as a side effect.

    Returns:
        A list of dictionaries with the keys AirportID, IATACode, ICAOCode,
        Name, City, Country, TimeZone, CreatedAt and UpdatedAt.
    """
    def new_airport_id():
        return faker.uuid4()

    def new_iata():
        return faker.bothify(text='???').upper()

    def new_icao():
        return faker.bothify(text='????').upper()

    airports = []
    for _ in range(num_records):
        airports.append({
            'AirportID': unique_value(unique_id, new_airport_id),
            'IATACode': unique_value(unique_iata, new_iata),
            'ICAOCode': unique_value(unique_icao, new_icao),
            'Name': faker.company() + ' Airport',
            'City': faker.city(),
            'Country': faker.country(),
            'TimeZone': faker.timezone(),
            'CreatedAt': generate_timestamp(),
            'UpdatedAt': generate_timestamp(),
        })
    return airports

# Airline Table
def generate_airline(num_records):
    """Build ``num_records`` fake airline rows.

    Relies on the module-level ``faker`` instance, ``unique_value`` and the
    ``unique_iata`` / ``unique_icao`` sets, which are updated as a side
    effect.

    Returns:
        A list of dictionaries with the keys IATACode, ICAOCode, Name,
        Country, CreatedAt and UpdatedAt.
    """
    def new_iata():
        return faker.bothify(text='??').upper()

    def new_icao():
        return faker.bothify(text='???').upper()

    airlines = []
    for _ in range(num_records):
        airlines.append({
            'IATACode': unique_value(unique_iata, new_iata),
            'ICAOCode': unique_value(unique_icao, new_icao),
            'Name': faker.company() + ' Airlines',
            'Country': faker.country(),
            'CreatedAt': generate_timestamp(),
            'UpdatedAt': generate_timestamp(),
        })
    return airlines

# Aircraft Table
def generate_aircraft(num_records):
    """Build ``num_records`` fake aircraft rows.

    Relies on the module-level ``faker`` instance, ``unique_value`` and the
    ``unique_registration`` set, which is updated as a side effect.

    Returns:
        A list of dictionaries with the keys RegistrationNumber,
        ICAOTypeCode, Model, Manufacturer, Capacity, Status, OwnerAirlineID,
        DefaultConfigurationCode, CreatedAt and UpdatedAt.
    """
    # (ICAO type code, model, manufacturer) are kept together and drawn as one
    # unit. Drawing the three columns independently produced contradictory
    # rows such as type code A321 with model "Boeing 787".
    aircraft_types = [
        ('A321', 'Airbus A321', 'Airbus'),
        ('B789', 'Boeing 787', 'Boeing'),
        ('B737', 'Boeing 737', 'Boeing'),
        ('A380', 'Airbus A380', 'Airbus'),
    ]

    aircraft = []
    for _ in range(num_records):
        type_code, model, manufacturer = random.choice(aircraft_types)
        aircraft.append({
            'RegistrationNumber': unique_value(unique_registration, lambda: faker.bothify(text='??-####').upper()),
            'ICAOTypeCode': type_code,
            'Model': model,
            'Manufacturer': manufacturer,
            'Capacity': random.randint(100, 400),
            'Status': random.choice(['Active', 'Maintenance']),
            'OwnerAirlineID': None,  # Placeholder for FK to Airline
            'DefaultConfigurationCode': faker.bothify(text='CFG####').upper(),
            'CreatedAt': generate_timestamp(),
            'UpdatedAt': generate_timestamp(),
        })
    return aircraft

# Passenger Table
def generate_passenger(num_records):
    """Build ``num_records`` fake passenger rows.

    Relies on the module-level ``faker`` instance, ``domains`` list,
    ``unique_value`` and the ``unique_email`` set, which is updated as a side
    effect. Only the e-mail address is forced to be unique.

    Returns:
        A list of dictionaries with the keys FirstName, LastName,
        DateOfBirth, Gender, Email, PhoneNumber, PassportNumber, Nationality,
        PassengerType, CreatedAt and UpdatedAt.
    """
    def new_email():
        return faker.user_name() + '@' + random.choice(domains)

    passengers = []
    for _ in range(num_records):
        passengers.append({
            'FirstName': faker.first_name(),
            'LastName': faker.last_name(),
            'DateOfBirth': faker.date_of_birth().strftime('%Y-%m-%d'),
            'Gender': random.choice(['Male', 'Female']),
            'Email': unique_value(unique_email, new_email),
            'PhoneNumber': faker.phone_number(),
            'PassportNumber': faker.bothify(text='??######').upper(),
            'Nationality': faker.country(),
            'PassengerType': random.choice(['ADT', 'CHD', 'INF']),
            'CreatedAt': generate_timestamp(),
            'UpdatedAt': generate_timestamp(),
        })
    return passengers

# CrewMember Table
def generate_crew_member(num_records):
    """Build ``num_records`` fake crew-member rows.

    Relies on the module-level ``faker`` instance and ``unique_value``.
    Employee numbers are registered in the ``unique_registration`` set (the
    same set the aircraft generator uses), which is updated as a side effect.

    Returns:
        A list of dictionaries with the keys EmployeeNumber, FirstName,
        LastName, Role, AirlineID, LicenseNumber, IsActive, CreatedAt and
        UpdatedAt.
    """
    def new_employee_number():
        return faker.bothify(text='EMP####')

    crew = []
    for _ in range(num_records):
        crew.append({
            'EmployeeNumber': unique_value(unique_registration, new_employee_number),
            'FirstName': faker.first_name(),
            'LastName': faker.last_name(),
            'Role': random.choice(['Captain', 'First Officer', 'Flight Attendant']),
            'AirlineID': None,  # Placeholder for FK to Airline
            'LicenseNumber': faker.bothify(text='LIC#####'),
            'IsActive': random.choice([True, False]),
            'CreatedAt': generate_timestamp(),
            'UpdatedAt': generate_timestamp(),
        })
    return crew

# Main function to generate data
def _export_table(filename, generator, num_records):
    """Generate one table and append it to ``filename``.

    The CSV header is taken from the rows that are actually written. Building
    a separate one-row sample just to read its keys would waste a record and
    permanently reserve that record's codes in the shared uniqueness sets. A
    sample row is only built when no rows were generated, so that an empty
    file still receives its header.
    """
    rows = generator(num_records)
    header_source = rows[0] if rows else generator(1)[0]
    write_to_csv(filename, header_source.keys(), rows)


if __name__ == '__main__':
    num_records = int(input('Enter the number of records to generate: '))

    # Output file paired with the generator that fills it, in write order.
    table_generators = [
        ('airports.csv', generate_airport),
        ('airlines.csv', generate_airline),
        ('aircrafts.csv', generate_aircraft),
        ('passengers.csv', generate_passenger),
        ('crew_members.csv', generate_crew_member),
    ]
    for csv_name, make_rows in table_generators:
        _export_table(csv_name, make_rows, num_records)


In [1]:
from faker import Faker
import random
import time
import csv
import pandas as pd
import string 



In [2]:
# Shared Faker instance; the generator functions in this notebook reach it
# through the module-level name `faker`.
faker = Faker()
# Load data from CSV
def load_csv(filename, **read_csv_kwargs):
    """Load a CSV file into a pandas DataFrame.

    Called with only ``filename`` this is exactly ``pd.read_csv(filename)``,
    so pandas infers column types and missing-value markers. That inference
    is lossy for code-like text: zero-padded digits such as ``00000123`` are
    read as the integer 123, and a two-letter code that happens to be ``NA``
    is read as a missing value. Pass, for example,
    ``dtype=str, keep_default_na=False`` to read such columns unchanged.

    Args:
        filename: Path of the CSV file to read.
        **read_csv_kwargs: Extra keyword arguments forwarded unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(filename, **read_csv_kwargs)
# Generate Unix timestamp
def generate_timestamp():
    """Return the current Unix time as an integer number of seconds.

    The fractional part of ``time.time()`` is dropped by ``int``.
    """
    now = time.time()
    return int(now)
# Write data to CSV
def write_to_csv(filename, fieldnames, data):
    """Append dictionary rows to a UTF-8 CSV file.

    Args:
        filename: Path of the CSV file; it is opened in append mode, so an
            existing file is extended rather than overwritten.
        fieldnames: Column names handed to ``csv.DictWriter``.
        data: Iterable of dictionaries, one per row.

    The header row is written only when the stream position right after
    opening is 0, i.e. when the file is new or empty.
    """
    with open(filename, mode='a', newline='', encoding='utf-8') as out_file:
        row_writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        needs_header = out_file.tell() == 0
        if needs_header:
            row_writer.writeheader()
        row_writer.writerows(data)
# Function to generate nullable values
def nullable_value(generator, null_probability=0.2):
    """Return ``None`` with the given probability, otherwise ``generator()``.

    Args:
        generator: Zero-argument callable; it is only called when the result
            is not nulled.
        null_probability: The result is ``None`` when a single
            ``random.random()`` draw falls below this value.

    Returns:
        ``None`` or the value produced by ``generator``.
    """
    make_null = random.random() < null_probability
    return None if make_null else generator()
# Unique value generator
def unique_value(existing_set, generator, max_attempts=100_000):
    """Return a value from ``generator`` that is not yet in ``existing_set``.

    The generator is called repeatedly until it yields a value that has not
    been seen; that value is added to ``existing_set`` and returned.

    Args:
        existing_set: Set of values already handed out; mutated in place.
        generator: Zero-argument callable producing candidate values.
        max_attempts: Upper bound on generator calls. Without a bound the
            search never ends once every value the generator can produce is
            already in ``existing_set`` (for example a two-letter code has
            only 676 upper-case combinations).

    Returns:
        The newly registered value.

    Raises:
        RuntimeError: If no unused value was produced within
            ``max_attempts`` calls.
    """
    for _ in range(max_attempts):
        candidate = generator()
        if candidate not in existing_set:
            existing_set.add(candidate)
            return candidate
    raise RuntimeError(
        f'No unused value found after {max_attempts} attempts; '
        f'{len(existing_set)} values are already taken. '
        'The generator has probably run out of distinct values.'
    )

In [3]:
# Registries of values already handed out by unique_value(), one set per
# uniqueness domain. Re-running this cell empties them all.
unique_iata = set()
unique_icao = set()
unique_airport_id = set()
unique_airline_id = set()
# generate_aircraft() draws registration numbers through this set; without it
# that cell fails with NameError when the notebook is run from this cell on.
unique_registration = set()

In [4]:

# Generate AirportID 
def generate_airport_id():
    """Return a random airport ID as an 8-character, zero-padded digit string."""
    # Draw an integer from 1 to 99,999,999, then left-pad it with zeros to 8 digits.
    number = random.randint(1, 99999999)
    return str(number).zfill(8)

# Generate Airport data
def generate_airport(num_records):
    """Build ``num_records`` fake airport rows, some with missing values.

    AirportID, IATACode and Name are always filled. Every other column goes
    through ``nullable_value`` with its default null probability and may
    therefore be ``None``. Relies on the module-level ``faker`` instance,
    ``unique_value``, ``generate_airport_id`` and the ``unique_airport_id`` /
    ``unique_iata`` / ``unique_icao`` sets, which are updated as a side
    effect.

    Returns:
        A list of dictionaries with the keys AirportID, IATACode, ICAOCode,
        Name, City, Country, TimeZone, CreatedAt and UpdatedAt.
    """
    def new_iata():
        return faker.bothify(text='???').upper()

    def new_unique_icao():
        return unique_value(unique_icao, lambda: faker.bothify(text='????').upper())

    airports = []
    for _ in range(num_records):
        airports.append({
            'AirportID': unique_value(unique_airport_id, generate_airport_id),
            'IATACode': unique_value(unique_iata, new_iata),
            'ICAOCode': nullable_value(new_unique_icao),
            'Name': faker.company() + ' Airport',
            'City': nullable_value(lambda: faker.city()),
            'Country': nullable_value(lambda: faker.country()),
            'TimeZone': nullable_value(lambda: faker.timezone()),
            'CreatedAt': nullable_value(generate_timestamp),
            'UpdatedAt': nullable_value(generate_timestamp),
        })
    return airports
num_records = int(input('Enter the number of records to generate: '))
# Generate the table once and take the CSV header from those same rows.
# Building a separate one-row sample just to read its keys would waste a record
# and permanently reserve its ID and codes in the uniqueness sets. A sample row
# is only built when no rows were generated, so an empty file still gets its header.
airport_rows = generate_airport(num_records)
airport_fields = (airport_rows[0] if airport_rows else generate_airport(1)[0]).keys()
write_to_csv('airports.csv', airport_fields, airport_rows)

In [None]:
# Generate Airlines
def generate_airline_id():
    """Return a random 8-character airline ID of upper-case letters and digits."""
    # The ID is drawn, with replacement, from upper-case letters plus digits.
    alphabet = string.ascii_uppercase + string.digits
    picked = random.choices(alphabet, k=8)
    return ''.join(picked)
def generate_airline(num_records):
    """Build ``num_records`` fake airline rows, some with missing values.

    AirlineID, IATACode and Name are always filled. Every other column goes
    through ``nullable_value`` with its default null probability and may
    therefore be ``None``. Relies on the module-level ``faker`` instance,
    ``unique_value``, ``generate_airline_id`` and the ``unique_airline_id`` /
    ``unique_iata`` / ``unique_icao`` sets, which are updated as a side
    effect.

    Returns:
        A list of dictionaries with the keys AirlineID, IATACode, ICAOCode,
        Name, Country, CreatedAt and UpdatedAt.
    """
    def new_iata():
        return faker.bothify(text='??').upper()

    def new_unique_icao():
        return unique_value(unique_icao, lambda: faker.bothify(text='???').upper())

    airlines = []
    for _ in range(num_records):
        airlines.append({
            'AirlineID': unique_value(unique_airline_id, generate_airline_id),
            'IATACode': unique_value(unique_iata, new_iata),
            'ICAOCode': nullable_value(new_unique_icao),
            'Name': faker.company() + ' Airlines',
            'Country': nullable_value(lambda: faker.country()),
            'CreatedAt': nullable_value(generate_timestamp),
            'UpdatedAt': nullable_value(generate_timestamp),
        })
    return airlines
num_records = int(input('Enter the number of records to generate: '))
# Generate the table once and take the CSV header from those same rows.
# Building a separate one-row sample just to read its keys would waste a record
# and permanently reserve its ID and codes in the uniqueness sets. A sample row
# is only built when no rows were generated, so an empty file still gets its header.
airline_rows = generate_airline(num_records)
airline_fields = (airline_rows[0] if airline_rows else generate_airline(1)[0]).keys()
write_to_csv('airlines.csv', airline_fields, airline_rows)

In [None]:
# Aircraft Table
def generate_aircraft(num_records):
    """Build ``num_records`` fake aircraft rows.

    Relies on the module-level ``faker`` instance, ``unique_value`` and a
    module-level ``unique_registration`` set, which is updated as a side
    effect.

    Returns:
        A list of dictionaries with the keys RegistrationNumber,
        ICAOTypeCode, Model, Manufacturer, Capacity, Status, OwnerAirlineID,
        DefaultConfigurationCode, CreatedAt and UpdatedAt.
    """
    # (ICAO type code, model, manufacturer) are kept together and drawn as one
    # unit. Drawing the three columns independently produced contradictory
    # rows such as type code A321 with model "Boeing 787".
    aircraft_types = [
        ('A321', 'Airbus A321', 'Airbus'),
        ('B789', 'Boeing 787', 'Boeing'),
        ('B737', 'Boeing 737', 'Boeing'),
        ('A380', 'Airbus A380', 'Airbus'),
    ]

    aircraft = []
    for _ in range(num_records):
        type_code, model, manufacturer = random.choice(aircraft_types)
        aircraft.append({
            # The key is exactly 'RegistrationNumber'. A stray 'Aircraft '
            # literal on the preceding line used to be concatenated onto it,
            # which silently renamed the column.
            'RegistrationNumber': unique_value(unique_registration, lambda: faker.bothify(text='??-####').upper()),
            'ICAOTypeCode': type_code,
            'Model': model,
            'Manufacturer': manufacturer,
            'Capacity': random.randint(100, 400),
            'Status': random.choice(['Active', 'Maintenance']),
            'OwnerAirlineID': None,  # Placeholder for FK to Airline
            'DefaultConfigurationCode': faker.bothify(text='CFG####').upper(),
            'CreatedAt': generate_timestamp(),
            'UpdatedAt': generate_timestamp(),
        })
    return aircraft

# Passenger Table