In [2]:
# Newark Arrivals scrape proof of concept - This is isolated and not needed for core edct functionality.

from routes.root.api.newark_departures import Newark_departures_scrape
# Only instantiate the scraper here; the scrape/extract calls below are left disabled.
nds = Newark_departures_scrape()
# soups = nds.soup_scrape_UA_arrivals()

# r=nds.extract_flight_id_and_link(soups=soups)


In [1]:
# Part 1: Proof of concept for EDCT scrape using just requests library instead of heavy selenium.

import requests
from bs4 import BeautifulSoup

class EDCT_LookUp:
    """Look up EDCT rows for a flight by POSTing the FAA EDCT form and parsing the reply.

    Uses plain ``requests`` + ``BeautifulSoup`` instead of a Selenium browser session.
    """

    def __init__(self, timeout: float = 30):
        """
        Args:
            timeout: Seconds to wait for the FAA server before giving up on a
                request. Without a timeout a single stalled connection would
                block a bulk run indefinitely.
        """
        # URL of the EDCT lookup page
        self.url = "https://www.fly.faa.gov/edct/jsp/showEDCT.jsp"
        self.timeout = timeout

    def extract_edct(self, call_sign: str, origin: str, destination: str):
        """Submit the lookup form and return the parsed EDCT rows.

        Args:
            call_sign: Call sign to look up (upper-cased before sending).
            origin: Departure airport code (upper-cased before sending).
            destination: Arrival airport code (upper-cased before sending).

        Returns:
            A list of dicts with keys ``filedDepartureTime``, ``edct``,
            ``controlElement`` and ``flightCancelled`` -- empty when the page
            has no results table or no row carries an EDCT. ``None`` when the
            request itself failed (transport error, timeout, or non-200 status).
        """
        # Form data
        data = {
            "callsign": call_sign.upper(),  # Convert to uppercase to match form behavior
            "dept": origin.upper(),
            "arr": destination.upper(),
        }

        try:
            response = requests.post(self.url, data=data, timeout=self.timeout)
        except requests.RequestException as exc:
            # Report transport failures the same way as a bad status code so one
            # flaky request does not abort a whole bulk run.
            print(f"Request failed: {exc}")
            return None

        if response.status_code != 200:
            print(f"Request failed with status {response.status_code}")
            return None

        return self._parse_edct_table(response.text)

    @staticmethod
    def _parse_edct_table(html: str) -> list:
        """Extract EDCT rows from the result page HTML; rows whose EDCT is '--' are skipped."""
        soup = BeautifulSoup(html, 'html.parser')

        # Find the results table (same as Selenium's border=1 table)
        table = soup.find('table', {'border': '1'})
        if not table:
            return []

        edct_collective = []
        # Process all rows except header (same as rows[1:] in Selenium)
        for row in table.find_all('tr')[1:]:
            cols = row.find_all('td')
            if len(cols) < 4:  # Ensure we have enough columns
                continue

            edct, filed_departure_time, control_element, flight_cancelled = (
                col.get_text(strip=True) for col in cols[:4]
            )
            if edct == '--':
                continue

            edct_collective.append({
                "filedDepartureTime": filed_departure_time,
                "edct": edct,
                "controlElement": control_element,
                "flightCancelled": flight_cancelled
            })

        return edct_collective

# Example usage:
# edct = EDCT_LookUp()
# results = edct.extract_edct("GJS4384", "ILM", "EWR")
# print(results)

In [9]:
# Part 2: Proof of concept - Mongo returns flights for particular arrival given datetime range - Mind the datetime format.
# This is supposed to lead to the EDCT scrape proof of concept.
from config.database import collection_flights
def find_matching_flights(db_collection, flight_number_list: list = None, arrival_airport='KEWR',
                          start_time='2025-08-09T13:00:00Z', end_time='2025-08-09T23:00:00Z'):
    """Return one summary record per flightID arriving at an airport in a departure window.

    Args:
        db_collection: Object exposing ``aggregate(pipeline)``; documents are
            expected to carry ``flightID`` and a ``versions`` array.
        flight_number_list: Currently unused -- the flight-number filter it once
            fed is disabled.
        arrival_airport: Value matched against ``versions.arrival``.
        start_time: Inclusive lower bound for ``versions.estimatedDepartureTime``.
        end_time: Exclusive upper bound for ``versions.estimatedDepartureTime``.
            NOTE(review): bounds are passed through as given; presumably the
            stored values are ISO-8601 strings in the same format -- confirm.

    Returns:
        A list of dicts with ``flightID``, ``registration``, ``departure``,
        ``arrival`` and ``estimatedDepartureTime``.
    """

    def arrival_window_filter():
        # Fresh dict per stage so the two $match stages never share an object.
        return {
            'versions.arrival': arrival_airport,
            'versions.estimatedDepartureTime': {'$gte': start_time, '$lt': end_time},
        }

    # Fields copied out of the version sub-document into the result.
    version_fields = ('registration', 'departure', 'arrival', 'estimatedDepartureTime')

    # One output per flightID, taking whichever matching version comes first
    # (no explicit sort is applied before grouping).
    group_spec = {'_id': '$flightID'}
    for field in version_fields:
        group_spec[field] = {'$first': '$versions.' + field}

    # Expose the group key as flightID and drop Mongo's _id.
    project_spec = {'_id': 0, 'flightID': '$_id'}
    for field in version_fields:
        project_spec[field] = 1

    stages = []
    # Document-level prefilter: keep flights having matching version data.
    stages.append({'$match': arrival_window_filter()})
    # One document per version.
    stages.append({'$unwind': '$versions'})
    # Version-level filter: keep only the versions that themselves match.
    stages.append({'$match': arrival_window_filter()})
    stages.append({'$group': group_spec})
    stages.append({'$project': project_spec})

    cursor = db_collection.aggregate(stages)
    return list(cursor)

# No flight-number list is supplied for this run.
flight_numbers = None
# Alternative using the function's default airport/window:
# matching_flights = find_matching_flights(collection_flights, flight_numbers)
matching_flights = find_matching_flights(
    collection_flights,
    flight_number_list=flight_numbers,
    arrival_airport='KPHL',
    start_time='2025-08-12T13:00:00Z',
    end_time='2025-08-12T23:00:00Z',
)
(len(matching_flights), matching_flights)


(235,
 [{'registration': 'N663FR',
   'departure': 'KRSW',
   'arrival': 'KPHL',
   'estimatedDepartureTime': '2025-08-12T16:56:00Z',
   'flightID': 'FFT2026'},
  {'registration': 'N775XF',
   'departure': 'KBOS',
   'arrival': 'KPHL',
   'estimatedDepartureTime': '2025-08-12T22:06:00Z',
   'flightID': 'AAL1672'},
  {'registration': 'N37EC',
   'departure': 'KCGF',
   'arrival': 'KPHL',
   'estimatedDepartureTime': '2025-08-12T14:00:00Z',
   'flightID': 'LXJ37'},
  {'registration': 'N636AE',
   'departure': 'KAVL',
   'arrival': 'KPHL',
   'estimatedDepartureTime': '2025-08-12T14:50:00Z',
   'flightID': 'PDT5817'},
  {'registration': 'N86534',
   'departure': 'KORD',
   'arrival': 'KPHL',
   'estimatedDepartureTime': '2025-08-12T19:20:00Z',
   'flightID': 'UAL1617'},
  {'registration': 'N804AN',
   'departure': 'KMIA',
   'arrival': 'KPHL',
   'estimatedDepartureTime': '2025-08-12T15:59:00Z',
   'flightID': 'AAL1872'},
  {'registration': None,
   'departure': 'KFSW',
   'arrival': 'KPH

In [10]:
# Part 3: Proof of concept for EDCT scrape in bulk given mongo db collection returns in a particular format.

edct = EDCT_LookUp()
# Lookups that succeeded with the flightID as call sign, keyed by flightID.
flightID_data = {}
# Lookups that only succeeded with the registration as call sign, keyed by registration.
registration_data = {}
# Flights for which no lookup produced EDCT rows, keyed by flightID.
unsuccessful_flights = {}

for flight in matching_flights:
    flight_id = flight.get('flightID')
    registration = flight.get('registration')
    # Drop the first character of the 4-letter codes (e.g. 'KPHL' -> 'PHL');
    # the lookup is called with 3-letter codes.
    origin = flight['departure'][1:]
    destination = flight['arrival'][1:]
    # Fields shared by every record written below.
    summary = {
        'origin': origin,
        'destination': destination,
        'estimatedDepartureTime': flight['estimatedDepartureTime'],
    }

    # First attempt: flightID as the call sign.
    results = edct.extract_edct(call_sign=flight_id, origin=origin, destination=destination)
    if results:
        flightID_data[flight_id] = {'registration': registration, **summary, 'results': results}
        continue

    # Fallback: retry with the registration when the flightID lookup returned nothing.
    if registration:
        results = edct.extract_edct(call_sign=registration, origin=origin, destination=destination)
        if results:
            registration_data[registration] = {**summary, 'results': results}
            continue

    # Record every flight that produced no EDCT rows -- including those with no
    # registration to fall back on, which would otherwise be dropped silently.
    unsuccessful_flights[flight_id] = {'registration': registration, **summary, 'results': results}

In [None]:
# TODO test: Schedule tests during a ground stop, using this script to get an arrival airport and its flight numbers.
        # Use it to fetch EDCT data and assert using a test on the frontend.
flightID_data
# registration_data
# unsuccessful_flights


In [None]:
# Ad-hoc check: show every stored document for a single flightID.
list(collection_flights.find({"flightID": "AAL1451"}))

In [7]:
# Single-flight spot check of the EDCT lookup.
edct = EDCT_LookUp()
results = edct.extract_edct(
    call_sign="GJS4509",
    origin="ORF",
    destination="EWR",
)
results


[{'filedDepartureTime': '08/15/2025 21:27',
  'edct': '08/16/2025 00:40',
  'controlElement': 'EWR',
  'flightCancelled': 'No'},
 {'filedDepartureTime': '08/17/2025 21:27',
  'edct': '08/17/2025 22:46',
  'controlElement': 'EWR',
  'flightCancelled': 'No'}]