In [None]:
# Mechanism to mock test submits in MongoDB with random dates.
# the term `test_sic_collection` refers to collection track search and has been refactored to sic collection.
from bson import ObjectId
from config.database import db_UJ        # UJ mongoDB
from datetime import datetime, timedelta
from pymongo import UpdateOne
import random

class MockTestSubmits:
    """Builds mock `submits` timestamps for the documents of the `test_st` collection.

    The class prepares pymongo `UpdateOne` operations that either set a
    `submits` field holding random recent timestamps or remove that field.
    Applying them (`update_submits`) is currently disabled on purpose.
    """

    def __init__(self):
        # Handle on the test collection; every method below goes through it.
        self.test_sic_collection = db_UJ['test_st']

    def random_date_gen(self, simplify=False, total_dates=5, days_back=3):
        """Return a random-length list of recent timestamps.

        The candidates are "now" and the same time of day on each of the
        `days_back - 1` preceding days. Between 1 and `total_dates` items are
        drawn from the candidates with replacement, so duplicates are expected.

        Args:
            simplify: when True, return "MM-DD" strings instead of datetimes.
            total_dates: upper bound (inclusive) on the number of items returned.
            days_back: number of candidate days, counting today.

        Returns:
            A list of `datetime` objects, or of "MM-DD" strings when `simplify`
            is True.

        Raises:
            ValueError: if `days_back` is below 1, or if `total_dates` is below
                1 (the latter is raised by `random.randint`).
        """
        if days_back < 1:
            raise ValueError("days_back must be at least 1")

        now = datetime.now()
        candidates = [now - timedelta(days=offset) for offset in range(days_back)]

        # The number of picks is itself random: anywhere from 1 to total_dates.
        picks = random.choices(candidates, k=random.randint(1, total_dates))
        if simplify:
            return [picked.strftime("%m-%d") for picked in picks]
        return picks

    def update_operations(self, mdb_set):
        """Build one `UpdateOne` per document of the test collection.

        Args:
            mdb_set: truthy -> `$set` a fresh random `submits` list (up to 15
                timestamps) on each document; falsy -> `$unset` the `submits`
                field.

        Returns:
            A list of `UpdateOne` operations; nothing is written to the database.
        """
        update_operation = []
        for doc in self.test_sic_collection.find({}, {"_id": 1}):
            if mdb_set:
                # Mock timestamps are only generated when they are actually stored.
                change = {'$set': {'submits': self.random_date_gen(simplify=False, total_dates=15)}}
            else:
                change = {'$unset': {'submits': ''}}
            # Filter on the stored _id exactly as returned, whatever its type.
            update_operation.append(UpdateOne({"_id": doc['_id']}, change))
        return update_operation

    def update_submits(self, mdb_set):
        """Apply the operations from `update_operations` — currently a no-op.

        The bulk write is left disabled so that running the notebook cannot
        modify the collection by accident; uncomment the line below to enable it.
        """
        pass
        # self.test_sic_collection.bulk_write(self.update_operations(mdb_set=mdb_set))

    def check_transformed_submits(self):
        """Return `{value: next_value}` pairs for documents that have `submits`.

        Documents are fetched without `_id`, `ph` and `r_id`; for each one, every
        pair of consecutive remaining field values becomes a single-entry dict.
        The first value of each pair is used as a dict key, so it must be hashable.
        """
        docs = self.test_sic_collection.find(
            {'submits': {'$exists': True}},
            {"_id": 0, "ph": 0, "r_id": 0},
        )
        transformed = []
        for doc in docs:
            values = list(doc.values())
            transformed.extend({first: second} for first, second in zip(values, values[1:]))
        return transformed

# Create the helper; construction only looks up the `test_st` collection handle.
mts = MockTestSubmits()
# Disabled manual steps: build the `$unset` operations / apply them.
# mts.update_operations(mdb_set=False)
# mts.update_submits(mdb_set=False)

In [None]:
# Permutations and combinations -- Simulating all combinations of searches on frontend -> backend
import itertools
import string

# Build every two-letter query from the first `test_items` lowercase letters (digits are left out).
test_items = 5
alphabets = string.ascii_lowercase[:test_items]
combinations = list(map(''.join, itertools.combinations(alphabets, 2)))
permutations = list(map(''.join, itertools.permutations(alphabets, 2)))

# combinations() ignores order: 'ab' is produced but 'ba' is not.
# permutations() keeps order, so both 'ab' and 'ba' appear -- prefer it when
# simulating what a user could type.
# Neither of them yields a doubled letter such as 'aa'; that would need
# itertools.product(alphabets, repeat=2).
print('combs', combinations)
print('permuts', permutations)
print(f"Total combinations: {len(combinations)} Total permut: {len(permutations)}")

In [None]:
from config.database import collection_flights
import matplotlib.pyplot as plt


# UA414 414
# GJS4433, UA4433, 4433, g74433, g7 4433, ual4433
# skw6002, oo6002, ua6002, ual6002, 
#   - Shows as SKW6002 (OO6002) - OO/UA    OR   SKW6002 (UA6002) - OO/UA

# Same delta, america, republic, jetblue, frontier, etc.

# TODO test: maybe use latest date flight and see if it will return data correctly for all. using JMS.


In [None]:
from config.database import collection_airports_cache_legacy, collection_weather_cache_legacy, airport_bulk_collection_uj, db_UJ, collection_searchTrackUsers, collection_flights
# Handles on the UJ MongoDB collections used by the cells below.

# Search index: live collection and its test copy.
search_index_collection = db_UJ['search_index']
search_index_collection_test = db_UJ['search_index_test']

# Manual reset steps (kept disabled): empty the test index, and copy the live
# index out to a backup collection.
# db_UJ['search_index_test'].delete_many({})
# search_index_collection.aggregate([{'$out': 'search_index_legacy_backup'}])

# Test collections.
new_airport_cache_collection = db_UJ['airport-cache-test']
ctrs = db_UJ['test_rst']

# Suggestions cache: live collection and the test/refill copy.
suggestions_cache_collection = db_UJ['suggestions-cache']
suggestions_test_collection = db_UJ['suggestions-cache-test-refills']


In [None]:
# Schema change on the suggestions test collection: for every document whose
# `type` is 'airport', move the nested `metadata.ICAO` value to a top-level
# `ICAOAirportCode` field.
# NOTE(review): this writes to the database every time the cell runs —
# presumably a run-once manual step; confirm before including it in Run All.
suggestions_test_collection.update_many(
    {'type': 'airport'},                        # find_crit
    {'$rename': {'metadata.ICAO': 'ICAOAirportCode'}}         # perform_crit
)


In [23]:
# Look up the stored document for flight AAL611 to inspect its structure
# (the recorded output shows a `versions` list with one entry per version).
collection_flights.find_one({'flightID':'AAL611'})

{'_id': ObjectId('68418a739bf38573108041a7'),
 'flightID': 'AAL611',
 'versions': [{'_id': ObjectId('6925c8ae2f36c7d9de5416e8'),
   'timestamp': datetime.datetime(2025, 11, 25, 15, 15, 32, 162000),
   'organization': 'AAL',
   'aircraftType': 'A321',
   'registration': 'N161AA',
   'departure': 'KCLT',
   'arrival': 'MBPV',
   'estimatedDepartureTime': '2025-11-25T16:30:00Z',
   'arrivalAlternate': 'MDPP',
   'route': [{'timestamp': '2025-11-25T15:15:32.162Z',
     'value': 'KCLT.KWEEN5.PITRW.Y436.OGGRE..NUCAR.L463.BTLER..MBPV/0223'},
    {'timestamp': '2025-11-25T15:15:32.509765+00:00',
     'value': 'KCLT./.OGGRE..NUCAR.L463.BTLER..MBPV/0223'}],
   'requestedAltitude': '33000.0',
   'version_created_at': datetime.datetime(2025, 11, 25, 15, 18, 6, 910000)},
  {'_id': ObjectId('6925e3772f36c7d9de544bfa'),
   'timestamp': datetime.datetime(2025, 11, 25, 17, 10, 44, 28000),
   'organization': 'AAL',
   'aircraftType': 'A321',
   'registration': 'N161AA',
   'departure': 'KCLT',
   'arriv

In [19]:
from core.api.source_links_and_api import Source_links_and_api


# Build the FlightStats tracker URL for AA611.
# NOTE(review): the recorded output has `UA` in the airline path segment even
# though the query is `AA611` — confirm flight_stats_url handles non-UA carriers.
Source_links_and_api().flight_stats_url('AA611')

'https://www.flightstats.com/v2/flight-tracker/UA/AA611?year=2025&month=11&date=30'

In [None]:
from core.search.search_interface import SearchInterface


# Manual check: pass the raw query string 'ua482' to SearchInterface.raw_submit_handler.
SearchInterface().raw_submit_handler(search='ua482')

In [20]:
from core.search.search_interface import ExhaustionCriteria


# Parse a digits-only query and extract the value used for flight lookups.
search = '4382'
parsed_query = SearchInterface().parse_query(query=search)
exhaust = ExhaustionCriteria()

query_type = parsed_query.get('type')
# Always bind flight_category. It used to be assigned only inside the `if`, so
# a query of any other type raised NameError on a fresh kernel, or silently
# displayed the value left over from an earlier run.
flight_category = (
    parsed_query.get('value')
    if query_type in ('flight', 'digits', 'nNumber')
    else None
)
flight_category
# Possible next step (disabled):
# exhaust.extended_flight_suggestions_formatting(flight_category)

'4382'

In [22]:
# Await the airport-info-with-weather fetch for MBPV on the `exhaust` instance
# created in an earlier cell; the recorded output is a dict with airport
# identifiers and a nested `weather` entry (datis / metar / taf).
await exhaust.faa_airport_info_fetch_w_weather('MBPV')

{'airportName': 'PROVIDENCIALES/PROVIDENCIALES INTL',
 'ICAO': 'MBPV',
 'IATA': 'PLS',
 'regionName': '-',
 'countryCode': 'TC',
 'weather': {'datis': {'combined': 'N/A', 'arr': None, 'dep': None},
  'metar': '',
  'taf': 'TAF MBPV 300605Z 3006/0106 12012KT 9999 VCSH FEW018CB SCT022 BKN045\n'}}

In [None]:
# Collect the flightID of every flight document that has at least one version
# entry carrying a `version_created_at` stamp.
# NOTE(review): the original note describes these as the active flights from
# the past 15 days — that window is not visible in this query; confirm.
find_crit = {"versions.version_created_at": {"$exists": True}}
return_crit = dict(flightID=1, _id=0)   # keep flightID only, drop _id
flightIDs = [doc for doc in collection_flights.find(find_crit, return_crit)]

In [None]:
from core.search.query_classifier import QueryClassifier
from services.flight_aggregator_service import aws_jms_service


# Default-constructed classifier; in this cell it is only used by the disabled check below.
qc = QueryClassifier()
# qc.parse_flight_query('GJS4433')
# Await aws_jms_service (from services.flight_aggregator_service) for flight GJS4182.
await aws_jms_service('GJS4182')

In [None]:
import re

from core.api.source_links_and_api import Source_links_and_api
# Group the numeric part of flight IDs by their three-letter airline prefix.
# `flightIDs` is the list of {'flightID': ...} documents loaded in an earlier cell.
test = flightIDs
# ICAO airline code (three capital letters) followed by a purely numeric flight number.
pattern = re.compile(r'([A-Z]{3})(\d+)')

popular_ICAO_airline_codes = Source_links_and_api().popular_ICAO_airline_codes()
# Maps airline code -> list of flight numbers (ints) seen for that airline.
grouped_flight_ids = {}

for flight_doc in test:
    flightID = flight_doc['flightID']
    # Restricted to a single carrier for now; to cover every popular carrier use
    # `flightID[:3] not in popular_ICAO_airline_codes` instead.
    if flightID[:3] != 'JIA':
        continue
    # fullmatch rejects IDs with a non-numeric suffix. The previous version
    # indexed the ID string with ['flightID'] (TypeError) and could reuse a
    # stale `match` from an earlier iteration.
    match = pattern.fullmatch(flightID)
    if match is None:
        continue
    airline_code, digits = match.groups()
    grouped_flight_ids.setdefault(airline_code, []).append(int(digits))



In [None]:
# Scatter the flight numbers of the first airline group in ascending order,
# leaving out the 20 largest values.
y = sorted(list(grouped_flight_ids.values())[0])[:-20]
ranges = range(len(y))
plt.scatter(ranges, y, s=1)
# Alternative views: plt.hist(y, bins=10) or plt.plot(ranges, y)

# The user types UA, DL or AA, and the returned results come from the flights collection.
# collect those digits that match flightID from flightCollection's ones that have versions specifically RPA, SKW, for all, and for UA- GJS, UCA, SKY, RPA.

# for eg ua844 or ua3341 - find the digits 844 or 3341 in flightID from flightCollection's ones that have versions specifically RPA, SKW, for all, and for UA- GJS, UCA, SKY, RPA.








In [None]:
# For every flight document, report its flightID together with the
# `version_created_at` of the last entry in its `versions` array.
fn = '3341'   # only referenced by the disabled lookup at the bottom

sort_by_version_time = {"$sort": {"versions.version_created_at": -1}}
project_latest_version = {
    "$project": {
        "flightID": 1,
        # index -1 picks the last element of the array of version timestamps
        "latest_version_time": {"$arrayElemAt": ["$versions.version_created_at", -1]},
    }
}
# Insert {"$limit": 1} after the sort stage to keep only the first document.
pipeline = [sort_by_version_time, project_latest_version]

result = [doc for doc in collection_flights.aggregate(pipeline)]
# collection_flights.find_one({'flightID': {'$regex': f'^{fn}$'}},{'versions': {'$exists': 'true'}})

In [None]:
# Count flight documents with and without a version timestamp.
has_version_stamp = {"versions.version_created_at": {"$exists": True}}
lacks_version_stamp = {"versions.version_created_at": {"$exists": False}}

with_version_count = collection_flights.count_documents(has_version_stamp)
without_version_count = collection_flights.count_documents(lacks_version_stamp)
(with_version_count, without_version_count)

In [None]:
# One 10-bin histogram of flight numbers per airline code.
for airline_code, digits in grouped_flight_ids.items():
    fig, ax = plt.subplots()
    ax.hist(digits, bins=10)
    ax.set_title(f'Digit distribution for {airline_code}')
    ax.set_xlabel('Digits')
    ax.set_ylabel('Frequency')
    plt.show()

In [None]:
# Bar chart: mean flight number for each airline code, one bar per code.
fig, ax = plt.subplots(figsize=(10, 6))
for position, (airline_code, digits) in enumerate(grouped_flight_ids.items()):
    ax.bar(position, sum(digits)/len(digits), label=airline_code)

# Label each bar position with its airline code.
ax.set_xticks(range(len(grouped_flight_ids)))
ax.set_xticklabels(list(grouped_flight_ids.keys()))
ax.set_xlabel('ICAO Code')
ax.set_ylabel('Average Flight Number')
ax.set_title('Average Flight Numbers by ICAO Code')
ax.legend()
plt.show()

In [None]:
from config.database import db_UJ        # UJ mongoDB
# Use search interface to find sic items that match query given search index collection docs as 'sic'
from routes.root.search.search_interface import SearchInterface
from routes.root.search.fuzz_find import fuzz_find
from routes.root.search.query_classifier import QueryClassifier

search_index_collection = db_UJ['search_index']

# Match only documents that carry a popularity-hit counter (`ph`).
count_crit = {'ph': {"$exists": True}}
# Exclusion projection: every field except `submits` is returned
# (use {'ph': 0, 'submits': 0} to hide the counter as well).
return_crit = {'submits': 0}

# Highest `ph` first.
ranked_cursor = search_index_collection.find(count_crit, return_crit).sort('ph', -1)
sic = list(ranked_cursor)


In [None]:
# Fuzzy-match a partial query against the search-index documents held in `sic`.
query = 'denv'
limit = 500

qc = QueryClassifier(icao_file_path="unique_icao.pkl")
ff = SearchInterface()

# Convert the raw index documents to the suggestion format, then match them.
formatted_suggestions = ff.search_suggestion_frontned_format(c_docs=sic)
sti_items_match_w_query = fuzz_find(
    query=query,
    data=formatted_suggestions,
    qc=qc,
    limit=limit,
)



In [None]:
from routes.route import get_search_suggestions


# Call the route function directly with an anonymous user and a partial query;
# the result is kept in `x` for inspection (uncomment the last line to display it).
query='den'
x = await get_search_suggestions(email='Anonymous',query=query)
# x