In [None]:
from fast_flights import FlightData, Passengers, create_filter

def generate_flight_url(
        departure_date: str,
        from_airport: str,
        to_airport: str,
        trip_type: str="one-way",
        seat_type: str="economy",
        adults: int=1,
        children: int=0,
        infants_in_seat: int=0,
        infants_on_lap: int=0,
        currency: str="EUR"
) -> str:
    """ 
    Generate Flights URL form parameters.

    Parameters:
    - departure_date: str,date of departure, formated as YYYY-DD-MM.
    - from_airport: str, IATA code of the departure airport (e.g., "CDG".)
    - to_airport: str, IATA code of the arrival airport (e.g., "JFK")
    - trip_type: str, type of trip, either "one-way" or "round-trip" (default: "one-way").
    - seat_type: str, class of service, e.g., "economy", "business" (default: "economy").
    - adults: int, number of adult passengers (default: 1).
    - children: int, number of child passengers (default: 0).
    - infants in seat: int, number of infants sitting on laps ( default: 0).
    - currency: str, currency for pricing (default: "EUR").

    Returns:
    - str, a URL string with the flight search parameters.
    """
        
    filter = create_filter(
        flight_data=[
            FlightData(
                date=departure_date,
                from_airport=from_airport,
                to_airport=to_airport,
            )
        ],
        trip=trip_type,
        seat=seat_type,
        passengers=Passengers(
                adults=adults,
                children=children,
                infants_in_seat=infants_in_seat,
                infants_on_lap=infants_on_lap,
        ),
    )

    b64=filter.as_b64().decode('utf-8')
    return f"https://www.google.com/travel/flights?tfs={b64}&curr={currency}"

#Exemple
def main():
    url= generate_flight_url(
        departure_date="2025-28-03",
        from_airport="CDG",
        to_airport="KIX",
        trip_type="one-way",
        seat_type="economy",
        adults=2,
        children=1,
    )
    print(url)

if __name__=="__main__":
    main()

https://www.google.com/travel/flights?tfs=GhoSCjIwMjUtMjgtMDNqBRIDQ0RHcgUSA0tJWEIDAQECSAGYAQI=&curr=EUR


In [41]:
import requests


url = "https://api.brightdata.com/request"

payload = {
    "zone": "serp_api_flights_test",
    "url": "https://www.google.com/travel/flights?tfs=GhoSCjIwMjUtMjgtMDNqBRIDQ0RHcgUSA0tJWEIDAQECSAGYAQI=&curr=EUR",
    "format": "raw",
    "method": "GET",
}
headers = {
    "Authorization": "Bearer 036fd02e-73d1-4e27-802c-11f975c0b8a4",
    "Content-Type": "application/json"
}

response = requests.request("POST", url, json=payload, headers=headers)

print(response.text)

<!doctype html><html lang="en" dir="ltr"><head><base href="https://www.google.com/"><link rel="preconnect" href="//www.gstatic.com"><meta name="referrer" content="strict-origin-when-cross-origin"><meta name="viewport" content="width=device-width,initial-scale=1.0,minimum-scale=1.0,maximum-scale=1.0,user-scalable=no"><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta name="color-scheme" content="light dark"><meta name="google-site-verification" content="sxp7zFOFUzk09RdlFhuH2SoCn5nOkXgomiLeLIQ48p0"><meta name="google-site-verification" content="uceYfkdbu7tdKGywiwSr1p0cIbqdYOwMDkNq5jVFwMA"><meta name="google-site-verification" content="O5G3B9VUIil1GEVlPw3BfdeYn_kZeNd_6rsDolHah5w"><meta name="google-site-verification" content="hU5-JhTB7DyiEACObYa4GcZxXOTY5FykMqegq9lCAqA"><meta name="application-name" content="Google Flights"><meta name="apple-mobile-web-app-title" content="Google Flights"><meta name="apple-mobile-web-app-status-bar-style" content="black"><meta name="m

In [52]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.text, "html.parser")
list(set([tag.attrs['aria-label'] for tag in soup.select('li > div > div') if 'aria-label' in tag.attrs]))

["From 1110 euros.This price does not include overhead bin access. 3 stops flight with easyJet, Hainan and Spring. Leaves Paris Charles de Gaulle Airport at 4:55\u202fPM on Sunday, March 9 and arrives at Kansai International Airport at 4:00\u202fPM on Tuesday, March 11. Total duration 39 hr 5 min. Layover (1 of 3) is a 16 hr overnight layover at Budapest Ferenc Liszt International Airport in Budapest. Layover (2 of 3) is a 2 hr 20 min layover at Shenzhen Bao'an International Airport in Shenzhen. Layover (3 of 3) is a 2 hr 50 min layover at Shanghai Pudong International Airport in Shanghai. Select flight",
 'From 4307 euros. Nonstop flight with Air France. Leaves Paris Charles de Gaulle Airport at 1:45\u202fPM on Sunday, March 9 and arrives at Kansai International Airport at 10:35\u202fAM on Monday, March 10. Total duration 12 hr 50 min.  Select flight',
 'From 1441 euros.This price does not include overhead bin access. 2 stops flight with easyJet and Air China. Leaves Paris Charles de 

In [53]:
import re
import json

def extract_flight_info(flight_texts):
    flight_list = []

    for text in flight_texts:
        flight_info = {}

        # Extract price
        price_match = re.search(r'From (\d+) euros', text)
        flight_info['price'] = int(price_match.group(1)) if price_match else None

        # Extract airlines
        airline_match = re.findall(r'flight with ([\w\s&\-]+)', text)
        flight_info['airlines'] = airline_match[0].split(" and ") if airline_match else []

        # Extract departure time
        departure_match = re.search(r'Leaves [\w\s]+ at (\d{1,2}:\d{2}\s*[APM]{2}?)', text)
        flight_info['departure_time'] = departure_match.group(1) if departure_match else None

        # Extract arrival time
        arrival_match = re.search(r'arrives at [\w\s]+ at (\d{1,2}:\d{2}\s*[APM]{2}?)', text)
        flight_info['arrival_time'] = arrival_match.group(1) if arrival_match else None

        # Extract flight duration
        duration_match = re.search(r'Total duration (\d+\s*hr\s*\d*\s*min)', text)
        flight_info['flight_duration'] = duration_match.group(1) if duration_match else None

        # Extract layovers
        layover_match = re.search(r'(\d+) stop', text)
        flight_info['layovers'] = int(layover_match.group(1)) if layover_match else 0

        # Extract layover details
        layover_time_match = re.findall(r'Layover \(\d+ of \d+\) is a (\d+\s*hr\s*\d*\s*min) layover', text)
        layover_airport_match = re.findall(r'layover at ([\w\s]+) in ([\w\s]+)\.', text)

        layovers_list = []
        for i in range(len(layover_time_match)):
            layovers_list.append({
                "layover_time": layover_time_match[i],
                "layover_airport": f"{layover_airport_match[i][0]} ({layover_airport_match[i][1]})" if i < len(layover_airport_match) else None
            })

        flight_info['layover_details'] = layovers_list

        flight_list.append(flight_info)

    return flight_list

# Extract and print flight data
flight_texts = get_flight_texts(response)
flight_data = extract_flight_info(flight_texts)

print(json.dumps(flight_data, indent=4))


[
    {
        "price": 1110,
        "airlines": [
            "easyJet"
        ],
        "departure_time": "4:55\u202fPM",
        "arrival_time": "4:00\u202fPM",
        "flight_duration": "39 hr 5 min",
        "layovers": 3,
        "layover_details": [
            {
                "layover_time": "2 hr 20 min",
                "layover_airport": "Budapest Ferenc Liszt International Airport (Budapest)"
            },
            {
                "layover_time": "2 hr 50 min",
                "layover_airport": "Shanghai Pudong International Airport (Shanghai)"
            }
        ]
    },
    {
        "price": 4307,
        "airlines": [
            "Air France"
        ],
        "departure_time": "1:45\u202fPM",
        "arrival_time": "10:35\u202fAM",
        "flight_duration": "12 hr 50 min",
        "layovers": 0,
        "layover_details": []
    },
    {
        "price": 1441,
        "airlines": [
            "easyJet",
            "Air China"
        ],
        "d

In [1]:
import requests
import re
import json
from bs4 import BeautifulSoup

def fetch_flight_data(
    api_url: str,
    api_key: str,
    flight_url: str,
    zone: str,
  ) -> requests.Response:

    """
  Fetches flight data from the specified API URL.

  Parameters:
  - api_url: str, SERP API Url of BrightData.
  - api_key: str, The API Key BrightData generate in your account.
  - flight_url: str, URL containing flight search parameters.
  - zone: str, Name of your API zone configuration project on BrightData.

  Returns:
  - HTML code response for of the flight_url. 

  """
    payload = {
        "zone": zone,
        "url": flight_url,
        "format": "raw",
        "method": "GET",
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }
    return requests.post(api_url, json=payload, headers=headers)

def get_flight_texts(response):

    soup = BeautifulSoup(response.text, "html.parser")
    return list(set([tag.attrs['aria-label'] for tag in soup.select('li > div > div') if 'aria-label' in tag.attrs]))


"""
    Extract flight information texts from the HTML response.
    
    Parameters:
    - response: The API response containing flight data.
    
    Returns:
    - List of flight descriptions extracted from HTML.
    """
  
def extract_currency(flight_url):
    
    match = re.search(r"curr=([A-Z]+)", flight_url)
    return match.group(1) if match else "UNKNOWN"

    """
    Extract currency from the flight URL.

    Parameters:
    - flight_url: The URL containing the flight search parameters.

    Returns:
    - The detected currency (default: "UNKNOWN").
    """

def extract_flight_info(flight_texts, currency):

    flight_list = []
    
    for text in flight_texts:
        flight_info = {}

        # Extract price
        price_match = re.search(r'From (\d+) euros', text)
        flight_info['price'] = f"{price_match.group(1)} {currency}" if price_match else None

        # Extract airlines
        airline_match = re.findall(r'flight with ([\w\s&\-]+)', text)
        flight_info['airlines'] = airline_match[0].split(" and ") if airline_match else []

        # Extract departure time
        departure_match = re.search(r'Leaves [\w\s]+ at (\d{1,2}:\d{2}\s*[APM]{2}?)', text)
        flight_info['departure_time'] = departure_match.group(1).replace("\u202f", " ").strip() if departure_match else None

        # Extract arrival time
        arrival_match = re.search(r'arrives at [\w\s]+ at (\d{1,2}:\d{2}\s*[APM]{2}?)', text)
        flight_info['arrival_time'] = arrival_match.group(1).replace("\u202f", " ").strip() if arrival_match else None

        # Extract flight duration
        duration_match = re.search(r'Total duration (\d+\s*hr\s*\d*\s*min)', text)
        flight_info['flight_duration'] = duration_match.group(1) if duration_match else None

        # Extract layovers
        layover_match = re.search(r'(\d+) stop', text)
        flight_info['layovers'] = int(layover_match.group(1)) if layover_match else 0

        # Extract layover details
        layover_time_match = re.findall(r'Layover \(\d+ of \d+\) is a (\d+\s*hr\s*\d*\s*min) layover', text)
        layover_airport_match = re.findall(r'layover at ([\w\s]+) in ([\w\s]+)\.', text)

        layovers_list = [
            {"layover_time": layover_time_match[i], "layover_airport": f"{layover_airport_match[i][0]} ({layover_airport_match[i][1]})"}
            for i in range(len(layover_time_match))
            if i < len(layover_airport_match)
        ]

        flight_info['layover_details'] = layovers_list
        flight_list.append(flight_info)

    return flight_list

    """
    Extract structured flight information from raw text data.
    
    Parameters:
    - flight_texts: List of raw text descriptions of flights.
    
    Returns:
    - List of dictionaries containing structured flight details.
    """

 # Exemple

def main():
    api_url = "https://api.brightdata.com/request"
    api_key = "036fd02e-73d1-4e27-802c-11f975c0b8a4"  # Replace with your actual API key
    flight_url = "https://www.google.com/travel/flights?tfs=GhoSCjIwMjUtMjgtMDNqBRIDQ0RHcgUSA0tJWEIDAQECSAGYAQI=&curr=EUR" # Replace with your actual flight_url
    zone = "serp_api_flights_test"  # Replace with the actual zone generate from BrightData
    
    response = fetch_flight_data(api_url, api_key, flight_url, zone)
    if response.status_code == 200:
        flight_texts = get_flight_texts(response)
        currency = extract_currency(flight_url)
        flight_data = extract_flight_info(flight_texts, currency)
        print(json.dumps(flight_data, indent=4)) # Convert dictionaries to json.
    else:
        print(f"Failed to fetch data: {response.status_code}")

if __name__ == "__main__":
    main()


[
    {
        "price": "1472 EUR",
        "airlines": [
            "China Eastern"
        ],
        "departure_time": "12:25 PM",
        "arrival_time": "12:10 PM",
        "flight_duration": "15 hr 45 min",
        "layovers": 1,
        "layover_details": [
            {
                "layover_time": "1 hr 45 min",
                "layover_airport": "Shanghai Pudong International Airport (Shanghai)"
            }
        ]
    },
    {
        "price": "1017 EUR",
        "airlines": [
            "Hainan"
        ],
        "departure_time": "12:50 PM",
        "arrival_time": "12:00 PM",
        "flight_duration": "39 hr 10 min",
        "layovers": 2,
        "layover_details": []
    },
    {
        "price": "4081 EUR",
        "airlines": [
            "Air France",
            "KLM"
        ],
        "departure_time": "11:40 AM",
        "arrival_time": "11:20 AM",
        "flight_duration": "15 hr 40 min",
        "layovers": 1,
        "layover_details": [
        