In [1]:
import h2o
import zipfile
import os
import sys
from pyspark.sql import SparkSession
from IPython.display import display
from pyspark.sql.functions import regexp_extract, col, split, udf, trim, when, from_unixtime, unix_timestamp, minute, hour, datediff, lit
from pyspark.sql import functions as F
from pyspark.sql.types import IntegerType, StringType, BooleanType, ArrayType
import datetime
import argparse
import json
import glob, os, shutil
import pandas as pd
from pandas.io.json import json_normalize
from pyspark import SparkContext

pd.options.display.max_columns = 99

sc = SparkContext()

spark = SparkSession \
        .builder \
        .appName("Data ETL") \
        .config("spark.some.config.option", "some-value") \
        .getOrCreate()
        
display(spark.version)

'2.1.1'

# Load Data

In [4]:
# Version 1.0
flight = spark.read.parquet("/home/ubuntu/s3/comb/flight_v1_0.pq")
display(flight.count())
display(flight.show(2))

txt_exception_folder = '/home/ubuntu/s3/comb/txt_exception/'
print(txt_exception_folder)

# Version 1.1
flightv1_1 = spark.read.json(os.path.join(txt_exception_folder, "flight_15_13_price_2017-05-11*.txt"))
display(flightv1_1.count())
display(flightv1_1.show(1))

2288103

+--------------+----------+---------+----------------+-------+------------+----+-------+------------+-------------+--------------------+-------------+--------------------+--------------------+--------+-----------+-------------+-----+--------------------+-----+------+----------+-----------+---------+----+-------------------+-----------+-----+----+
|from_city_name|start_date|stay_days|      table_name|task_id|to_city_name|trip|version|airline_code|airline_codes|            arr_time|check_bag_inc|             company|            dep_time|duration|flight_code|flight_number|index|               plane|power| price|price_code|search_date|span_days|stop|          stop_info|ticket_left|video|wifi|
+--------------+----------+---------+----------------+-------+------------+----+-------+------------+-------------+--------------------+-------------+--------------------+--------------------+--------+-----------+-------------+-----+--------------------+-----+------+----------+-----------+---------+--

None

/home/ubuntu/s3/comb/txt_exception/


74603

+------------+----------+--------------------+--------------------+--------+-----+----------+--------+------------------+-------+--------------------+--------------------+--------+----+--------------------+-------+
|currencyCode|   depDate|         flight_leg1|         flight_leg2|fromCity|price|searchDate|stayDays|         tableName|task_id|       timeline_leg1|       timeline_leg2|  toCity|trip|                 url|version|
+------------+----------+--------------------+--------------------+--------+-----+----------+--------+------------------+-------+--------------------+--------------------+--------+----+--------------------+-------+
|         AUD|2017-05-18|[[Hangzhou,HGH],2...|[[Bangkok,BKK],20...| Bangkok|401.3|2017-05-11|       7|flight_15_13_price|  16232|[[[Macau, Macau,M...|[[[Macau, Macau,M...|Hangzhou|   2|https://www.exped...|    1.1|
+------------+----------+--------------------+--------------------+--------+-----+----------+--------+------------------+-------+-----------

None

# Modify version 1.0

In [5]:
# for one way trips, display None in stay_days
def correct_stay_days(trip, stay_days):
    if trip == '1':
        return None
    else:
        return int(stay_days)

correct_stay_days_UDF = udf(correct_stay_days, IntegerType())

def correct_tickets_left(noOfTicketsLeft):
    if noOfTicketsLeft == 0:
        return 999
    else:
        return noOfTicketsLeft
    
correct_tickets_left_UDF = udf(correct_tickets_left, IntegerType())


flight2 = (flight.withColumn('stayDays', correct_stay_days_UDF(col('trip'), col('stay_days')))
                 .drop('stay_days')
                 .withColumnRenamed('start_date', 'depDate')                 
                 .selectExpr('*', 'date_add(depDate, stayDays) as retDate')# this is when the return trip starts, might arrive a day later
                 .withColumnRenamed('from_city_name', 'fromCity')
                 .withColumnRenamed('to_city_name', 'toCity')                 
                 .withColumnRenamed('search_date', 'searchDate')                 
                 .withColumnRenamed('company', 'airlineName')                 
                 .withColumnRenamed('dep_time', 'departureTime')                                  
                 .withColumnRenamed('arr_time', 'arrivalTime')                                                   
                 .withColumn('duration_h', split(flight.duration,'h').getItem(0))
                 .withColumn('duration_m', F.substring_index(split(flight.duration,'h').getItem(1), 'm', 1))
#                  .withColumn('duration', F.struct(col('duration_h'), col('duration_m')))
                 .withColumn('duration_m', (col('duration_h')*60 + col('duration_m')))
                 .drop('duration', 'duration_h', 'flight_number')
                 .withColumnRenamed('price_code', 'currencyCode')                                  
                 .withColumnRenamed('stop', 'stops')                                  
                 .withColumn('stop_info', split(col('stop_info'), ';'))
                 .withColumn('noOfTicketsLeft', correct_tickets_left_UDF('ticket_left'))
                .drop('ticket_left')
               .withColumnRenamed('table_name', 'tableName')
                .select('price', 'version', 'searchDate', 'tableName', 'task_id', 'currencyCode', 
                        'fromCity', 'toCity', 'trip', 'depDate', 'retDate',
                        'stayDays', 
                       'departureTime', 'arrivalTime', 
                        'airlineName',  'duration_m', 
                        'flight_code', 'plane', 'stops', 'noOfTicketsLeft',
                       'airline_code', 'airline_codes',
                       'stop_info', 'span_days', 'power', 'video', 'wifi')                
          )
# varaibles added in v1.1: 'departureTime_leg2', 'arrivalTime_leg2', 'airlineName_leg2','duration_m_leg2','stops_leg2'
#  'noOfTicketsLeft_leg2','airline_codes_leg2', 
# 'stop_list', 'url'

# variables dropped in v1.1:
# 'span_days', 'power', 'video', 'wifi', 'stop_info'

display(flight2.where(col('trip') == 1).show(1))
display(flight2.where(col('trip') == 2).show(1, truncate=False))
flight2.printSchema()

# flight2.select('flight_code', 'flight_number').distinct().show(1000)
# flight2.select('stop_info').distinct().show()
# flight2.select('stop_list').distinct().show(100, truncate=False)

+------+-------+----------+----------------+-------+------------+--------+-------+----+----------+-------+--------+--------------------+--------------------+--------------+----------+-----------+----------------+-----+---------------+------------+-------------+--------------------+---------+-----+-----+-----+
| price|version|searchDate|       tableName|task_id|currencyCode|fromCity| toCity|trip|   depDate|retDate|stayDays|       departureTime|         arrivalTime|   airlineName|duration_m|flight_code|           plane|stops|noOfTicketsLeft|airline_code|airline_codes|           stop_info|span_days|power|video| wifi|
+------+-------+----------+----------------+-------+------------+--------+-------+----+----------+-------+--------+--------------------+--------------------+--------------+----------+-----------+----------------+-----+---------------+------------+-------------+--------------------+---------+-----+-----+-----+
|605.72|    1.0|2017-05-01|flight_1_5_price|    676|         AUD|  

None

+-----+-------+----------+----------------+-------+------------+--------+-------+----+----------+----------+--------+-----------------------------+-----------------------------+--------------+----------+-----------+-----------------------------------+-----+---------------+------------+-------------+---------------------+---------+-----+-----+----+
|price|version|searchDate|tableName       |task_id|currencyCode|fromCity|toCity |trip|depDate   |retDate   |stayDays|departureTime                |arrivalTime                  |airlineName   |duration_m|flight_code|plane                              |stops|noOfTicketsLeft|airline_code|airline_codes|stop_info            |span_days|power|video|wifi|
+-----+-------+----------+----------------+-------+------------+--------+-------+----+----------+----------+--------+-----------------------------+-----------------------------+--------------+----------+-----------+-----------------------------------+-----+---------------+------------+-------------+

None

root
 |-- price: double (nullable = true)
 |-- version: string (nullable = true)
 |-- searchDate: string (nullable = true)
 |-- tableName: string (nullable = true)
 |-- task_id: string (nullable = true)
 |-- currencyCode: string (nullable = true)
 |-- fromCity: string (nullable = true)
 |-- toCity: string (nullable = true)
 |-- trip: string (nullable = true)
 |-- depDate: string (nullable = true)
 |-- retDate: date (nullable = true)
 |-- stayDays: integer (nullable = true)
 |-- departureTime: string (nullable = true)
 |-- arrivalTime: string (nullable = true)
 |-- airlineName: string (nullable = true)
 |-- duration_m: double (nullable = true)
 |-- flight_code: string (nullable = true)
 |-- plane: string (nullable = true)
 |-- stops: long (nullable = true)
 |-- noOfTicketsLeft: integer (nullable = true)
 |-- airline_code: string (nullable = true)
 |-- airline_codes: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- stop_info: array (nullable = true)
 |    |-- 

# Modify Version 1.1

In [33]:
flightv1_1.select('timeline_leg1').take(1).select

<bound method DataFrame.first of DataFrame[timeline_leg1: array<struct<arrivalAirport:struct<airportCityState:string,city:string,code:string,localName:string,longName:string,name:string>,arrivalTime:struct<date:string,dateLongStr:string,dateTime:bigint,hour:string,isoStr:string,time:string,travelDate:string>,brandedFareName:string,carrier:struct<airlineCode:string,airlineImageFileNameWithoutExtension:string,airlineName:string,bookingCode:string,cabinClass:string,flightNumber:string,plane:string,planeCode:string>,departureAirport:struct<airportCityState:string,city:string,code:string,localName:string,longName:string,name:string>,departureTime:struct<date:string,dateLongStr:string,dateTime:bigint,hour:string,isoStr:string,time:string,travelDate:string>,distance:struct<formattedTotal:string,total:bigint,unit:string>,duration:struct<hours:bigint,minutes:bigint>,layover:boolean,segment:boolean,type:string>>]>

In [46]:
from pyspark.sql.types import ArrayType, StringType
from pyspark.sql.functions import array, lit

# take_all_level1_str = udf(lambda rows, a: [row[a] for row in rows], ArrayType(StringType()))
take_all_level2_str = udf(lambda rows, a, b:  [None if row is None else row[a][b] for row in rows], ArrayType(StringType()))
# take_all = udf(lambda rows, a: [row[a]['city'] for row in rows], ArrayType(StringType()))


flightv1_1.withColumn("city", take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('city')))\
                      .select('airports').show(10)
# flightv1_1.withColumn("airports", take_all_level1_str(flightv1_1.timeline_leg1, 'type')).select('airports').show(10)
# flightv1_1.withColumn("airports", take_all(flightv1_1.timeline_leg1, lit('arrivalAirport'))).select('airports').show(10)

# display(flightv1_1.select('timeline_leg1').show(100, truncate=False))



+--------------------+
|            airports|
+--------------------+
|   [Macau, Hangzhou]|
|   [Macau, Hangzhou]|
|          [Hangzhou]|
|   [Wuhan, Hangzhou]|
|[Shenzhen, Hangzhou]|
|[Shenzhen, Hangzhou]|
|          [Hangzhou]|
|[Shenzhen, Hangzhou]|
|[Guangzhou, Hangz...|
|[Shenzhen, Hangzhou]|
+--------------------+
only showing top 10 rows



In [89]:
# df.selectExpr("explode(check) as e").select("e.*").show()

# flightv1_1.selectExpr('explode(timeline_leg1) as e').select('e.*').show(truncate=False)

In [84]:
from pyspark.sql.types import StructType, StructField, LongType, TimestampType
import datetime
# print(
#     datetime.datetime.fromtimestamp(
#         int("1284101485")
#     ).strftime('%Y-%m-%d %H:%M:%S')
# )



timeFmt = "yyyy-MM-dd'T'HH:mm:ss.SSS"

take_all_level2_str = udf(lambda rows, a, b: None if rows is None else [None if row is None else row[a][b] for row in rows], ArrayType(StringType()))
take_all_level2_long = udf(lambda rows, a, b: None if rows is None else [None if row is None else datetime.datetime.fromtimestamp(row[a][b]) for row in rows], ArrayType(TimestampType()))

# airport = ArrayType(StructType([
#                                 StructField("airportCityState", StringType()),
#                                 StructField("city", StringType()),
#                                 StructField("code", StringType()),
#                                 StructField("localName", StringType()),
#                                 StructField("longName", StringType()),
#                                 StructField("name", StringType()),              
#           ]))

# take_all_airport = udf(lambda rows, a:  [None if row is None else row[a] for row in rows], ArrayType(airport))


flightv1_1_2 = (flightv1_1.withColumn('trip', col('trip').cast('string'))
                    .withColumn('stayDays', correct_stay_days_UDF(col('trip'), col('stayDays')))                    
                    .selectExpr('*', 'date_add(depDate, stayDays) as retDate')# this is when the return trip starts, might arrive a day later
                    .withColumn('airline_code', flightv1_1.flight_leg1.carrierSummary.airlineCodes.getItem(0))                   
                    .withColumn('airline_codes', flightv1_1.flight_leg1.carrierSummary.airlineCodes)                    
                    .withColumn('airline_codes_leg2', flightv1_1.flight_leg2.carrierSummary.airlineCodes)                    
                    .withColumn('departureTime', flightv1_1.flight_leg1.departureTime)
                    .withColumn('departureTime_leg2', flightv1_1.flight_leg2.departureTime)
                    .withColumn('arrivalTime', flightv1_1.flight_leg1.arrivalTime)
                    .withColumn('arrivalTime_leg2', flightv1_1.flight_leg2.arrivalTime)
#                 .withColumn('check_bag_inc', flightv1_1.flight_leg1.arrivalTime)
                    .withColumn('airlineName', flightv1_1.flight_leg1.carrierSummary.airlineName)
                    .withColumn('airlineName_leg2', flightv1_1.flight_leg2.carrierSummary.airlineName)
                    .withColumn('duration_m', (F.unix_timestamp('arrivalTime', format=timeFmt) - 
                                               F.unix_timestamp('departureTime', format=timeFmt))/60)                    
                .withColumn('duration_m_leg2', (F.unix_timestamp('arrivalTime_leg2', format=timeFmt) - 
                                               F.unix_timestamp('departureTime_leg2', format=timeFmt))/60)                    
#                     .withColumn('duration', flightv1_1.timeline_leg1.getItem(1).duration)
                .withColumn('airlineCode', flightv1_1.timeline_leg1.getItem(0).carrier.airlineCode)
                .withColumn('flightNumber', flightv1_1.timeline_leg1.getItem(0).carrier.flightNumber.cast('string'))                
                .select('*', F.concat(col('airlineCode'), col('flightNumber')).alias('flight_code'))
                .drop('airlineCode', 'flightNumber')
                .withColumn('plane', flightv1_1.timeline_leg1.getItem(0).carrier.plane)                
                .withColumn('stops', flightv1_1.flight_leg1.stops)                
                .withColumn('stops_leg2', flightv1_1.flight_leg2.stops)                
                .withColumn('stop_list', flightv1_1.flight_leg1.stop_list)# need to do more work                
                .withColumn('stop_list_leg2', flightv1_1.flight_leg1.stop_list)
                .withColumn('noOfTicketsLeft', correct_tickets_left_UDF(flightv1_1.flight_leg1.carrierSummary.noOfTicketsLeft))
                .withColumn('noOfTicketsLeft_leg2', correct_tickets_left_UDF(flightv1_1.flight_leg2.carrierSummary.noOfTicketsLeft))
                
                .withColumn('fromCityAirportCode', flightv1_1.flight_leg1.departureLocation.airportCode)                
                .withColumn('toCityAirportCode', flightv1_1.flight_leg1.arrivalLocation.airportCode)
                .withColumn('fromCityAirportCode_leg2', flightv1_1.flight_leg2.departureLocation.airportCode)
                .withColumn('toCityAirportCode_leg2', flightv1_1.flight_leg2.arrivalLocation.airportCode)
                
                .withColumn('carrierAirProviderId', flightv1_1.flight_leg1.carrierSummary.airProviderId)
                .withColumn('carrierAirlineImageFileName', flightv1_1.flight_leg1.carrierSummary.airlineImageFileName)
                .withColumn('carrierMixedCabinClass', flightv1_1.flight_leg1.carrierSummary.mixedCabinClass)
                .withColumn('carrierMultiStop', flightv1_1.flight_leg1.carrierSummary.multiStop)
                .withColumn('carrierNextDayArrival', flightv1_1.flight_leg1.carrierSummary.nextDayArrival)
                
                .withColumn('carrierAirProviderId_leg2', flightv1_1.flight_leg2.carrierSummary.airProviderId)
                .withColumn('carrierAirlineImageFileName_leg2', flightv1_1.flight_leg2.carrierSummary.airlineImageFileName)
                .withColumn('carrierMixedCabinClass_leg2', flightv1_1.flight_leg2.carrierSummary.mixedCabinClass)
                .withColumn('carrierMultiStop_leg2', flightv1_1.flight_leg2.carrierSummary.multiStop)
                .withColumn('carrierNextDayArrival_leg2', flightv1_1.flight_leg2.carrierSummary.nextDayArrival)
                
                ### Leg 1
                ## Leg 1 departure
#                 .withColumn('timeline_departureAirport', take_all_airport(flightv1_1.timeline_leg1, lit('departureAirport')))                               
                .withColumn('timeline_departureAirport_cityState', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('airportCityState')))
                .withColumn('timeline_departureAirport_city', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('city')))
                .withColumn('timeline_departureAirport_code', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('code')))
                .withColumn('timeline_departureAirport_localName', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('localName')))
                .withColumn('timeline_departureAirport_longName', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('longName')))
                .withColumn('timeline_departureAirport_name', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('name')))
                
                .withColumn('timeline_departureTime', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureTime'), lit('dateTime')))
                
                

                ## Leg 1 arrival
                .withColumn('timeline_arrivalAirport_cityState', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('airportCityState')))
                .withColumn('timeline_arrivalAirport_city', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('city')))
                .withColumn('timeline_arrivalAirport_code', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('code')))
                .withColumn('timeline_arrivalAirport_localName', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('localName')))
                .withColumn('timeline_arrivalAirport_longName', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('longName')))
                .withColumn('timeline_arrivalAirport_name', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('name')))                
                
                
                
                ### Leg 2
                ## Leg 2 departure
                .withColumn('timeline_departureAirport_cityState_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('airportCityState')))
                .withColumn('timeline_departureAirport_city_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('city')))
                .withColumn('timeline_departureAirport_code_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('code')))
                .withColumn('timeline_departureAirport_localName_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('localName')))
                .withColumn('timeline_departureAirport_longName_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('longName')))
                .withColumn('timeline_departureAirport_name_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('name')))

                ## Leg 2 arrival
                .withColumn('timeline_arrivalAirport_cityState_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('airportCityState')))
                .withColumn('timeline_arrivalAirport_city_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('city')))
                .withColumn('timeline_arrivalAirport_code_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('code')))
                .withColumn('timeline_arrivalAirport_localName_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('localName')))
                .withColumn('timeline_arrivalAirport_longName_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('longName')))
                .withColumn('timeline_arrivalAirport_name_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('name')))                
               

                
                .select('price', 'version', 'searchDate', 'tableName', 'task_id', 'currencyCode', 
                        'fromCity', 'toCity', 'trip', 'depDate', 'retDate',
                        'stayDays', 
                       'departureTime', 'arrivalTime', 'departureTime_leg2', 'arrivalTime_leg2',
                        'airlineName', 'airlineName_leg2', 'duration_m', 'duration_m_leg2',                
                        'flight_code', 'plane', 'stops', 'stops_leg2', 'stop_list', 'stop_list_leg2',
                        'noOfTicketsLeft', 'noOfTicketsLeft_leg2',
                       'airline_code', 'airline_codes', 'airline_codes_leg2', 
                        'url', 'fromCityAirportCode', 'toCityAirportCode', 'fromCityAirportCode_leg2', 'toCityAirportCode_leg2',
                       'carrierAirProviderId', 'carrierAirlineImageFileName', 'carrierMixedCabinClass', 'carrierMultiStop', 'carrierNextDayArrival',
                        'carrierAirProviderId_leg2', 'carrierAirlineImageFileName_leg2', 'carrierMixedCabinClass_leg2', 'carrierMultiStop_leg2', 'carrierNextDayArrival_leg2',

                        ## leg 1
                        # departure
                        'timeline_departureAirport_cityState', 'timeline_departureAirport_city', 'timeline_departureAirport_code', 'timeline_departureAirport_localName', 
                        'timeline_departureAirport_longName', 'timeline_departureAirport_name',
                        
                        'timeline_departureTime',

                        # arrival
                        'timeline_arrivalAirport_cityState', 'timeline_arrivalAirport_city', 'timeline_arrivalAirport_code', 'timeline_arrivalAirport_localName', 
                        'timeline_arrivalAirport_longName', 'timeline_arrivalAirport_name',
                        
                        ## leg 2                        
                        # departure
                        'timeline_departureAirport_cityState_leg2', 'timeline_departureAirport_city_leg2', 'timeline_departureAirport_code_leg2', 'timeline_departureAirport_localName_leg2', 
                        'timeline_departureAirport_longName_leg2', 'timeline_departureAirport_name_leg2',

                        # arrival
                        'timeline_arrivalAirport_cityState_leg2', 'timeline_arrivalAirport_city_leg2', 'timeline_arrivalAirport_code_leg2', 'timeline_arrivalAirport_localName_leg2', 
                        'timeline_arrivalAirport_longName_leg2', 'timeline_arrivalAirport_name_leg2'
                       )                
               )

display(flightv1_1_2.where(col('trip')=='1').show(1))
display(flightv1_1_2.where(col('trip')=='2').show(1))
display(flightv1_1_2.printSchema())

display(flightv1_1.select("flight_leg1.stop_list.airport").show(100, truncate=False))
        
#      |         flight_leg1|flight_leg2|| |||   airline_code      ||       timeline_leg1|timeline_leg2|  |                 |||||       ||
               

# flightv1_1_2.show(3)

# temp = flightv1_1.select("flight_leg1.stop_list").show(100, truncate=False)
# flightv1_1_2.printSchema()         
# flightv1_1_2.crosstab('noOfTicketsLeft', 'noOfTicketsLeft_leg2')
# flightv1_1_2.cube('trip', flightv1_1_2.noOfTicketsLeft, flightv1_1_2.noOfTicketsLeft_leg2).count().orderBy('trip', "noOfTicketsLeft", "noOfTicketsLeft_leg2").show(truncate=False)

+------+-------+----------+------------------+-------+------------+--------+--------+----+----------+-------+--------+--------------------+--------------------+------------------+----------------+------------+----------------+----------+---------------+-----------+-----+-----+----------+---------+--------------+---------------+--------------------+------------+-------------+------------------+--------------------+-------------------+-----------------+------------------------+----------------------+--------------------+---------------------------+----------------------+----------------+---------------------+-------------------------+--------------------------------+---------------------------+---------------------+--------------------------+-----------------------------------+------------------------------+------------------------------+-----------------------------------+----------------------------------+------------------------------+----------------------+---------------------------

None

+-----+-------+----------+------------------+-------+------------+--------+--------+----+----------+----------+--------+--------------------+--------------------+--------------------+--------------------+-----------------+-----------------+----------+---------------+-----------+-----------+-----+----------+--------------------+--------------------+---------------+--------------------+------------+-------------+------------------+--------------------+-------------------+-----------------+------------------------+----------------------+--------------------+---------------------------+----------------------+----------------+---------------------+-------------------------+--------------------------------+---------------------------+---------------------+--------------------------+-----------------------------------+------------------------------+------------------------------+-----------------------------------+----------------------------------+------------------------------+-------------

None

root
 |-- price: double (nullable = true)
 |-- version: string (nullable = true)
 |-- searchDate: string (nullable = true)
 |-- tableName: string (nullable = true)
 |-- task_id: long (nullable = true)
 |-- currencyCode: string (nullable = true)
 |-- fromCity: string (nullable = true)
 |-- toCity: string (nullable = true)
 |-- trip: string (nullable = true)
 |-- depDate: string (nullable = true)
 |-- retDate: date (nullable = true)
 |-- stayDays: integer (nullable = true)
 |-- departureTime: string (nullable = true)
 |-- arrivalTime: string (nullable = true)
 |-- departureTime_leg2: string (nullable = true)
 |-- arrivalTime_leg2: string (nullable = true)
 |-- airlineName: string (nullable = true)
 |-- airlineName_leg2: string (nullable = true)
 |-- duration_m: double (nullable = true)
 |-- duration_m_leg2: double (nullable = true)
 |-- flight_code: string (nullable = true)
 |-- plane: string (nullable = true)
 |-- stops: long (nullable = true)
 |-- stops_leg2: long (nullable = true)
 |-

None

+---------------------------------------------------------------------+
|airport                                                              |
+---------------------------------------------------------------------+
|[Macau, Macau (MFM-Macau Intl.)]                                     |
|[Macau, Macau (MFM-Macau Intl.)]                                     |
|[]                                                                   |
|[Wuhan, China (WUH-Tianhe Intl.)]                                    |
|[Shenzhen, China (SZX-Shenzhen Intl.)]                               |
|[Shenzhen, China (SZX-Shenzhen Intl.)]                               |
|[]                                                                   |
|[Shenzhen, China (SZX-Shenzhen Intl.)]                               |
|[Guangzhou, China (CAN-Baiyun Intl.)]                                |
|[Shenzhen, China (SZX-Shenzhen Intl.)]                               |
|[Guangzhou, China (CAN-Baiyun Intl.)]                          

None

In [88]:
flightv1_1_2.withColumn('time', col('timeline_departureTime').cast('timestamp')).show(2, truncate=False)


AnalysisException: "cannot resolve 'CAST(`timeline_departureTime` AS TIMESTAMP)' due to data type mismatch: cannot cast ArrayType(StringType,true) to TimestampType;;\n'Project [price#453, version#463, searchDate#454, tableName#456, task_id#457L, currencyCode#448, fromCity#452, toCity#460, trip#36722, depDate#449, retDate#36758, stayDays#36740, departureTime#36840, arrivalTime#36887, departureTime_leg2#36863, arrivalTime_leg2#36912, airlineName#36938, airlineName_leg2#36965, duration_m#36993, duration_m_leg2#37022, flight_code#37115, plane#37178, stops#37210L, stops_leg2#37243L, ... 48 more fields]\n+- Project [price#453, version#463, searchDate#454, tableName#456, task_id#457L, currencyCode#448, fromCity#452, toCity#460, trip#36722, depDate#449, retDate#36758, stayDays#36740, departureTime#36840, arrivalTime#36887, departureTime_leg2#36863, arrivalTime_leg2#36912, airlineName#36938, airlineName_leg2#36965, duration_m#36993, duration_m_leg2#37022, flight_code#37115, plane#37178, stops#37210L, stops_leg2#37243L, ... 47 more fields]\n   +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 51 more fields]\n      +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 50 more fields]\n         +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 49 more fields]\n            +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 48 more fields]\n               +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 47 more fields]\n                  +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 46 more fields]\n                     +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 45 more fields]\n                        +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 44 more fields]\n                           +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 43 more fields]\n                              +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 42 more fields]\n                                 +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 41 more fields]\n                                    +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 40 more fields]\n                                       +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 39 more fields]\n                                          +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 38 more fields]\n                                             +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 37 more fields]\n                                                +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 36 more fields]\n                                                   +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 35 more fields]\n                                                      +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 34 more fields]\n                                                         +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 33 more fields]\n                                                            +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 32 more fields]\n                                                               +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 31 more fields]\n                                                                  +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 30 more fields]\n                                                                     +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 29 more fields]\n                                                                        +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 28 more fields]\n                                                                           +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 27 more fields]\n                                                                              +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 26 more fields]\n                                                                                 +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 25 more fields]\n                                                                                    +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 24 more fields]\n                                                                                       +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 23 more fields]\n                                                                                          +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 22 more fields]\n                                                                                             +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 21 more fields]\n                                                                                                +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 20 more fields]\n                                                                                                   +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 19 more fields]\n                                                                                                      +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 18 more fields]\n                                                                                                         +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 17 more fields]\n                                                                                                            +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 16 more fields]\n                                                                                                               +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 15 more fields]\n                                                                                                                  +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 14 more fields]\n                                                                                                                     +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 13 more fields]\n                                                                                                                        +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 12 more fields]\n                                                                                                                           +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 11 more fields]\n                                                                                                                              +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 10 more fields]\n                                                                                                                                 +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 9 more fields]\n                                                                                                                                    +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 8 more fields]\n                                                                                                                                       +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 7 more fields]\n                                                                                                                                          +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 6 more fields]\n                                                                                                                                             +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 5 more fields]\n                                                                                                                                                +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 7 more fields]\n                                                                                                                                                   +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 6 more fields]\n                                                                                                                                                      +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 5 more fields]\n                                                                                                                                                         +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 4 more fields]\n                                                                                                                                                            +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 3 more fields]\n                                                                                                                                                               +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, ... 2 more fields]\n                                                                                                                                                                  +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, arrivalTime_leg2#36912, flight_leg1#450.carrierSummary.airlineName AS airlineName#36938]\n                                                                                                                                                                     +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, arrivalTime#36887, flight_leg2#451.arrivalTime AS arrivalTime_leg2#36912]\n                                                                                                                                                                        +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, departureTime_leg2#36863, flight_leg1#450.arrivalTime AS arrivalTime#36887]\n                                                                                                                                                                           +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, departureTime#36840, flight_leg2#451.departureTime AS departureTime_leg2#36863]\n                                                                                                                                                                              +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, airline_codes_leg2#36818, flight_leg1#450.departureTime AS departureTime#36840]\n                                                                                                                                                                                 +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, airline_codes#36797, flight_leg2#451.carrierSummary.airlineCodes AS airline_codes_leg2#36818]\n                                                                                                                                                                                    +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, airline_code#36777, flight_leg1#450.carrierSummary.airlineCodes AS airline_codes#36797]\n                                                                                                                                                                                       +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, retDate#36758, flight_leg1#450.carrierSummary.airlineCodes[0] AS airline_code#36777]\n                                                                                                                                                                                          +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463, date_add(cast(depDate#449 as date), stayDays#36740) AS retDate#36758]\n                                                                                                                                                                                             +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, correct_stay_days(trip#36722, stayDays#455L) AS stayDays#36740, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, trip#36722, url#462, version#463]\n                                                                                                                                                                                                +- Project [currencyCode#448, depDate#449, flight_leg1#450, flight_leg2#451, fromCity#452, price#453, searchDate#454, stayDays#455L, tableName#456, task_id#457L, timeline_leg1#458, timeline_leg2#459, toCity#460, cast(trip#461L as string) AS trip#36722, url#462, version#463]\n                                                                                                                                                                                                   +- Relation[currencyCode#448,depDate#449,flight_leg1#450,flight_leg2#451,fromCity#452,price#453,searchDate#454,stayDays#455L,tableName#456,task_id#457L,timeline_leg1#458,timeline_leg2#459,toCity#460,trip#461L,url#462,version#463] json\n"