In [1]:
import h2o
import zipfile
import os
import sys
from pyspark.sql import SparkSession
from IPython.display import display
from pyspark.sql.functions import regexp_extract, col, split, udf, \
                                 trim, when, from_unixtime, unix_timestamp, minute, hour, datediff, lit, array,\
                                 to_date
from pyspark.sql import functions as F
from pyspark.sql.types import IntegerType, StringType, BooleanType, ArrayType, StructType, StructField, LongType, TimestampType
import datetime
import argparse
import json
import glob, os, shutil
import pandas as pd
from pandas.io.json import json_normalize
from pyspark import SparkContext

pd.options.display.max_columns = 99

sc = SparkContext()

spark = SparkSession \
        .builder \
        .appName("Data ETL") \
        .config("spark.some.config.option", "some-value") \
        .getOrCreate()
        
display(spark.version)

'2.1.1'

# Load Data

In [2]:
# Version 1.0
flight = spark.read.parquet("/home/ubuntu/s3/comb/flight_v1_0.pq")
display(flight.count())
display(flight.show(2))

txt_exception_folder = '/home/ubuntu/s3/comb/txt_exception/'
print(txt_exception_folder)

# Version 1.1
flightv1_1 = spark.read.json(os.path.join(txt_exception_folder, "flight_15_13_price_2017-05-11*.txt"))
display(flightv1_1.count())
display(flightv1_1.show(1))

2288103

+--------------+----------+---------+----------------+-------+------------+----+-------+------------+-------------+--------------------+-------------+--------------------+--------------------+--------+-----------+-------------+-----+--------------------+-----+------+----------+-----------+---------+----+-------------------+-----------+-----+----+
|from_city_name|start_date|stay_days|      table_name|task_id|to_city_name|trip|version|airline_code|airline_codes|            arr_time|check_bag_inc|             company|            dep_time|duration|flight_code|flight_number|index|               plane|power| price|price_code|search_date|span_days|stop|          stop_info|ticket_left|video|wifi|
+--------------+----------+---------+----------------+-------+------------+----+-------+------------+-------------+--------------------+-------------+--------------------+--------------------+--------+-----------+-------------+-----+--------------------+-----+------+----------+-----------+---------+--

None

/home/ubuntu/s3/comb/txt_exception/


74603

+------------+----------+--------------------+--------------------+--------+-----+----------+--------+------------------+-------+--------------------+--------------------+--------+----+--------------------+-------+
|currencyCode|   depDate|         flight_leg1|         flight_leg2|fromCity|price|searchDate|stayDays|         tableName|task_id|       timeline_leg1|       timeline_leg2|  toCity|trip|                 url|version|
+------------+----------+--------------------+--------------------+--------+-----+----------+--------+------------------+-------+--------------------+--------------------+--------+----+--------------------+-------+
|         AUD|2017-05-18|[[Hangzhou,HGH],2...|[[Bangkok,BKK],20...| Bangkok|401.3|2017-05-11|       7|flight_15_13_price|  16232|[[[Macau, Macau,M...|[[[Macau, Macau,M...|Hangzhou|   2|https://www.exped...|    1.1|
+------------+----------+--------------------+--------------------+--------+-----+----------+--------+------------------+-------+-----------

None

# Modify version 1.0

In [37]:
# for one way trips, display None in stay_days
def correct_stay_days(trip, stay_days):
    if trip == '1':
        return None
    else:
        return int(stay_days)

correct_stay_days_UDF = udf(correct_stay_days, IntegerType())

def correct_tickets_left(noOfTicketsLeft):
    if noOfTicketsLeft == 0:
        return 99
    else:
        return noOfTicketsLeft
    
correct_tickets_left_UDF = udf(correct_tickets_left, IntegerType())


# take_all_duration_UDF = udf(lambda rows: None if rows is None else [None if row is None else row.split(":", 1)[1].replace("h", "h:") for row in rows], ArrayType(StringType()))
# couldn't get it to work


flight2 = (flight.withColumn('stayDays', correct_stay_days_UDF(col('trip'), col('stay_days')))
                 .drop('stay_days')           
                 .withColumnRenamed('start_date', 'depDate')                 
                 .withColumn('depDate', to_date('depDate'))
                 .selectExpr('*', 'date_add(depDate, stayDays) as retDate')# this is when the return trip starts, might arrive a day later
                 .withColumnRenamed('from_city_name', 'fromCity')
                 .withColumnRenamed('to_city_name', 'toCity')                 
                 .withColumnRenamed('search_date', 'searchDate')                 
                 .withColumn('searchDate', to_date('searchDate'))
                 .withColumnRenamed('company', 'airlineName')                 
                 .withColumnRenamed('dep_time', 'departureTime')                                  
                 .withColumnRenamed('arr_time', 'arrivalTime')                                                   
                 .withColumn('duration_h', split(flight.duration,'h').getItem(0))
                 .withColumn('duration_m', F.substring_index(split(flight.duration,'h').getItem(1), 'm', 1))
#                  .withColumn('duration', F.struct(col('duration_h'), col('duration_m')))
                 .withColumn('duration_m', (col('duration_h')*60 + col('duration_m')))
                 .drop('duration', 'duration_h', 'flight_number')
                 .withColumnRenamed('price_code', 'currencyCode')                                  
                 .withColumnRenamed('stop', 'stops')
                 .withColumn('stops', col('stops').cast('byte')) 
                 .withColumn('stop_info', split(col('stop_info'), ';'))
                 .withColumn('stop_duration', take_all_duration_UDF(col('stop_info')))
                 .withColumn('noOfTicketsLeft', correct_tickets_left_UDF('ticket_left'))
                 .withColumn('noOfTicketsLeft', col('noOfTicketsLeft').cast('byte')) 
                .drop('ticket_left')
               .withColumnRenamed('table_name', 'tableName')
                .select('price', 'version', 'searchDate', 'tableName', 'task_id', 'currencyCode', 
                        'fromCity', 'toCity', 'trip', 'depDate', 'retDate',
                        'stayDays', 
                       'departureTime', 'arrivalTime', 
                        'airlineName',  'duration_m', 
                        'flight_code', 'plane', 'stops', 'stop_duration', 'noOfTicketsLeft',
                       'airline_code', 'airline_codes',
                       'stop_info', 'span_days', 'power', 'video', 'wifi')                
          )
# varaibles added in v1.1: 'departureTime_leg2', 'arrivalTime_leg2', 'airlineName_leg2','duration_m_leg2','stops_leg2'
#  'noOfTicketsLeft_leg2','airline_codes_leg2', 
# 'stop_list', 'url'

# variables dropped in v1.1:
# 'span_days', 'power', 'video', 'wifi', 'stop_info'

display(flight2.where(col('trip') == 1).show(1))
display(flight2.where(col('trip') == 2).show(1, truncate=False))
flight2.printSchema()

# flight2.select('flight_code', 'flight_number').distinct().show(1000)
# flight2.select('stop_info').distinct().show()
# flight2.select('stop_list').distinct().show(100, truncate=False)

+------+-------+----------+----------------+-------+------------+--------+-------+----+----------+-------+--------+--------------------+--------------------+--------------+----------+-----------+----------------+-----+----------------+---------------+------------+-------------+--------------------+---------+-----+-----+-----+
| price|version|searchDate|       tableName|task_id|currencyCode|fromCity| toCity|trip|   depDate|retDate|stayDays|       departureTime|         arrivalTime|   airlineName|duration_m|flight_code|           plane|stops|   stop_duration|noOfTicketsLeft|airline_code|airline_codes|           stop_info|span_days|power|video| wifi|
+------+-------+----------+----------------+-------+------------+--------+-------+----+----------+-------+--------+--------------------+--------------------+--------------+----------+-----------+----------------+-----+----------------+---------------+------------+-------------+--------------------+---------+-----+-----+-----+
|605.72|    1.0|

None

+-----+-------+----------+----------------+-------+------------+--------+-------+----+----------+----------+--------+-----------------------------+-----------------------------+--------------+----------+-----------+-----------------------------------+-----+---------------+---------------+------------+-------------+---------------------+---------+-----+-----+----+
|price|version|searchDate|tableName       |task_id|currencyCode|fromCity|toCity |trip|depDate   |retDate   |stayDays|departureTime                |arrivalTime                  |airlineName   |duration_m|flight_code|plane                              |stops|stop_duration  |noOfTicketsLeft|airline_code|airline_codes|stop_info            |span_days|power|video|wifi|
+-----+-------+----------+----------------+-------+------------+--------+-------+----+----------+----------+--------+-----------------------------+-----------------------------+--------------+----------+-----------+-----------------------------------+-----+-----------

None

root
 |-- price: double (nullable = true)
 |-- version: string (nullable = true)
 |-- searchDate: date (nullable = true)
 |-- tableName: string (nullable = true)
 |-- task_id: string (nullable = true)
 |-- currencyCode: string (nullable = true)
 |-- fromCity: string (nullable = true)
 |-- toCity: string (nullable = true)
 |-- trip: string (nullable = true)
 |-- depDate: date (nullable = true)
 |-- retDate: date (nullable = true)
 |-- stayDays: integer (nullable = true)
 |-- departureTime: string (nullable = true)
 |-- arrivalTime: string (nullable = true)
 |-- airlineName: string (nullable = true)
 |-- duration_m: double (nullable = true)
 |-- flight_code: string (nullable = true)
 |-- plane: string (nullable = true)
 |-- stops: byte (nullable = true)
 |-- stop_duration: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- noOfTicketsLeft: byte (nullable = true)
 |-- airline_code: string (nullable = true)
 |-- airline_codes: array (nullable = true)
 |    |-- ele

In [38]:
flight2.select('stop_info').distinct().show(100, truncate=False)

+-----------------------------------------+
|stop_info                                |
+-----------------------------------------+
|[Hong Kong(HKG):13h10m]                  |
|[Denpasar(DPS):6h50m]                    |
|[Kunming(KMG):10h30m]                    |
|[Auckland(AKL):3h5m]                     |
|[Tokyo(NRT):7h50m]                       |
|[Guangzhou(CAN):4h15m]                   |
|[Shenzhen(SZX):4h45m]                    |
|[Guangzhou(CAN):9h35m]                   |
|[Xi'an(XIY):4h45m]                       |
|[Hong Kong(HKG):5h15m]                   |
|[Bangkok(BKK):9h15m]                     |
|[Fuzhou(FOC):11h5m]                      |
|[Melbourne(MEL):6h35m]                   |
|[Chengdu(CTU):12h35m]                    |
|[Xi'an(XIY):3h55m]                       |
|[Changsha(CSX):7h55m]                    |
|[Chongqing(CKG):13h20m]                  |
|[Hangzhou(HGH):4h10m]                    |
|[Melbourne(MEL):12h35m]                  |
|[Chengdu(CTU):6h5m]            

In [39]:
# flight2.sample(False, 0.001, 42).toPandas()

flight2.limit(10).toPandas()


Unnamed: 0,price,version,searchDate,tableName,task_id,currencyCode,fromCity,toCity,trip,depDate,retDate,stayDays,departureTime,arrivalTime,airlineName,duration_m,flight_code,plane,stops,stop_duration,noOfTicketsLeft,airline_code,airline_codes,stop_info,span_days,power,video,wifi
0,0.0,1.0,2017-05-08,flight_1_5_price,620,,sydney,beijing,2,2017-09-09,2017-10-07,28,2017-09-09T11:15:00.000+10:00,2017-09-10T04:10:00.000+08:00,Qantas Airways,1135.0,QF145,BOEING 737-800 (WINGLETS) PASSENGER,1,[Auckland(AKL)],99,QF,"[QF, CA]",[Auckland(AKL):2h35m],0,,,
1,472.14,1.0,2017-05-08,flight_1_5_price,620,AUD,sydney,beijing,2,2017-09-09,2017-10-07,28,2017-09-09T20:50:00.000+10:00,2017-09-10T17:25:00.000+08:00,China Eastern Airlines,1355.0,MU778,AIRBUS INDUSTRIE A330-200,1,[Kunming(KMG)],4,MU,"[MU, MU]",[Kunming(KMG):8h30m],0,,,
2,1095.74,1.0,2017-05-08,flight_1_5_price,620,AUD,sydney,beijing,2,2017-09-09,2017-10-07,28,2017-09-09T10:00:00.000+10:00,2017-09-10T06:55:00.000+08:00,Garuda Indonesia,1375.0,GA715,AIRBUS INDUSTRIE A330-300,1,[Denpasar(DPS)],9,GA,"[GA, GA]",[Denpasar(DPS):9h0m],0,,,
3,953.94,1.0,2017-05-08,flight_1_5_price,620,AUD,sydney,beijing,2,2017-09-09,2017-10-07,28,2017-09-09T21:55:00.000+10:00,2017-09-10T11:40:00.000+08:00,Cathay Pacific,945.0,CX138,BOEING 777-300ER,1,[Hong Kong(HKG)],4,CX,"[CX, CX]",[Hong Kong(HKG):3h15m],0,,,
4,1006.14,1.0,2017-05-08,flight_1_5_price,620,AUD,sydney,beijing,2,2017-09-09,2017-10-07,28,2017-09-09T10:45:00.000+10:00,2017-09-09T23:15:00.000+08:00,China Southern Airlines,870.0,CZ326,Airbus A330,1,[Guangzhou(CAN)],9,CZ,"[CZ, CZ]",[Guangzhou(CAN):1h30m],0,,,
5,1113.74,1.0,2017-05-08,flight_1_5_price,620,AUD,sydney,beijing,2,2017-09-09,2017-10-07,28,2017-09-09T10:05:00.000+10:00,2017-09-10T02:15:00.000+08:00,Cathay Pacific,1090.0,CX162,AIRBUS INDUSTRIE A330-300,1,[Hong Kong(HKG)],9,CX,"[CX, CX]",[Hong Kong(HKG):5h10m],0,,,
6,1006.14,1.0,2017-05-08,flight_1_5_price,620,AUD,sydney,beijing,2,2017-09-09,2017-10-07,28,2017-09-09T21:45:00.000+10:00,2017-09-10T13:15:00.000+08:00,China Southern Airlines,1050.0,CZ302,Airbus A330,1,[Guangzhou(CAN)],9,CZ,"[CZ, CZ]",[Guangzhou(CAN):4h35m],0,,,
7,1119.06,1.0,2017-05-08,flight_1_5_price,620,AUD,sydney,beijing,2,2017-09-09,2017-10-07,28,2017-10-07T11:50:00.000+08:00,2017-10-08T07:05:00.000+10:00,Korean Air,1035.0,KE2852,772 - BOEING 777/200,1,[Seoul(GMP)],7,KE,"[KE, KE]",[Seoul(GMP):4h10m],0,,,
8,1164.64,1.0,2017-05-08,flight_1_5_price,620,AUD,sydney,beijing,2,2017-09-09,2017-10-07,28,2017-09-09T07:55:00.000+10:00,2017-09-09T23:00:00.000+08:00,Singapore Airlines,1025.0,SQ212,BOEING 777-300ER,1,[Singapore(SIN)],9,SQ,"[SQ, SQ]",[Singapore(SIN):2h35m],0,,,
9,881.54,1.0,2017-05-08,flight_1_5_price,620,AUD,sydney,beijing,2,2017-09-09,2017-10-07,28,2017-10-07T14:50:00.000+08:00,2017-10-08T08:40:00.000+10:00,Qantas Airways,950.0,QF5004,AIRBUS INDUSTRIE A330-200,1,[Hangzhou(HGH)],9,QF,[QF],[Hangzhou(HGH):1h50m],0,,,


# Modify Version 1.1

In [6]:
flightv1_1.show(2)

+------------+----------+--------------------+--------------------+--------+-----+----------+--------+------------------+-------+--------------------+--------------------+--------+----+--------------------+-------+
|currencyCode|   depDate|         flight_leg1|         flight_leg2|fromCity|price|searchDate|stayDays|         tableName|task_id|       timeline_leg1|       timeline_leg2|  toCity|trip|                 url|version|
+------------+----------+--------------------+--------------------+--------+-----+----------+--------+------------------+-------+--------------------+--------------------+--------+----+--------------------+-------+
|         AUD|2017-05-18|[[Hangzhou,HGH],2...|[[Bangkok,BKK],20...| Bangkok|401.3|2017-05-11|       7|flight_15_13_price|  16232|[[[Macau, Macau,M...|[[[Macau, Macau,M...|Hangzhou|   2|https://www.exped...|    1.1|
|         AUD|2017-05-18|[[Hangzhou,HGH],2...|[[Bangkok,BKK],20...| Bangkok|401.3|2017-05-11|       7|flight_15_13_price|  16232|[[[Macau, M

In [7]:

# # take_all_level1_str = udf(lambda rows, a: [row[a] for row in rows], ArrayType(StringType()))
# take_all_level2_str = udf(lambda rows, a, b:  [None if row is None else row[a][b] for row in rows], ArrayType(StringType()))
# # take_all = udf(lambda rows, a: [row[a]['city'] for row in rows], ArrayType(StringType()))


# flightv1_1.withColumn("city", take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('city')))\
#                       .select('airports').show(10)
# # flightv1_1.withColumn("airports", take_all_level1_str(flightv1_1.timeline_leg1, 'type')).select('airports').show(10)
# # flightv1_1.withColumn("airports", take_all(flightv1_1.timeline_leg1, lit('arrivalAirport'))).select('airports').show(10)

# # display(flightv1_1.select('timeline_leg1').show(100, truncate=False))



In [8]:
# df.selectExpr("explode(check) as e").select("e.*").show()

# flightv1_1.selectExpr('explode(timeline_leg1) as e').select('e.*').show(truncate=False)

### Need to split stop list into duration and make it compatible with v1.0 - Can't figure out how to do this for v1.0

In [40]:
# print(
#     datetime.datetime.fromtimestamp(
#         int("1284101485")
#     ).strftime('%Y-%m-%d %H:%M:%S')
# )

timeFmt = "yyyy-MM-dd'T'HH:mm:ss.SSS"

take_all_level2_str = udf(lambda rows, a, b: None if rows is None else [None if row is None else row[a][b] for row in rows], ArrayType(StringType()))
take_all_level1_str = udf(lambda rows, a: None if rows is None else [None if row is None else row[a] for row in rows], ArrayType(StringType()))
# take_all_level2_long = udf(lambda rows, a, b: None if rows is None else [None if row is None else datetime.datetime.fromtimestamp(row[a][b]) for row in rows], ArrayType(TimestampType()))

# airport = ArrayType(StructType([
#                                 StructField("airportCityState", StringType()),
#                                 StructField("city", StringType()),
#                                 StructField("code", StringType()),
#                                 StructField("localName", StringType()),
#                                 StructField("longName", StringType()),
#                                 StructField("name", StringType()),              
#           ]))

# take_all_airport = udf(lambda rows, a:  [None if row is None else row[a] for row in rows], ArrayType(airport))


flightv1_1_2 = (flightv1_1.withColumn('trip', col('trip').cast('string'))
                    .withColumn('stayDays', correct_stay_days_UDF(col('trip'), col('stayDays')))                    
                    .withColumn('depDate', to_date('depDate'))
                    .withColumn('searchDate', to_date('searchDate'))
                    .selectExpr('*', 'date_add(depDate, stayDays) as retDate')# this is when the return trip starts, might arrive a day later
                    .withColumn('airline_code', flightv1_1.flight_leg1.carrierSummary.airlineCodes.getItem(0))                   
                    .withColumn('airline_codes', flightv1_1.flight_leg1.carrierSummary.airlineCodes)                    
                    .withColumn('airline_codes_leg2', flightv1_1.flight_leg2.carrierSummary.airlineCodes)                    
                    .withColumn('departureTime', flightv1_1.flight_leg1.departureTime)
                    .withColumn('departureTime_leg2', flightv1_1.flight_leg2.departureTime)
                    .withColumn('arrivalTime', flightv1_1.flight_leg1.arrivalTime)
                    .withColumn('arrivalTime_leg2', flightv1_1.flight_leg2.arrivalTime)
#                 .withColumn('check_bag_inc', flightv1_1.flight_leg1.arrivalTime)
                    .withColumn('airlineName', flightv1_1.flight_leg1.carrierSummary.airlineName)
                    .withColumn('airlineName_leg2', flightv1_1.flight_leg2.carrierSummary.airlineName)
                    .withColumn('duration_m', (F.unix_timestamp('arrivalTime', format=timeFmt) - 
                                               F.unix_timestamp('departureTime', format=timeFmt))/60)                    
                .withColumn('duration_m_leg2', (F.unix_timestamp('arrivalTime_leg2', format=timeFmt) - 
                                               F.unix_timestamp('departureTime_leg2', format=timeFmt))/60)                    
#                     .withColumn('duration', flightv1_1.timeline_leg1.getItem(1).duration)
                .withColumn('airlineCode', flightv1_1.timeline_leg1.getItem(0).carrier.airlineCode)
                .withColumn('flightNumber', flightv1_1.timeline_leg1.getItem(0).carrier.flightNumber.cast('string'))                
                .select('*', F.concat(col('airlineCode'), col('flightNumber')).alias('flight_code'))
                .drop('airlineCode', 'flightNumber')
                .withColumn('plane', flightv1_1.timeline_leg1.getItem(0).carrier.plane)                
                .withColumn('stops', flightv1_1.flight_leg1.stops.cast('byte'))                                
                .withColumn('stops_leg2', flightv1_1.flight_leg2.stops.cast('byte'))                
                
#                 .withColumn('stop_list', flightv1_1.flight_leg1.stop_list)# need to do more work                
                .withColumn('stop_airport', take_all_level1_str(flightv1_1.flight_leg1.stop_list, lit('airport')))                                               
                .withColumn('stop_duration', take_all_level1_str(flightv1_1.flight_leg1.stop_list, lit('duration')))                                               
                
#                 .withColumn('stop_list_leg2', flightv1_1.flight_leg2.stop_list)               
                .withColumn('stop_airport_leg2', take_all_level1_str(flightv1_1.flight_leg2.stop_list, lit('airport')))                                               
                .withColumn('stop_duration_leg2', take_all_level1_str(flightv1_1.flight_leg2.stop_list, lit('duration')))                                               
                
                
                
                .withColumn('noOfTicketsLeft', correct_tickets_left_UDF(flightv1_1.flight_leg1.carrierSummary.noOfTicketsLeft))
                .withColumn('noOfTicketsLeft', col('noOfTicketsLeft').cast('byte'))                
                .withColumn('noOfTicketsLeft_leg2', correct_tickets_left_UDF(flightv1_1.flight_leg2.carrierSummary.noOfTicketsLeft))
                .withColumn('noOfTicketsLeft_leg2', col('noOfTicketsLeft_leg2').cast('byte'))
                .withColumn('fromCityAirportCode', flightv1_1.flight_leg1.departureLocation.airportCode)                
                .withColumn('toCityAirportCode', flightv1_1.flight_leg1.arrivalLocation.airportCode)
                .withColumn('fromCityAirportCode_leg2', flightv1_1.flight_leg2.departureLocation.airportCode)
                .withColumn('toCityAirportCode_leg2', flightv1_1.flight_leg2.arrivalLocation.airportCode)
                
                # carrier leg 1
                .withColumn('carrierAirProviderId', flightv1_1.flight_leg1.carrierSummary.airProviderId)
                .withColumn('carrierAirlineImageFileName', flightv1_1.flight_leg1.carrierSummary.airlineImageFileName)
                .withColumn('carrierMixedCabinClass', flightv1_1.flight_leg1.carrierSummary.mixedCabinClass)
                .withColumn('carrierMultiStop', flightv1_1.flight_leg1.carrierSummary.multiStop)
                .withColumn('carrierNextDayArrival', flightv1_1.flight_leg1.carrierSummary.nextDayArrival)
                
                # carrier leg 2
                .withColumn('carrierAirProviderId_leg2', flightv1_1.flight_leg2.carrierSummary.airProviderId)
                .withColumn('carrierAirlineImageFileName_leg2', flightv1_1.flight_leg2.carrierSummary.airlineImageFileName)
                .withColumn('carrierMixedCabinClass_leg2', flightv1_1.flight_leg2.carrierSummary.mixedCabinClass)
                .withColumn('carrierMultiStop_leg2', flightv1_1.flight_leg2.carrierSummary.multiStop)
                .withColumn('carrierNextDayArrival_leg2', flightv1_1.flight_leg2.carrierSummary.nextDayArrival)
                
                ### Leg 1
                ## Leg 1 departure
#                 .withColumn('timeline_departureAirport', take_all_airport(flightv1_1.timeline_leg1, lit('departureAirport')))                               
                .withColumn('timeline_departureAirport_cityState', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('airportCityState')))
                .withColumn('timeline_departureAirport_city', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('city')))
                .withColumn('timeline_departureAirport_code', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('code')))
                .withColumn('timeline_departureAirport_localName', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('localName')))
                .withColumn('timeline_departureAirport_longName', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('longName')))
                .withColumn('timeline_departureAirport_name', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureAirport'), lit('name')))
                
                .withColumn('timeline_departureTime', take_all_level2_str(flightv1_1.timeline_leg1, lit('departureTime'), lit('isoStr')))
                
                

                ## Leg 1 arrival
                .withColumn('timeline_arrivalAirport_cityState', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('airportCityState')))
                .withColumn('timeline_arrivalAirport_city', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('city')))
                .withColumn('timeline_arrivalAirport_code', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('code')))
                .withColumn('timeline_arrivalAirport_localName', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('localName')))
                .withColumn('timeline_arrivalAirport_longName', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('longName')))
                .withColumn('timeline_arrivalAirport_name', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalAirport'), lit('name')))                
                
                .withColumn('timeline_arrivalTime', take_all_level2_str(flightv1_1.timeline_leg1, lit('arrivalTime'), lit('isoStr')))
                
                # distance
                .withColumn('timeline_distance', take_all_level2_str(flightv1_1.timeline_leg1, lit('distance'), lit('formattedTotal')))
                
                # carrier
                .withColumn('timeline_plane', take_all_level2_str(flightv1_1.timeline_leg1, lit('carrier'), lit('plane')))
                
                # brandedFareName
                .withColumn('timeline_brandedFareName', take_all_level1_str(flightv1_1.timeline_leg1, lit('brandedFareName')))                               
                
                # type
                .withColumn('timeline_type', take_all_level1_str(flightv1_1.timeline_leg1, lit('type')))                               
                
                ### Leg 2
                ## Leg 2 departure
                .withColumn('timeline_departureAirport_cityState_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('airportCityState')))
                .withColumn('timeline_departureAirport_city_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('city')))
                .withColumn('timeline_departureAirport_code_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('code')))
                .withColumn('timeline_departureAirport_localName_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('localName')))
                .withColumn('timeline_departureAirport_longName_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('longName')))
                .withColumn('timeline_departureAirport_name_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureAirport'), lit('name')))
                
                .withColumn('timeline_departureTime_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('departureTime'), lit('isoStr')))                
                

                ## Leg 2 arrival
                .withColumn('timeline_arrivalAirport_cityState_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('airportCityState')))
                .withColumn('timeline_arrivalAirport_city_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('city')))
                .withColumn('timeline_arrivalAirport_code_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('code')))
                .withColumn('timeline_arrivalAirport_localName_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('localName')))
                .withColumn('timeline_arrivalAirport_longName_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('longName')))
                .withColumn('timeline_arrivalAirport_name_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalAirport'), lit('name')))                
                
                .withColumn('timeline_arrivalTime_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('arrivalTime'), lit('isoStr')))
                
                # distance
                .withColumn('timeline_distance_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('distance'), lit('formattedTotal')))
               
                # carrier
                .withColumn('timeline_plane_leg2', take_all_level2_str(flightv1_1.timeline_leg2, lit('carrier'), lit('plane')))
                
                # brandedFareName
                .withColumn('timeline_brandedFareName_leg2', take_all_level1_str(flightv1_1.timeline_leg2, lit('brandedFareName')))                           
                
                # type
                .withColumn('timeline_type_leg2', take_all_level1_str(flightv1_1.timeline_leg2, lit('type')))                               
                

                
                .select('price', 'version', 'searchDate', 'tableName', 'task_id', 'currencyCode', 
                        'fromCity', 'toCity', 'trip', 'depDate', 'retDate',
                        'stayDays', 
                       'departureTime', 'arrivalTime', 'departureTime_leg2', 'arrivalTime_leg2',
                        'airlineName', 'airlineName_leg2', 'duration_m', 'duration_m_leg2',                
                        'flight_code', 'plane', 'stops', 'stops_leg2', 'stop_airport', 'stop_duration', 'stop_airport_leg2', 'stop_duration_leg2',
                        'noOfTicketsLeft', 'noOfTicketsLeft_leg2',
                       'airline_code', 'airline_codes', 'airline_codes_leg2', 
                        'url', 'fromCityAirportCode', 'toCityAirportCode', 'fromCityAirportCode_leg2', 'toCityAirportCode_leg2',
                       'carrierAirProviderId', 'carrierAirlineImageFileName', 'carrierMixedCabinClass', 'carrierMultiStop', 'carrierNextDayArrival',
                        'carrierAirProviderId_leg2', 'carrierAirlineImageFileName_leg2', 'carrierMixedCabinClass_leg2', 'carrierMultiStop_leg2', 'carrierNextDayArrival_leg2',

                        ## leg 1
                        # departure
                        'timeline_departureAirport_cityState', 'timeline_departureAirport_city', 'timeline_departureAirport_code', 'timeline_departureAirport_localName', 
                        'timeline_departureAirport_longName', 'timeline_departureAirport_name',
                        
                        'timeline_departureTime',

                        # arrival
                        'timeline_arrivalAirport_cityState', 'timeline_arrivalAirport_city', 'timeline_arrivalAirport_code', 'timeline_arrivalAirport_localName', 
                        'timeline_arrivalAirport_longName', 'timeline_arrivalAirport_name',
                        
                        'timeline_arrivalTime',
                        
                        'timeline_distance',
                        'timeline_plane',
                        'timeline_brandedFareName',
                        'timeline_type',
                        
                        ## leg 2                        
                        # departure
                        'timeline_departureAirport_cityState_leg2', 'timeline_departureAirport_city_leg2', 'timeline_departureAirport_code_leg2', 'timeline_departureAirport_localName_leg2', 
                        'timeline_departureAirport_longName_leg2', 'timeline_departureAirport_name_leg2',
                        
                        'timeline_departureTime_leg2',

                        # arrival
                        'timeline_arrivalAirport_cityState_leg2', 'timeline_arrivalAirport_city_leg2', 'timeline_arrivalAirport_code_leg2', 'timeline_arrivalAirport_localName_leg2', 
                        'timeline_arrivalAirport_longName_leg2', 'timeline_arrivalAirport_name_leg2',
                        
                        'timeline_arrivalTime_leg2',
                        
                        'timeline_distance_leg2',
                        'timeline_plane_leg2',
                        'timeline_brandedFareName_leg2',
                        'timeline_type_leg2'
                       )                
               )

display(flightv1_1_2.where(col('trip')=='1').show(1))
display(flightv1_1_2.where(col('trip')=='2').show(1))
display(flightv1_1_2.printSchema())

# display(flightv1_1.select("flight_leg1.stop_list.airport").show(100, truncate=False))
        
#      |         flight_leg1|flight_leg2|| |||   airline_code      ||       timeline_leg1|timeline_leg2|  |                 |||||       ||
               

# flightv1_1_2.show(3)

# temp = flightv1_1.select("flight_leg1.stop_list").show(100, truncate=False)
# flightv1_1_2.printSchema()         
# flightv1_1_2.crosstab('noOfTicketsLeft', 'noOfTicketsLeft_leg2')
# flightv1_1_2.cube('trip', flightv1_1_2.noOfTicketsLeft, flightv1_1_2.noOfTicketsLeft_leg2).count().orderBy('trip', "noOfTicketsLeft", "noOfTicketsLeft_leg2").show(truncate=False)

+------+-------+----------+------------------+-------+------------+--------+--------+----+----------+-------+--------+--------------------+--------------------+------------------+----------------+------------+----------------+----------+---------------+-----------+-----+-----+----------+------------+-------------+-----------------+------------------+---------------+--------------------+------------+-------------+------------------+--------------------+-------------------+-----------------+------------------------+----------------------+--------------------+---------------------------+----------------------+----------------+---------------------+-------------------------+--------------------------------+---------------------------+---------------------+--------------------------+-----------------------------------+------------------------------+------------------------------+-----------------------------------+----------------------------------+------------------------------+-----------

None

+-----+-------+----------+------------------+-------+------------+--------+--------+----+----------+----------+--------+--------------------+--------------------+--------------------+--------------------+-----------------+-----------------+----------+---------------+-----------+-----------+-----+----------+--------------------+-------------+--------------------+------------------+---------------+--------------------+------------+-------------+------------------+--------------------+-------------------+-----------------+------------------------+----------------------+--------------------+---------------------------+----------------------+----------------+---------------------+-------------------------+--------------------------------+---------------------------+---------------------+--------------------------+-----------------------------------+------------------------------+------------------------------+-----------------------------------+----------------------------------+-----------

None

root
 |-- price: double (nullable = true)
 |-- version: string (nullable = true)
 |-- searchDate: date (nullable = true)
 |-- tableName: string (nullable = true)
 |-- task_id: long (nullable = true)
 |-- currencyCode: string (nullable = true)
 |-- fromCity: string (nullable = true)
 |-- toCity: string (nullable = true)
 |-- trip: string (nullable = true)
 |-- depDate: date (nullable = true)
 |-- retDate: date (nullable = true)
 |-- stayDays: integer (nullable = true)
 |-- departureTime: string (nullable = true)
 |-- arrivalTime: string (nullable = true)
 |-- departureTime_leg2: string (nullable = true)
 |-- arrivalTime_leg2: string (nullable = true)
 |-- airlineName: string (nullable = true)
 |-- airlineName_leg2: string (nullable = true)
 |-- duration_m: double (nullable = true)
 |-- duration_m_leg2: double (nullable = true)
 |-- flight_code: string (nullable = true)
 |-- plane: string (nullable = true)
 |-- stops: byte (nullable = true)
 |-- stops_leg2: byte (nullable = true)
 |-- st

None

In [10]:
# flightv1_1_2.select('timeline_departureTime', 'departureTime', 'departureTime_leg2').show(2, truncate=False)


In [11]:
# flightv1_1.select('timeline_leg1').show(truncate=False)

In [12]:
# datetime.datetime.fromtimestamp(1495084500000/3600)

In [13]:
# from datetime import datetime
# dt = datetime.now()
# dt.microsecond

In [14]:
# import datetime
# print(
#     datetime.datetime.fromtimestamp(
#         int("1495084500")
#     ).strftime('%d/%b/%Y:%H:%M:%S %z')
# )


In [22]:
flightv1_1_2.dtypes




[('price', 'double'),
 ('version', 'string'),
 ('searchDate', 'date'),
 ('tableName', 'string'),
 ('task_id', 'bigint'),
 ('currencyCode', 'string'),
 ('fromCity', 'string'),
 ('toCity', 'string'),
 ('trip', 'string'),
 ('depDate', 'date'),
 ('retDate', 'date'),
 ('stayDays', 'int'),
 ('departureTime', 'string'),
 ('arrivalTime', 'string'),
 ('departureTime_leg2', 'string'),
 ('arrivalTime_leg2', 'string'),
 ('airlineName', 'string'),
 ('airlineName_leg2', 'string'),
 ('duration_m', 'double'),
 ('duration_m_leg2', 'double'),
 ('flight_code', 'string'),
 ('plane', 'string'),
 ('stops', 'tinyint'),
 ('stops_leg2', 'tinyint'),
 ('stop_list', 'array<struct<airport:string,duration:string>>'),
 ('stop_duration', 'array<string>'),
 ('stop_list_leg2', 'array<struct<airport:string,duration:string>>'),
 ('noOfTicketsLeft', 'tinyint'),
 ('noOfTicketsLeft_leg2', 'tinyint'),
 ('airline_code', 'string'),
 ('airline_codes', 'array<string>'),
 ('airline_codes_leg2', 'array<string>'),
 ('url', 'string'

In [43]:
# flightv1_1_2.sample(False, 0.001, 42).toPandas()
flightv1_1_2.where(col('trip')=='1').limit(10).toPandas()

Unnamed: 0,price,version,searchDate,tableName,task_id,currencyCode,fromCity,toCity,trip,depDate,retDate,stayDays,departureTime,arrivalTime,departureTime_leg2,arrivalTime_leg2,airlineName,airlineName_leg2,duration_m,duration_m_leg2,flight_code,plane,stops,stops_leg2,stop_airport,stop_duration,stop_airport_leg2,stop_duration_leg2,noOfTicketsLeft,noOfTicketsLeft_leg2,airline_code,airline_codes,airline_codes_leg2,url,fromCityAirportCode,toCityAirportCode,fromCityAirportCode_leg2,toCityAirportCode_leg2,carrierAirProviderId,carrierAirlineImageFileName,carrierMixedCabinClass,carrierMultiStop,carrierNextDayArrival,carrierAirProviderId_leg2,carrierAirlineImageFileName_leg2,carrierMixedCabinClass_leg2,carrierMultiStop_leg2,carrierNextDayArrival_leg2,timeline_departureAirport_cityState,timeline_departureAirport_city,timeline_departureAirport_code,timeline_departureAirport_localName,timeline_departureAirport_longName,timeline_departureAirport_name,timeline_departureTime,timeline_arrivalAirport_cityState,timeline_arrivalAirport_city,timeline_arrivalAirport_code,timeline_arrivalAirport_localName,timeline_arrivalAirport_longName,timeline_arrivalAirport_name,timeline_arrivalTime,timeline_distance,timeline_plane,timeline_brandedFareName,timeline_type,timeline_departureAirport_cityState_leg2,timeline_departureAirport_city_leg2,timeline_departureAirport_code_leg2,timeline_departureAirport_localName_leg2,timeline_departureAirport_longName_leg2,timeline_departureAirport_name_leg2,timeline_departureTime_leg2,timeline_arrivalAirport_cityState_leg2,timeline_arrivalAirport_city_leg2,timeline_arrivalAirport_code_leg2,timeline_arrivalAirport_localName_leg2,timeline_arrivalAirport_longName_leg2,timeline_arrivalAirport_name_leg2,timeline_arrivalTime_leg2,timeline_distance_leg2,timeline_plane_leg2,timeline_brandedFareName_leg2,timeline_type_leg2
0,192.91,1.1,2017-05-11,flight_15_13_price,17016,AUD,Bangkok,Hangzhou,1,2017-10-22,,,2017-10-22T07:10:00.000+07:00,2017-10-22T11:35:00.000+08:00,,,Thai AirAsia,,265.0,,FD566,,0,,[],[],,,99,,FD,[FD],,https://www.expedia.com.au/Flights-Search?mode...,DMK,HGH,,,75,FD.gif,False,False,False,,,,,,"[Bangkok, Thailand]",[Bangkok],[DMK],[Don Mueang Intl.],"[Bangkok, Thailand (DMK-Don Mueang Intl.)]",[Bangkok (DMK)],[2017-10-22T07:10:00.000+07:00],"[Hangzhou, China]",[Hangzhou],[HGH],[Xiaoshan Intl.],"[Hangzhou, China (HGH-Xiaoshan Intl.)]",[Hangzhou (HGH)],[2017-10-22T11:35:00.000+08:00],[0],[],[],[Segment],,,,,,,,,,,,,,,,,,
1,214.05,1.1,2017-05-11,flight_15_13_price,17016,AUD,Bangkok,Hangzhou,1,2017-10-22,,,2017-10-22T20:20:00.000+07:00,2017-10-23T13:10:00.000+08:00,,,,,1010.0,,AK889,,1,,"[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl.)]",[8h:40m],,,99,,AK,"[AK, D7]",,https://www.expedia.com.au/Flights-Search?mode...,DMK,HGH,,,75,,False,True,True,,,,,,"[Bangkok, Thailand, Kuala Lumpur, Malaysia]","[Bangkok, Kuala Lumpur]","[DMK, KUL]","[Don Mueang Intl., Kuala Lumpur Intl.]","[Bangkok, Thailand (DMK-Don Mueang Intl.), Kua...","[Bangkok (DMK), Kuala Lumpur (Kuala Lumpur Int...","[2017-10-22T20:20:00.000+07:00, 2017-10-23T08:...","[Kuala Lumpur, Malaysia, Hangzhou, China]","[Kuala Lumpur, Hangzhou]","[KUL, HGH]","[Kuala Lumpur Intl., Xiaoshan Intl.]","[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl...","[Kuala Lumpur (Kuala Lumpur Intl.), Hangzhou (...","[2017-10-22T23:30:00.000+08:00, 2017-10-23T13:...","[0, 0]","[, ]","[, ]","[Segment, Segment]",,,,,,,,,,,,,,,,,,
2,214.05,1.1,2017-05-11,flight_15_13_price,17016,AUD,Bangkok,Hangzhou,1,2017-10-22,,,2017-10-22T17:10:00.000+07:00,2017-10-23T13:10:00.000+08:00,,,,,1200.0,,FD315,,1,,"[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl.)]",[12h:0m],,,99,,FD,"[FD, D7]",,https://www.expedia.com.au/Flights-Search?mode...,DMK,HGH,,,75,,False,True,True,,,,,,"[Bangkok, Thailand, Kuala Lumpur, Malaysia]","[Bangkok, Kuala Lumpur]","[DMK, KUL]","[Don Mueang Intl., Kuala Lumpur Intl.]","[Bangkok, Thailand (DMK-Don Mueang Intl.), Kua...","[Bangkok (DMK), Kuala Lumpur (Kuala Lumpur Int...","[2017-10-22T17:10:00.000+07:00, 2017-10-23T08:...","[Kuala Lumpur, Malaysia, Hangzhou, China]","[Kuala Lumpur, Hangzhou]","[KUL, HGH]","[Kuala Lumpur Intl., Xiaoshan Intl.]","[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl...","[Kuala Lumpur (Kuala Lumpur Intl.), Hangzhou (...","[2017-10-22T20:10:00.000+08:00, 2017-10-23T13:...","[0, 0]","[, ]","[, ]","[Segment, Segment]",,,,,,,,,,,,,,,,,,
3,214.05,1.1,2017-05-11,flight_15_13_price,17016,AUD,Bangkok,Hangzhou,1,2017-10-22,,,2017-10-22T15:15:00.000+07:00,2017-10-23T13:10:00.000+08:00,,,,,1315.0,,AK885,,1,,"[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl.)]",[13h:40m],,,99,,AK,"[AK, D7]",,https://www.expedia.com.au/Flights-Search?mode...,DMK,HGH,,,75,,False,True,True,,,,,,"[Bangkok, Thailand, Kuala Lumpur, Malaysia]","[Bangkok, Kuala Lumpur]","[DMK, KUL]","[Don Mueang Intl., Kuala Lumpur Intl.]","[Bangkok, Thailand (DMK-Don Mueang Intl.), Kua...","[Bangkok (DMK), Kuala Lumpur (Kuala Lumpur Int...","[2017-10-22T15:15:00.000+07:00, 2017-10-23T08:...","[Kuala Lumpur, Malaysia, Hangzhou, China]","[Kuala Lumpur, Hangzhou]","[KUL, HGH]","[Kuala Lumpur Intl., Xiaoshan Intl.]","[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl...","[Kuala Lumpur (Kuala Lumpur Intl.), Hangzhou (...","[2017-10-22T18:30:00.000+08:00, 2017-10-23T13:...","[0, 0]","[, ]","[, ]","[Segment, Segment]",,,,,,,,,,,,,,,,,,
4,214.05,1.1,2017-05-11,flight_15_13_price,17016,AUD,Bangkok,Hangzhou,1,2017-10-22,,,2017-10-22T13:10:00.000+07:00,2017-10-23T13:10:00.000+08:00,,,,,1440.0,,AK883,,1,,"[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl.)]",[15h:50m],,,99,,AK,"[AK, D7]",,https://www.expedia.com.au/Flights-Search?mode...,DMK,HGH,,,75,,False,True,True,,,,,,"[Bangkok, Thailand, Kuala Lumpur, Malaysia]","[Bangkok, Kuala Lumpur]","[DMK, KUL]","[Don Mueang Intl., Kuala Lumpur Intl.]","[Bangkok, Thailand (DMK-Don Mueang Intl.), Kua...","[Bangkok (DMK), Kuala Lumpur (Kuala Lumpur Int...","[2017-10-22T13:10:00.000+07:00, 2017-10-23T08:...","[Kuala Lumpur, Malaysia, Hangzhou, China]","[Kuala Lumpur, Hangzhou]","[KUL, HGH]","[Kuala Lumpur Intl., Xiaoshan Intl.]","[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl...","[Kuala Lumpur (Kuala Lumpur Intl.), Hangzhou (...","[2017-10-22T16:20:00.000+08:00, 2017-10-23T13:...","[0, 0]","[, ]","[, ]","[Segment, Segment]",,,,,,,,,,,,,,,,,,
5,214.05,1.1,2017-05-11,flight_15_13_price,17016,AUD,Bangkok,Hangzhou,1,2017-10-22,,,2017-10-22T12:50:00.000+07:00,2017-10-23T13:10:00.000+08:00,,,,,1460.0,,FD319,,1,,"[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl.)]",[16h:10m],,,99,,FD,"[FD, D7]",,https://www.expedia.com.au/Flights-Search?mode...,DMK,HGH,,,75,,False,True,True,,,,,,"[Bangkok, Thailand, Kuala Lumpur, Malaysia]","[Bangkok, Kuala Lumpur]","[DMK, KUL]","[Don Mueang Intl., Kuala Lumpur Intl.]","[Bangkok, Thailand (DMK-Don Mueang Intl.), Kua...","[Bangkok (DMK), Kuala Lumpur (Kuala Lumpur Int...","[2017-10-22T12:50:00.000+07:00, 2017-10-23T08:...","[Kuala Lumpur, Malaysia, Hangzhou, China]","[Kuala Lumpur, Hangzhou]","[KUL, HGH]","[Kuala Lumpur Intl., Xiaoshan Intl.]","[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl...","[Kuala Lumpur (Kuala Lumpur Intl.), Hangzhou (...","[2017-10-22T16:00:00.000+08:00, 2017-10-23T13:...","[0, 0]","[, ]","[, ]","[Segment, Segment]",,,,,,,,,,,,,,,,,,
6,214.05,1.1,2017-05-11,flight_15_13_price,17016,AUD,Bangkok,Hangzhou,1,2017-10-22,,,2017-10-22T12:00:00.000+07:00,2017-10-23T13:10:00.000+08:00,,,,,1510.0,,FD313,,1,,"[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl.)]",[17h:0m],,,99,,FD,"[FD, D7]",,https://www.expedia.com.au/Flights-Search?mode...,DMK,HGH,,,75,,False,True,True,,,,,,"[Bangkok, Thailand, Kuala Lumpur, Malaysia]","[Bangkok, Kuala Lumpur]","[DMK, KUL]","[Don Mueang Intl., Kuala Lumpur Intl.]","[Bangkok, Thailand (DMK-Don Mueang Intl.), Kua...","[Bangkok (DMK), Kuala Lumpur (Kuala Lumpur Int...","[2017-10-22T12:00:00.000+07:00, 2017-10-23T08:...","[Kuala Lumpur, Malaysia, Hangzhou, China]","[Kuala Lumpur, Hangzhou]","[KUL, HGH]","[Kuala Lumpur Intl., Xiaoshan Intl.]","[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl...","[Kuala Lumpur (Kuala Lumpur Intl.), Hangzhou (...","[2017-10-22T15:10:00.000+08:00, 2017-10-23T13:...","[0, 0]","[, ]","[, ]","[Segment, Segment]",,,,,,,,,,,,,,,,,,
7,226.84,1.1,2017-05-11,flight_15_13_price,17016,AUD,Bangkok,Hangzhou,1,2017-10-22,,,2017-10-22T17:15:00.000+07:00,2017-10-22T22:15:00.000+08:00,,,Thai AirAsia,,300.0,,FD568,,0,,[],[],,,99,,FD,[FD],,https://www.expedia.com.au/Flights-Search?mode...,DMK,HGH,,,75,FD.gif,False,False,False,,,,,,"[Bangkok, Thailand]",[Bangkok],[DMK],[Don Mueang Intl.],"[Bangkok, Thailand (DMK-Don Mueang Intl.)]",[Bangkok (DMK)],[2017-10-22T17:15:00.000+07:00],"[Hangzhou, China]",[Hangzhou],[HGH],[Xiaoshan Intl.],"[Hangzhou, China (HGH-Xiaoshan Intl.)]",[Hangzhou (HGH)],[2017-10-22T22:15:00.000+08:00],[0],[],[],[Segment],,,,,,,,,,,,,,,,,,
8,232.6,1.1,2017-05-11,flight_15_13_price,17016,AUD,Bangkok,Hangzhou,1,2017-10-22,,,2017-10-22T18:35:00.000+07:00,2017-10-23T13:10:00.000+08:00,,,,,1115.0,,AK893,,1,,"[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl.)]",[10h:20m],,,99,,AK,"[AK, D7]",,https://www.expedia.com.au/Flights-Search?mode...,DMK,HGH,,,75,,False,True,True,,,,,,"[Bangkok, Thailand, Kuala Lumpur, Malaysia]","[Bangkok, Kuala Lumpur]","[DMK, KUL]","[Don Mueang Intl., Kuala Lumpur Intl.]","[Bangkok, Thailand (DMK-Don Mueang Intl.), Kua...","[Bangkok (DMK), Kuala Lumpur (Kuala Lumpur Int...","[2017-10-22T18:35:00.000+07:00, 2017-10-23T08:...","[Kuala Lumpur, Malaysia, Hangzhou, China]","[Kuala Lumpur, Hangzhou]","[KUL, HGH]","[Kuala Lumpur Intl., Xiaoshan Intl.]","[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl...","[Kuala Lumpur (Kuala Lumpur Intl.), Hangzhou (...","[2017-10-22T21:50:00.000+08:00, 2017-10-23T13:...","[0, 0]","[, ]","[, ]","[Segment, Segment]",,,,,,,,,,,,,,,,,,
9,232.6,1.1,2017-05-11,flight_15_13_price,17016,AUD,Bangkok,Hangzhou,1,2017-10-22,,,2017-10-22T16:00:00.000+07:00,2017-10-23T13:10:00.000+08:00,,,,,1270.0,,AK887,,1,,"[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl.)]",[12h:55m],,,99,,AK,"[AK, D7]",,https://www.expedia.com.au/Flights-Search?mode...,DMK,HGH,,,75,,False,True,True,,,,,,"[Bangkok, Thailand, Kuala Lumpur, Malaysia]","[Bangkok, Kuala Lumpur]","[DMK, KUL]","[Don Mueang Intl., Kuala Lumpur Intl.]","[Bangkok, Thailand (DMK-Don Mueang Intl.), Kua...","[Bangkok (DMK), Kuala Lumpur (Kuala Lumpur Int...","[2017-10-22T16:00:00.000+07:00, 2017-10-23T08:...","[Kuala Lumpur, Malaysia, Hangzhou, China]","[Kuala Lumpur, Hangzhou]","[KUL, HGH]","[Kuala Lumpur Intl., Xiaoshan Intl.]","[Kuala Lumpur, Malaysia (KUL-Kuala Lumpur Intl...","[Kuala Lumpur (Kuala Lumpur Intl.), Hangzhou (...","[2017-10-22T19:15:00.000+08:00, 2017-10-23T13:...","[0, 0]","[, ]","[, ]","[Segment, Segment]",,,,,,,,,,,,,,,,,,
