# Differences in concepts between Polars and pandas
1. Polars does not have a multi-index/index
2. Polars exploits the strong support for concurrency in **Rust** to run many operations in parallel. While some operations in pandas are multi-threaded but the core of the library is single-threaded.
3. Polars supports eager evaluation and lazy evaluation whereas pandas only supports eager evaluation. 

In [194]:
import pandas as pd
import polars as pl

In [195]:
# Example of creating dataframe in pandas
pandas_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
pandas_df = pandas_df[pandas_df["a"] > 1]  # Filter rows where 'a' > 1
pandas_df["c"] = pandas_df["b"] * 2  # Add a new column
print(pandas_df)


   a  b   c
1  2  5  10
2  3  6  12


In [196]:
# Example of creating dataframe in Polars
polars_df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
lazy_df = polars_df.lazy()  # Convert to a lazy DataFrame
result = lazy_df.filter(pl.col("a") > 1).with_columns((pl.col("b") * 2).alias("c"))
print(result.collect())  # Triggers execution and optimization

shape: (2, 3)
┌─────┬─────┬─────┐
│ a   ┆ b   ┆ c   │
│ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ i64 │
╞═════╪═════╪═════╡
│ 2   ┆ 5   ┆ 10  │
│ 3   ┆ 6   ┆ 12  │
└─────┴─────┴─────┘


# Key syntax differences
If your Polars code looks like it could be pandas code, it might run, but it likely runs slower than it should.

In [197]:
#This is an example to contrast pandas operation with polars operations below
import pandas as pd

# 1. Create DataFrame
pandas_df1 = pd.DataFrame({
    "col1": [1, 2, 3, 4],
    "col2": [5, 6, 7, 8],
    "col3": ["A", "B", "A", "B"]
})

# 2. Select a column
selected_col = pandas_df1["col1"]
print("Selected Column:\n", selected_col)

# 3. Add a new column
pandas_df1["new_col"] = pandas_df1["col1"] * 2
print("After Adding Column:\n", pandas_df1)

# 4. Filter rows
filtered_df = pandas_df1[pandas_df1["col1"] > 2]
print("Filtered DataFrame:\n", filtered_df)

# 5. Apply a function
pandas_df1["squared"] = pandas_df1["col1"].apply(lambda x: x ** 2)
print("After Applying Function:\n", pandas_df1)

# 6. Group and aggregate
grouped_df = pandas_df1.groupby("col3").agg({"col2": "sum"}).rename(columns={"col2": "sum_col2"})
print("Grouped and Aggregated DataFrame:\n", grouped_df)

# 7. Sort by a column
sorted_df = pandas_df1.sort_values("col1", ascending=False)
print("Sorted DataFrame:\n", sorted_df)

# 8. Pivot table
pivot_df = pandas_df1.pivot_table(values="col2", index="col1", columns="col3", aggfunc="first")
print("Pivot Table:\n", pivot_df)

# 9. Export to CSV
pandas_df1.to_csv("pandas_output.csv", index=False)
print("Pandas DataFrame exported to CSV.")

Selected Column:
 0    1
1    2
2    3
3    4
Name: col1, dtype: int64
After Adding Column:
    col1  col2 col3  new_col
0     1     5    A        2
1     2     6    B        4
2     3     7    A        6
3     4     8    B        8
Filtered DataFrame:
    col1  col2 col3  new_col
2     3     7    A        6
3     4     8    B        8
After Applying Function:
    col1  col2 col3  new_col  squared
0     1     5    A        2        1
1     2     6    B        4        4
2     3     7    A        6        9
3     4     8    B        8       16
Grouped and Aggregated DataFrame:
       sum_col2
col3          
A           12
B           14
Sorted DataFrame:
    col1  col2 col3  new_col  squared
3     4     8    B        8       16
2     3     7    A        6        9
1     2     6    B        4        4
0     1     5    A        2        1
Pivot Table:
 col3    A    B
col1          
1     5.0  NaN
2     NaN  6.0
3     7.0  NaN
4     NaN  8.0
Pandas DataFrame exported to CSV.


In [198]:
# Mimicking the pandas operations above in polars
import polars as pl

# 1. Create DataFrame
polars_df2 = pl.DataFrame({
    "col1": [1, 2, 3, 4],
    "col2": [5, 6, 7, 8],
    "col3": ["A", "B", "A", "B"]
})

# 2. Select a column
selected_col = polars_df2.select("col1")
print("Selected Column:\n", selected_col)

# 3. Add a new column
polars_df2 = polars_df2.with_columns((polars_df2["col1"] * 2).alias("new_col"))
print("After Adding Column:\n", polars_df2)

# 4. Filter rows
filtered_df = polars_df2.filter(pl.col("col1") > 2)
print("Filtered DataFrame:\n", filtered_df)

# 5. Apply a function
polars_df2 = polars_df2.with_columns(
    (pl.col("col1") ** 2).alias("squared")
)
print("After Applying Function:\n", polars_df2)

# 6. Group and aggregate
grouped_df = polars_df2.group_by("col3").agg(pl.sum("col2").alias("sum_col2"))
print("Grouped and Aggregated DataFrame:\n", grouped_df)

# 7. Sort by a column
sorted_df = polars_df2.sort("col1", descending=True)
print("Sorted DataFrame:\n", sorted_df)

# 8. Pivot table
pivot_df = polars_df2.pivot(values="col2", index="col1", on="col3")
print("Pivot Table:\n", pivot_df)

# 9. Export to CSV
polars_df2.write_csv("polars_output.csv")
print("Polars DataFrame exported to CSV.")


Selected Column:
 shape: (4, 1)
┌──────┐
│ col1 │
│ ---  │
│ i64  │
╞══════╡
│ 1    │
│ 2    │
│ 3    │
│ 4    │
└──────┘
After Adding Column:
 shape: (4, 4)
┌──────┬──────┬──────┬─────────┐
│ col1 ┆ col2 ┆ col3 ┆ new_col │
│ ---  ┆ ---  ┆ ---  ┆ ---     │
│ i64  ┆ i64  ┆ str  ┆ i64     │
╞══════╪══════╪══════╪═════════╡
│ 1    ┆ 5    ┆ A    ┆ 2       │
│ 2    ┆ 6    ┆ B    ┆ 4       │
│ 3    ┆ 7    ┆ A    ┆ 6       │
│ 4    ┆ 8    ┆ B    ┆ 8       │
└──────┴──────┴──────┴─────────┘
Filtered DataFrame:
 shape: (2, 4)
┌──────┬──────┬──────┬─────────┐
│ col1 ┆ col2 ┆ col3 ┆ new_col │
│ ---  ┆ ---  ┆ ---  ┆ ---     │
│ i64  ┆ i64  ┆ str  ┆ i64     │
╞══════╪══════╪══════╪═════════╡
│ 3    ┆ 7    ┆ A    ┆ 6       │
│ 4    ┆ 8    ┆ B    ┆ 8       │
└──────┴──────┴──────┴─────────┘
After Applying Function:
 shape: (4, 5)
┌──────┬──────┬──────┬─────────┬─────────┐
│ col1 ┆ col2 ┆ col3 ┆ new_col ┆ squared │
│ ---  ┆ ---  ┆ ---  ┆ ---     ┆ ---     │
│ i64  ┆ i64  ┆ str  ┆ i64     ┆ i64     │
╞

## Loading a massive dataset in polars

In [199]:
df = pl.read_csv("flight data.csv")

In [200]:
df.head()

from_airport_code,from_country,dest_airport_code,dest_country,aircraft_type,airline_number,airline_name,flight_number,departure_time,arrival_time,duration,stops,price,currency,co2_emissions,avg_co2_emission_for_this_route,co2_percentage,scan_date
str,str,str,str,str,str,str,str,str,str,i64,i64,f64,str,i64,str,str,str
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Canadair RJ 1000|A…","""multi""","""[Air France| Iberia| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1279.0,"""USD""",1320000,"""1320000""","""0%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Canadair RJ 1000|B…","""multi""","""[Air France| Iberia| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1279.0,"""USD""",1195000,"""1320000""","""-9%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A320|Airbus A321|Boeing…","""multi""","""[Air France| LATAM]""","""AF1855|AF1855|AF1855|AF1855""","""2022-04-30 12:45:00""","""2022-05-01 10:15:00""",1530,3,1284.0,"""USD""",1248000,"""1320000""","""-5%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Airbus A320|Boeing…","""multi""","""[Air France| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1290.0,"""USD""",1347000,"""1320000""","""2%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A321neo|Boeing 777|Airb…","""multi""","""[Lufthansa| LATAM]""","""LH1317|LH1317|LH1317""","""2022-04-30 12:35:00""","""2022-05-01 10:15:00""",1540,2,1347.0,"""USD""",1381000,"""1320000""","""4%""","""2022-04-29 17:52:59"""


In [201]:
print(df.shape)

(998866, 18)


#### Finding the number of arrivals by country 

In [202]:
# Finding the number of arrivals by country 
df.group_by("dest_country").len(name="number_of_arrivals")
# (df.group_by("dest_country").len(name="number_of_arrivals")).filter(pl.col('dest_country')=='Belgium')

dest_country,number_of_arrivals
str,u32
"""Vietnam""",9265
"""South Korea""",12697
"""Thailand""",15575
"""Greece""",14625
"""Malaysia""",11871
…,…
"""Sweden""",15621
"""Austria""",16193
"""Mexico""",17656
"""China""",25723


#### what was the lowest price Algeria to Argentina?

In [203]:
df.filter(
    (pl.col("from_country") == "Algeria") & 
    (pl.col("dest_country") == "Argentina")
).select(pl.col("price")).min()

price
f64
1124.0


In [204]:
df.filter(
    (pl.col("from_country") == "Algeria") & 
    (pl.col("dest_country") == "Argentina")
).sort("price").head(1)

from_airport_code,from_country,dest_airport_code,dest_country,aircraft_type,airline_number,airline_name,flight_number,departure_time,arrival_time,duration,stops,price,currency,co2_emissions,avg_co2_emission_for_this_route,co2_percentage,scan_date
str,str,str,str,str,str,str,str,str,str,i64,i64,f64,str,i64,str,str,str
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Canadair RJ 1000|Airbus A330|A…","""multi""","""[Iberia| LATAM]""","""IB8805|IB8805|IB8805""","""2022-05-02 13:10:00""","""2022-05-04 10:15:00""",2945,2,1124.0,"""USD""",1185000,"""1320000""","""-10%""","""2022-04-29 17:52:59"""


#### Lowest CO2 Emission Routes

In [205]:
df.group_by(["from_country", "dest_country"]).agg([
      pl.col("co2_emissions").mean().alias("avg_route_emissions"),
      pl.col("price").mean().alias("avg_route_price")
  ]).sort("avg_route_emissions", descending=False).head(5)

from_country,dest_country,avg_route_emissions,avg_route_price
str,str,f64,f64
"""Germany""","""Germany""",178586.700337,364.449263
"""Australia""","""Australia""",182579.146919,158.570702
"""Dublin""","""Netherlands""",207067.336683,292.993976
"""France""","""Zurich""",208685.936152,358.455488
"""France""","""Germany""",210113.832853,351.054405


#### Airline Performance Analysis

In [206]:
df.group_by(["airline_name", "from_country", "dest_country"]).agg([
      pl.len().alias("total_flights"),
      pl.col("price").mean().alias("avg_price"),
      pl.col("duration").mean().alias("avg_duration"),
      pl.col("co2_emissions").mean().alias("avg_emissions")
  ]).sort("total_flights", descending=True).head(10)

airline_name,from_country,dest_country,total_flights,avg_price,avg_duration,avg_emissions
str,str,str,u32,f64,f64,f64
"""[China Southern]""","""China""","""China""",6537,552.298914,716.713324,277979.807394
"""[Qantas| American]""","""Australia""","""United States""",5996,1564.790027,1708.125083,1717000.0
"""[American]""","""Canada""","""United States""",4702,352.552318,687.07231,358677.158656
"""[Lufthansa| United]""","""Germany""","""United States""",4488,1518.45254,1100.019831,893181.311329
"""[Azul| United]""","""Brazil""","""United States""",4278,4690.743104,1928.774194,1710600.0
"""[China Eastern]""","""China""","""China""",3133,549.357485,688.249282,283033.429765
"""[United]""","""Australia""","""United States""",3081,1264.636157,1617.304771,1533000.0
"""[American]""","""Columbia""","""United States""",2897,773.778046,1062.801519,621938.902313
"""[ANA| United]""","""China""","""United States""",2806,4139.376336,1949.121882,2000100.0
"""[United]""","""Canada""","""United States""",2530,359.642688,695.298024,387981.027668


#### CO2 Emissions by Aircraft Type

In [207]:
df.group_by("aircraft_type").agg([
      pl.col("co2_emissions").mean().alias("avg_emissions"),
      pl.col("co2_emissions").min().alias("min_emissions"),
      pl.col("co2_emissions").max().alias("max_emissions")
  ])

aircraft_type,avg_emissions,min_emissions,max_emissions
str,f64,i64,i64
"""Boeing 747|Boeing 787|Airbus A…",3.5116e6,1359000,6346000
"""Airbus A220-300 Passenger|Embr…",604000.0,604000,604000
"""Airbus A340|Canadair Reg. Jet""",957000.0,957000,957000
"""Airbus A319|De Havilland-Bomba…",248000.0,248000,248000
"""Boeing 737|Airbus A350|Airbus …",1.5033e6,1456000,1578000
…,…,…,…
"""Airbus A318|Airbus A321|Airbus…",412583.333333,288000,605000
"""Airbus A320|Boeing 767|Boeing …",1.09e6,1090000,1090000
"""Boeing 747|Canadair RJ 900|Emb…",2.6615e6,1191000,4132000
"""Airbus A330|Airbus A380|Airbus…",1.93e6,1875000,1985000


#### Price and Emissions Correlation

In [208]:
df.select([
    pl.corr("price", "co2_emissions").alias("price_emissions_correlation")
]) #signifies the strength and direction of the linear relationship between the variables "price" and "co2_emissions" in the dataset.

price_emissions_correlation
f64
0.724647


#### Most Frequent Routes with Stops

In [209]:
df.filter(pl.col("stops") > 0).group_by(["from_country", "dest_country", "stops"]).agg([
      pl.len().alias("total_flights"),
      pl.col("price").mean().alias("avg_price")
  ]).sort("total_flights", descending=True).head(5)

from_country,dest_country,stops,total_flights,avg_price
str,str,i64,u32,f64
"""China""","""China""",1,15190,639.148379
"""China""","""United States""",2,15048,5714.2811
"""Brazil""","""United States""",2,13934,2630.964475
"""Australia""","""United States""",2,13180,1519.114264
"""Germany""","""United States""",1,11438,1630.789561


#### Lazy Evaluation with Predicate Pushdown

In [210]:
# Doesn't load entire dataset into memory
result = (pl.scan_csv("flight data.csv").filter(pl.col("price") > 500).group_by("airline_name").agg(pl.col("price").mean()).sort("price").head(5))
result.collect()

airline_name,price
str,f64
"""[Air France| Air Algerie]""",501.0
"""[Norwegian| Lufthansa| Condor]""",501.0
"""[Volotea| Wizz Air| Tap Air Po…",501.0
"""[Finnair| French bee]""",502.0
"""[Aeromexico| Aeromar| Volaris]""",502.0


# Speed Test

### Speed using pandas Library

In [211]:
%%timeit
df_pd = pd.read_csv("flight data.csv")
df_pd.groupby("airline_name").agg(
    {"duration":["min","mean","max"], "price":["min","mean","max"]}
)

3.71 s ± 128 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Speed test using polars Library

In [212]:
%%timeit
aggs = (
    pl.read_csv("flight data.csv")
    .group_by("airline_name")
    .agg(
        [
            pl.col("duration").min().alias("duration_min"),
            pl.col("duration").mean().alias("duration_mean"),
            pl.col("duration").max().alias("duration_max"),
            pl.col("price").min().alias("price_min"),
            pl.col("price").mean().alias("price_mean"),
            pl.col("price").max().alias("price_max"),
        ]
    )
)

517 ms ± 18.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


#### Unique API

In [213]:
# Unique API call in pandas style syntax on polars dataframe
df['from_airport_code'].unique

<bound method Series.unique of shape: (998_866,)
Series: 'from_airport_code' [str]
[
	"ALG"
	"ALG"
	"ALG"
	"ALG"
	"ALG"
	…
	"BOM"
	"BOM"
	"BOM"
	"BOM"
	"BOM"
]>

In [214]:
# Unique API call in polars style
df.select(pl.col('from_airport_code').unique())

from_airport_code
str
"""MEL"""
"""CGO"""
"""PEK"""
"""YYZ"""
"""ATH"""
…
"""PVG"""
"""BOM"""
"""VIE"""
"""CTU"""


#### Selecting columns 

In [215]:
df.select(pl.col("*"))

from_airport_code,from_country,dest_airport_code,dest_country,aircraft_type,airline_number,airline_name,flight_number,departure_time,arrival_time,duration,stops,price,currency,co2_emissions,avg_co2_emission_for_this_route,co2_percentage,scan_date
str,str,str,str,str,str,str,str,str,str,i64,i64,f64,str,i64,str,str,str
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Canadair RJ 1000|A…","""multi""","""[Air France| Iberia| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1279.0,"""USD""",1320000,"""1320000""","""0%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Canadair RJ 1000|B…","""multi""","""[Air France| Iberia| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1279.0,"""USD""",1195000,"""1320000""","""-9%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A320|Airbus A321|Boeing…","""multi""","""[Air France| LATAM]""","""AF1855|AF1855|AF1855|AF1855""","""2022-04-30 12:45:00""","""2022-05-01 10:15:00""",1530,3,1284.0,"""USD""",1248000,"""1320000""","""-5%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Airbus A320|Boeing…","""multi""","""[Air France| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1290.0,"""USD""",1347000,"""1320000""","""2%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A321neo|Boeing 777|Airb…","""multi""","""[Lufthansa| LATAM]""","""LH1317|LH1317|LH1317""","""2022-04-30 12:35:00""","""2022-05-01 10:15:00""",1540,2,1347.0,"""USD""",1381000,"""1320000""","""4%""","""2022-04-29 17:52:59"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""BOM""","""India""","""SYD""","""Australia""","""Boeing 777|Airbus A380""","""EK""","""[Emirates]""","""EK503|EK503""","""2022-05-01 19:20:00""","""2022-05-02 22:05:00""",1335,1,1317.0,"""USD""",1567000,"""1133000""","""38%""","""2022-04-29 17:52:59"""
"""BOM""","""India""","""SYD""","""Australia""","""Boeing 777|Airbus A380""","""EK""","""[Emirates]""","""EK509|EK509""","""2022-05-01 22:20:00""","""2022-05-02 22:05:00""",1155,1,1317.0,"""USD""",1583000,"""1133000""","""39%""","""2022-04-29 17:52:59"""
"""BOM""","""India""","""SYD""","""Australia""","""Airbus A350|Boeing 787|Boeing …","""multi""","""[Etihad| Virgin Australia]""","""EY205|EY205|EY205""","""2022-05-01 04:35:00""","""2022-05-02 09:55:00""",1490,2,1328.0,"""USD""",1354000,"""1133000""","""19%""","""2022-04-29 17:52:59"""
"""BOM""","""India""","""SYD""","""Australia""","""Airbus A321 (Sharklets)|Airbus…","""multi""","""[Air India| SriLankan]""","""AI603|AI603|AI603""","""2022-05-01 06:10:00""","""2022-05-02 14:55:00""",1695,2,1368.0,"""USD""",1099000,"""1133000""","""-2%""","""2022-04-29 17:52:59"""


In [216]:
df.select(pl.col(["from_airport_code", "dest_country"]))

from_airport_code,dest_country
str,str
"""ALG""","""Argentina"""
"""ALG""","""Argentina"""
"""ALG""","""Argentina"""
"""ALG""","""Argentina"""
"""ALG""","""Argentina"""
…,…
"""BOM""","""Australia"""
"""BOM""","""Australia"""
"""BOM""","""Australia"""
"""BOM""","""Australia"""


In [217]:
df.select(pl.exclude(["from_airport_code", "dest_country"]))

from_country,dest_airport_code,aircraft_type,airline_number,airline_name,flight_number,departure_time,arrival_time,duration,stops,price,currency,co2_emissions,avg_co2_emission_for_this_route,co2_percentage,scan_date
str,str,str,str,str,str,str,str,i64,i64,f64,str,i64,str,str,str
"""Algeria""","""AEP""","""Airbus A318|Canadair RJ 1000|A…","""multi""","""[Air France| Iberia| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1279.0,"""USD""",1320000,"""1320000""","""0%""","""2022-04-29 17:52:59"""
"""Algeria""","""AEP""","""Airbus A318|Canadair RJ 1000|B…","""multi""","""[Air France| Iberia| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1279.0,"""USD""",1195000,"""1320000""","""-9%""","""2022-04-29 17:52:59"""
"""Algeria""","""AEP""","""Airbus A320|Airbus A321|Boeing…","""multi""","""[Air France| LATAM]""","""AF1855|AF1855|AF1855|AF1855""","""2022-04-30 12:45:00""","""2022-05-01 10:15:00""",1530,3,1284.0,"""USD""",1248000,"""1320000""","""-5%""","""2022-04-29 17:52:59"""
"""Algeria""","""AEP""","""Airbus A318|Airbus A320|Boeing…","""multi""","""[Air France| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1290.0,"""USD""",1347000,"""1320000""","""2%""","""2022-04-29 17:52:59"""
"""Algeria""","""AEP""","""Airbus A321neo|Boeing 777|Airb…","""multi""","""[Lufthansa| LATAM]""","""LH1317|LH1317|LH1317""","""2022-04-30 12:35:00""","""2022-05-01 10:15:00""",1540,2,1347.0,"""USD""",1381000,"""1320000""","""4%""","""2022-04-29 17:52:59"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""India""","""SYD""","""Boeing 777|Airbus A380""","""EK""","""[Emirates]""","""EK503|EK503""","""2022-05-01 19:20:00""","""2022-05-02 22:05:00""",1335,1,1317.0,"""USD""",1567000,"""1133000""","""38%""","""2022-04-29 17:52:59"""
"""India""","""SYD""","""Boeing 777|Airbus A380""","""EK""","""[Emirates]""","""EK509|EK509""","""2022-05-01 22:20:00""","""2022-05-02 22:05:00""",1155,1,1317.0,"""USD""",1583000,"""1133000""","""39%""","""2022-04-29 17:52:59"""
"""India""","""SYD""","""Airbus A350|Boeing 787|Boeing …","""multi""","""[Etihad| Virgin Australia]""","""EY205|EY205|EY205""","""2022-05-01 04:35:00""","""2022-05-02 09:55:00""",1490,2,1328.0,"""USD""",1354000,"""1133000""","""19%""","""2022-04-29 17:52:59"""
"""India""","""SYD""","""Airbus A321 (Sharklets)|Airbus…","""multi""","""[Air India| SriLankan]""","""AI603|AI603|AI603""","""2022-05-01 06:10:00""","""2022-05-02 14:55:00""",1695,2,1368.0,"""USD""",1099000,"""1133000""","""-2%""","""2022-04-29 17:52:59"""


#### Null operations

In [218]:
df.select(pl.col("co2_emissions").is_null().unique())

co2_emissions
bool
False
True


In [219]:
cleaned_df1 = df.drop_nulls()
cleaned_df

from_airport_code,from_country,dest_airport_code,dest_country,aircraft_type,airline_number,airline_name,flight_number,departure_time,arrival_time,duration,stops,price,currency,co2_emissions,avg_co2_emission_for_this_route,co2_percentage,scan_date
str,str,str,str,str,str,str,str,str,str,i64,i64,f64,str,i64,str,str,str
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Canadair RJ 1000|A…","""multi""","""[Air France| Iberia| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1279.0,"""USD""",1320000,"""1320000""","""0%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Canadair RJ 1000|B…","""multi""","""[Air France| Iberia| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1279.0,"""USD""",1195000,"""1320000""","""-9%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A320|Airbus A321|Boeing…","""multi""","""[Air France| LATAM]""","""AF1855|AF1855|AF1855|AF1855""","""2022-04-30 12:45:00""","""2022-05-01 10:15:00""",1530,3,1284.0,"""USD""",1248000,"""1320000""","""-5%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Airbus A320|Boeing…","""multi""","""[Air France| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1290.0,"""USD""",1347000,"""1320000""","""2%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A321neo|Boeing 777|Airb…","""multi""","""[Lufthansa| LATAM]""","""LH1317|LH1317|LH1317""","""2022-04-30 12:35:00""","""2022-05-01 10:15:00""",1540,2,1347.0,"""USD""",1381000,"""1320000""","""4%""","""2022-04-29 17:52:59"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""BOM""","""India""","""SYD""","""Australia""","""Boeing 777|Airbus A380""","""EK""","""[Emirates]""","""EK503|EK503""","""2022-05-01 19:20:00""","""2022-05-02 22:05:00""",1335,1,1317.0,"""USD""",1567000,"""1133000""","""38%""","""2022-04-29 17:52:59"""
"""BOM""","""India""","""SYD""","""Australia""","""Boeing 777|Airbus A380""","""EK""","""[Emirates]""","""EK509|EK509""","""2022-05-01 22:20:00""","""2022-05-02 22:05:00""",1155,1,1317.0,"""USD""",1583000,"""1133000""","""39%""","""2022-04-29 17:52:59"""
"""BOM""","""India""","""SYD""","""Australia""","""Airbus A350|Boeing 787|Boeing …","""multi""","""[Etihad| Virgin Australia]""","""EY205|EY205|EY205""","""2022-05-01 04:35:00""","""2022-05-02 09:55:00""",1490,2,1328.0,"""USD""",1354000,"""1133000""","""19%""","""2022-04-29 17:52:59"""
"""BOM""","""India""","""SYD""","""Australia""","""Airbus A321 (Sharklets)|Airbus…","""multi""","""[Air India| SriLankan]""","""AI603|AI603|AI603""","""2022-05-01 06:10:00""","""2022-05-02 14:55:00""",1695,2,1368.0,"""USD""",1099000,"""1133000""","""-2%""","""2022-04-29 17:52:59"""


In [220]:
cleaned_df2 = df.filter(pl.col("co2_emissions").is_not_null())
cleaned_df

from_airport_code,from_country,dest_airport_code,dest_country,aircraft_type,airline_number,airline_name,flight_number,departure_time,arrival_time,duration,stops,price,currency,co2_emissions,avg_co2_emission_for_this_route,co2_percentage,scan_date
str,str,str,str,str,str,str,str,str,str,i64,i64,f64,str,i64,str,str,str
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Canadair RJ 1000|A…","""multi""","""[Air France| Iberia| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1279.0,"""USD""",1320000,"""1320000""","""0%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Canadair RJ 1000|B…","""multi""","""[Air France| Iberia| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1279.0,"""USD""",1195000,"""1320000""","""-9%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A320|Airbus A321|Boeing…","""multi""","""[Air France| LATAM]""","""AF1855|AF1855|AF1855|AF1855""","""2022-04-30 12:45:00""","""2022-05-01 10:15:00""",1530,3,1284.0,"""USD""",1248000,"""1320000""","""-5%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A318|Airbus A320|Boeing…","""multi""","""[Air France| LATAM]""","""AF1491|AF1491|AF1491|AF1491""","""2022-04-30 14:30:00""","""2022-05-01 10:15:00""",1425,3,1290.0,"""USD""",1347000,"""1320000""","""2%""","""2022-04-29 17:52:59"""
"""ALG""","""Algeria""","""AEP""","""Argentina""","""Airbus A321neo|Boeing 777|Airb…","""multi""","""[Lufthansa| LATAM]""","""LH1317|LH1317|LH1317""","""2022-04-30 12:35:00""","""2022-05-01 10:15:00""",1540,2,1347.0,"""USD""",1381000,"""1320000""","""4%""","""2022-04-29 17:52:59"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""BOM""","""India""","""SYD""","""Australia""","""Boeing 777|Airbus A380""","""EK""","""[Emirates]""","""EK503|EK503""","""2022-05-01 19:20:00""","""2022-05-02 22:05:00""",1335,1,1317.0,"""USD""",1567000,"""1133000""","""38%""","""2022-04-29 17:52:59"""
"""BOM""","""India""","""SYD""","""Australia""","""Boeing 777|Airbus A380""","""EK""","""[Emirates]""","""EK509|EK509""","""2022-05-01 22:20:00""","""2022-05-02 22:05:00""",1155,1,1317.0,"""USD""",1583000,"""1133000""","""39%""","""2022-04-29 17:52:59"""
"""BOM""","""India""","""SYD""","""Australia""","""Airbus A350|Boeing 787|Boeing …","""multi""","""[Etihad| Virgin Australia]""","""EY205|EY205|EY205""","""2022-05-01 04:35:00""","""2022-05-02 09:55:00""",1490,2,1328.0,"""USD""",1354000,"""1133000""","""19%""","""2022-04-29 17:52:59"""
"""BOM""","""India""","""SYD""","""Australia""","""Airbus A321 (Sharklets)|Airbus…","""multi""","""[Air India| SriLankan]""","""AI603|AI603|AI603""","""2022-05-01 06:10:00""","""2022-05-02 14:55:00""",1695,2,1368.0,"""USD""",1099000,"""1133000""","""-2%""","""2022-04-29 17:52:59"""
