In [1]:
!pip install polars
!pip install fastexcel

Collecting fastexcel
  Downloading fastexcel-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.0 kB)
Downloading fastexcel-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fastexcel
Successfully installed fastexcel-0.16.0


In [2]:
import polars as pl

In [7]:
aeroportos = pl.read_csv("airports.csv",
                         columns = ["IATA_CODE", "CITY", "STATE"])
aeroportos.head(2)

IATA_CODE,CITY,STATE
str,str,str
"""ABE""","""Allentown""","""PA"""
"""ABI""","""Abilene""","""TX"""


In [19]:
wdi = pl.read_excel("WDIEXCEL.xlsx", sheet_name = "Country",
                    columns = ["Short Name", "Region"])
wdi.head(2)

Short Name,Region
str,str
"""Aruba""","""Latin America & Caribbean"""
"""Afghanistan""","""South Asia"""


In [8]:
df = pl.DataFrame({
    "grupo": ["A", "A", "B", "B", "C"],
    "valor1": [10, 15, 10, None, 25],
    "valor2": [5, None, 20, 30, None]
})
df

grupo,valor1,valor2
str,i64,i64
"""A""",10.0,5.0
"""A""",15.0,
"""B""",10.0,20.0
"""B""",,30.0
"""C""",25.0,


In [9]:
df["valor1"]

valor1
i64
10.0
15.0
10.0
""
25.0


In [10]:
df["valor1"].mean()

15.0

In [11]:
df["valor1"].drop_nulls()

valor1
i64
10
15
10
25


In [12]:
df["valor1"].drop_nulls().mean()

15.0

In [13]:
df.select([
  pl.col("valor1").mean().alias("media_v1"),
  pl.col("valor2").mean()
])

media_v1,valor2
f64,f64
15.0,18.333333


In [14]:
df.group_by("grupo").agg([
  pl.col("valor1").mean().alias("media_valor1"),
  pl.col("valor2").min().alias("min_valor2")
]).sort("grupo")

grupo,media_valor1,min_valor2
str,f64,i64
"""A""",12.5,5.0
"""B""",10.0,20.0
"""C""",25.0,


In [20]:
voos = pl.read_csv("flights.csv",
                   columns = ["AIRLINE", "ARRIVAL_DELAY", "DESTINATION_AIRPORT"],
                   dtypes = {"AIRLINE": pl.Utf8,
                             "ARRIVAL_DELAY": pl.Int32,
                             "DESTINATION_AIRPORT": pl.Utf8})
voos.shape

  voos = pl.read_csv("flights.csv",


(93070, 3)

In [21]:
voos.head(3)

AIRLINE,DESTINATION_AIRPORT,ARRIVAL_DELAY
str,str,i32
"""AS""","""SEA""",-22
"""AA""","""PBI""",-9
"""US""","""CLT""",5


In [23]:
resultado = (
  voos.drop_nulls(["AIRLINE", "DESTINATION_AIRPORT", "ARRIVAL_DELAY"])
  .filter(
    pl.col("AIRLINE").is_in(["AA", "DL"]) &
    pl.col("DESTINATION_AIRPORT").is_in(["SEA", "MIA", "BWI"])
    )
    .group_by(["AIRLINE", "DESTINATION_AIRPORT"])
    .agg([
      (pl.col("ARRIVAL_DELAY") > 30).mean().alias("atraso_medio")
      ])
)
resultado.sort("atraso_medio")

AIRLINE,DESTINATION_AIRPORT,atraso_medio
str,str,f64
"""DL""","""BWI""",0.120879
"""DL""","""MIA""",0.168919
"""DL""","""SEA""",0.169312
"""AA""","""BWI""",0.195652
"""AA""","""MIA""",0.226119
"""AA""","""SEA""",0.373333


In [24]:
from datetime import datetime
import pytz

# Define o fuso horário de Brasília
brasilia_timezone = pytz.timezone('America/Sao_Paulo')

# Obtém a data e hora atuais no fuso horário de Brasília
now_brasilia = datetime.now(brasilia_timezone)

# Imprime a data e hora
print(now_brasilia.strftime('%Y-%m-%d %H:%M:%S %Z%z'))

2025-10-02 10:58:31 -03-0300
