# Data analysis in Python with the Polars library

### Reading Excel file

In [1]:
import polars as pl

In [2]:
# Load the "Orders" worksheet of the Superstore workbook into a Polars
# DataFrame, then preview its first five rows.
df = pl.read_excel(
    source="Sample - Superstore.xls",
    sheet_name="Orders",
)
df.head(5)

Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,State,Postal Code,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit
i64,str,date,date,str,str,str,str,str,str,str,i64,str,str,str,str,str,f64,i64,f64,f64
1,"""CA-2016-152156""",2016-11-08,2016-11-11,"""Second Class""","""CG-12520""","""Claire Gute""","""Consumer""","""United States""","""Henderson""","""Kentucky""",42420,"""South""","""FUR-BO-10001798""","""Furniture""","""Bookcases""","""Bush Somerset Collection Bookc…",261.96,2,0.0,41.9136
2,"""CA-2016-152156""",2016-11-08,2016-11-11,"""Second Class""","""CG-12520""","""Claire Gute""","""Consumer""","""United States""","""Henderson""","""Kentucky""",42420,"""South""","""FUR-CH-10000454""","""Furniture""","""Chairs""","""Hon Deluxe Fabric Upholstered …",731.94,3,0.0,219.582
3,"""CA-2016-138688""",2016-06-12,2016-06-16,"""Second Class""","""DV-13045""","""Darrin Van Huff""","""Corporate""","""United States""","""Los Angeles""","""California""",90036,"""West""","""OFF-LA-10000240""","""Office Supplies""","""Labels""","""Self-Adhesive Address Labels f…",14.62,2,0.0,6.8714
4,"""US-2015-108966""",2015-10-11,2015-10-18,"""Standard Class""","""SO-20335""","""Sean O'Donnell""","""Consumer""","""United States""","""Fort Lauderdale""","""Florida""",33311,"""South""","""FUR-TA-10000577""","""Furniture""","""Tables""","""Bretford CR4500 Series Slim Re…",957.5775,5,0.45,-383.031
5,"""US-2015-108966""",2015-10-11,2015-10-18,"""Standard Class""","""SO-20335""","""Sean O'Donnell""","""Consumer""","""United States""","""Fort Lauderdale""","""Florida""",33311,"""South""","""OFF-ST-10000760""","""Office Supplies""","""Storage""","""Eldon Fold 'N Roll Cart System""",22.368,2,0.2,2.5164


In [3]:
# Show the column names and the dtypes of the loaded frame.
df.schema

Schema([('Row ID', Int64),
        ('Order ID', String),
        ('Order Date', Date),
        ('Ship Date', Date),
        ('Ship Mode', String),
        ('Customer ID', String),
        ('Customer Name', String),
        ('Segment', String),
        ('Country', String),
        ('City', String),
        ('State', String),
        ('Postal Code', Int64),
        ('Region', String),
        ('Product ID', String),
        ('Category', String),
        ('Sub-Category', String),
        ('Product Name', String),
        ('Sales', Float64),
        ('Quantity', Int64),
        ('Discount', Float64),
        ('Profit', Float64)])

### Monthly profit

In [4]:
# Total profit per calendar month: group the orders by the year and month
# of "Order Date", sum "Profit" (rounded to 2 decimals), and order the
# result chronologically.
a = (
    df.group_by(
        order_year=pl.col("Order Date").dt.year(),
        order_month=pl.col("Order Date").dt.month(),
    )
    .agg(total_profit=pl.col("Profit").sum().round(2))
    .sort("order_year", "order_month")
)

In [5]:
# Display the monthly profit table built in the previous cell.
a

order_year,order_month,total_profit
i32,i8,f64
2014,1,2450.19
2014,2,862.31
2014,3,498.73
2014,4,3488.84
2014,5,2738.71
…,…,…
2017,8,9040.96
2017,9,10991.56
2017,10,9275.28
2017,11,9690.1


In [6]:
# Persist the monthly profit table as a CSV file in the working directory.
a.write_csv(file="monthly_profit.csv")

In [7]:
import altair as alt

In [8]:
# Line chart of total profit per month, one coloured line per order year.
#
# The month is encoded as ordinal (":O") rather than left as a bare integer
# column: a quantitative x-axis would start at a non-existent month 0 and
# could show fractional ticks. Axis titles and a chart title are set so the
# figure can be read on its own.
a.plot.line(
    x=alt.X("order_month:O", title="Month", axis=alt.Axis(labelAngle=0)),
    y=alt.Y("total_profit:Q", title="Total profit"),
    color=alt.Color(
        "order_year:N",
        scale=alt.Scale(scheme="category10"),
        legend=alt.Legend(title="Year"),
    ),
).properties(width=1000, height=600, title="Monthly profit by year")

### Top 5 states by profit

In [10]:
# Rank states by their summed profit (rounded to 2 decimals) and keep the
# five most profitable ones.
b = (
    df.group_by("State")
    .agg(total_profit=pl.col("Profit").sum().round(2))
    .sort("total_profit", descending=True)
    .head(5)
)

In [11]:
# Display the top-states table built in the previous cell.
b

State,total_profit
str,f64
"""California""",76381.39
"""New York""",74038.55
"""Washington""",33402.65
"""Michigan""",24463.19
"""Virginia""",18597.95


In [18]:
# Bars: one per state, ordered along x by descending profit.
bar_chart = b.plot.bar(
    x=alt.X("State", sort="-y"),
    y="total_profit",
).properties(
    width=800,
    height=400,
    title="Top 5 states by Profit",
)

# Value labels: reuse the bar chart's encodings, switch the mark to text,
# and nudge each label 5px upwards so it sits just above its bar.
text = bar_chart.mark_text(
    align="center",
    baseline="bottom",
    dy=-5,
).encode(text="total_profit")

# Overlay the labels on the bars.
alt.layer(bar_chart, text)