In [41]:
import pandas as pd

# Load the internet-sales fact table from its CSV export
fact_sales_csv = '../input_data/udemy_ms_powerbi/factInternetSales.csv'
df = pd.read_csv(fact_sales_csv)

# Preview the leading rows to check the column layout
df.head()

Unnamed: 0,ProductKey,OrderDate,DueDate,ShipDate,CustomerKey,PromotionKey,SalesTerritoryKey,SalesOrderNumber,SalesOrderLineNumber,RevisionNumber,...,UnitPriceDiscountPct,DiscountAmount,ProductStandardCost,TotalProductCost,29,6,98,19,36,100
0,310,29/12/2010,10/01/2011,5/01/2011,21768,1,6,SO43697,1,1,...,0,0,2171.2942,2171.2942,,,,3578.27,,
1,346,29/12/2010,10/01/2011,5/01/2011,28389,1,7,SO43698,1,1,...,0,0,1912.1544,1912.1544,,,,,,
2,346,29/12/2010,10/01/2011,5/01/2011,25863,1,1,SO43699,1,1,...,0,0,1912.1544,1912.1544,,,,,,3399.99
3,336,29/12/2010,10/01/2011,5/01/2011,14501,1,4,SO43700,1,1,...,0,0,413.1463,413.1463,,,,,,699.0982
4,346,29/12/2010,10/01/2011,5/01/2011,11003,1,9,SO43701,1,1,...,0,0,1912.1544,1912.1544,,3399.99,,,,


In [42]:
# Reshape wide -> long: every column that is NOT listed below is folded into
# a ("Currency Key", "Sales Amount") pair, one output row per input row and
# per folded column.
sales_id_columns = [
    "ProductKey",
    "OrderDate",
    "DueDate",
    "ShipDate",
    "CustomerKey",
    "PromotionKey",
    "SalesTerritoryKey",
    "SalesOrderNumber",
    "SalesOrderLineNumber",
    "RevisionNumber",
    "OrderQuantity",
    "UnitPrice",
    "ExtendedAmount",
    "UnitPriceDiscountPct",
    "DiscountAmount",
    "ProductStandardCost",
    "TotalProductCost",
]
df_unpivoted = df.melt(
    id_vars=sales_id_columns,   # columns repeated unchanged on every output row
    var_name="Currency Key",    # receives the header of each folded column
    value_name="Sales Amount",  # receives the cell value of each folded column
)

# The folded headers come through as 'object'; store them as 'int64' instead
currency_key_as_int = df_unpivoted["Currency Key"].astype("int64")
df_unpivoted["Currency Key"] = currency_key_as_int

df_unpivoted.tail()

Unnamed: 0,ProductKey,OrderDate,DueDate,ShipDate,CustomerKey,PromotionKey,SalesTerritoryKey,SalesOrderNumber,SalesOrderLineNumber,RevisionNumber,OrderQuantity,UnitPrice,ExtendedAmount,UnitPriceDiscountPct,DiscountAmount,ProductStandardCost,TotalProductCost,Currency Key,Sales Amount
362383,485,28/01/2014,9/02/2014,4/02/2014,15868,1,6,SO75122,1,1,1,21.98,21.98,0,0,8.2205,8.2205,100,21.98
362384,225,28/01/2014,9/02/2014,4/02/2014,15868,1,6,SO75122,2,1,1,8.99,8.99,0,0,6.9223,6.9223,100,8.99
362385,485,28/01/2014,9/02/2014,4/02/2014,18759,1,6,SO75123,1,1,1,21.98,21.98,0,0,8.2205,8.2205,100,21.98
362386,486,28/01/2014,9/02/2014,4/02/2014,18759,1,6,SO75123,2,1,1,159.0,159.0,0,0,59.466,59.466,100,159.0
362387,225,28/01/2014,9/02/2014,4/02/2014,18759,1,6,SO75123,3,1,1,8.99,8.99,0,0,6.9223,6.9223,100,8.99


In [43]:
# Keep only the rows that actually carry a sales amount; the unpivot leaves a
# null "Sales Amount" wherever the original currency cell was empty.
df_fact_internet_sales = df_unpivoted.dropna(subset=["Sales Amount"])
df_fact_internet_sales.shape[0]

60339

In [44]:
# Read the currency dimension table from its CSV export
dim_currency_csv = '../input_data/udemy_ms_powerbi/dimCurrency.csv'
df_dim_currency = pd.read_csv(dim_currency_csv)

# Display the loaded table
df_dim_currency

Unnamed: 0,CurrencyKey,CurrencyAlternateKey,CurrencyName,Exchange Rate
0,6,AUD,Australian Dollar,1.62
1,19,CAD,Canadian Dollar,1.5
2,29,DEM,Deutsche Mark,1.0
3,36,EUR,EURO,1.0
4,98,GBP,United Kingdom Pound,0.83
5,100,USD,US Dollar,1.08


In [45]:
# Merge fact internet sales & dim currency.
# - how="left" keeps every sales row, even if its currency key has no match
#   in the dimension table (the currency columns are then null for that row).
# - validate="many_to_one" makes pandas raise a MergeError if "CurrencyKey" is
#   not unique in the dimension table; without this guard a duplicated key
#   would silently duplicate sales rows and inflate downstream totals.
df_result = pd.merge(
    df_fact_internet_sales,
    df_dim_currency,
    left_on="Currency Key",
    right_on="CurrencyKey",
    how="left",
    validate="many_to_one",
)

df_result.tail()

Unnamed: 0,ProductKey,OrderDate,DueDate,ShipDate,CustomerKey,PromotionKey,SalesTerritoryKey,SalesOrderNumber,SalesOrderLineNumber,RevisionNumber,...,UnitPriceDiscountPct,DiscountAmount,ProductStandardCost,TotalProductCost,Currency Key,Sales Amount,CurrencyKey,CurrencyAlternateKey,CurrencyName,Exchange Rate
60334,485,28/01/2014,9/02/2014,4/02/2014,15868,1,6,SO75122,1,1,...,0,0,8.2205,8.2205,100,21.98,100,USD,US Dollar,1.08
60335,225,28/01/2014,9/02/2014,4/02/2014,15868,1,6,SO75122,2,1,...,0,0,6.9223,6.9223,100,8.99,100,USD,US Dollar,1.08
60336,485,28/01/2014,9/02/2014,4/02/2014,18759,1,6,SO75123,1,1,...,0,0,8.2205,8.2205,100,21.98,100,USD,US Dollar,1.08
60337,486,28/01/2014,9/02/2014,4/02/2014,18759,1,6,SO75123,2,1,...,0,0,59.466,59.466,100,159.0,100,USD,US Dollar,1.08
60338,225,28/01/2014,9/02/2014,4/02/2014,18759,1,6,SO75123,3,1,...,0,0,6.9223,6.9223,100,8.99,100,USD,US Dollar,1.08


In [46]:
# Remove the three currency columns that are no longer needed, then expose
# the remaining "CurrencyAlternateKey" column under the name "Currency Code".
redundant_columns = ["Currency Key", "CurrencyKey", "CurrencyName"]
df_result = df_result.drop(columns=redundant_columns)
df_result = df_result.rename(columns={"CurrencyAlternateKey": "Currency Code"})
df_result.tail()

Unnamed: 0,ProductKey,OrderDate,DueDate,ShipDate,CustomerKey,PromotionKey,SalesTerritoryKey,SalesOrderNumber,SalesOrderLineNumber,RevisionNumber,OrderQuantity,UnitPrice,ExtendedAmount,UnitPriceDiscountPct,DiscountAmount,ProductStandardCost,TotalProductCost,Sales Amount,Currency Code,Exchange Rate
60334,485,28/01/2014,9/02/2014,4/02/2014,15868,1,6,SO75122,1,1,1,21.98,21.98,0,0,8.2205,8.2205,21.98,USD,1.08
60335,225,28/01/2014,9/02/2014,4/02/2014,15868,1,6,SO75122,2,1,1,8.99,8.99,0,0,6.9223,6.9223,8.99,USD,1.08
60336,485,28/01/2014,9/02/2014,4/02/2014,18759,1,6,SO75123,1,1,1,21.98,21.98,0,0,8.2205,8.2205,21.98,USD,1.08
60337,486,28/01/2014,9/02/2014,4/02/2014,18759,1,6,SO75123,2,1,1,159.0,159.0,0,0,59.466,59.466,159.0,USD,1.08
60338,225,28/01/2014,9/02/2014,4/02/2014,18759,1,6,SO75123,3,1,1,8.99,8.99,0,0,6.9223,6.9223,8.99,USD,1.08


In [47]:
# Derive the EUR amount: each row's sales amount divided by the exchange
# rate that was joined in for that row's currency.
sales_amount = df_result["Sales Amount"]
exchange_rate = df_result["Exchange Rate"]
df_result["Sales Amount EUR"] = sales_amount / exchange_rate
df_result.head()

Unnamed: 0,ProductKey,OrderDate,DueDate,ShipDate,CustomerKey,PromotionKey,SalesTerritoryKey,SalesOrderNumber,SalesOrderLineNumber,RevisionNumber,...,UnitPrice,ExtendedAmount,UnitPriceDiscountPct,DiscountAmount,ProductStandardCost,TotalProductCost,Sales Amount,Currency Code,Exchange Rate,Sales Amount EUR
0,311,1/01/2011,13/01/2011,8/01/2011,13513,1,8,SO43712,1,1,...,3578.27,3578.27,0,0,2171.2942,2171.2942,3578.27,DEM,1.0,3578.27
1,311,3/01/2011,15/01/2011,10/01/2011,13264,1,8,SO43720,1,1,...,3578.27,3578.27,0,0,2171.2942,2171.2942,3578.27,DEM,1.0,3578.27
2,311,5/01/2011,17/01/2011,12/01/2011,13258,1,8,SO43725,1,1,...,3578.27,3578.27,0,0,2171.2942,2171.2942,3578.27,DEM,1.0,3578.27
3,311,8/01/2011,20/01/2011,15/01/2011,13261,1,8,SO43737,1,1,...,3578.27,3578.27,0,0,2171.2942,2171.2942,3578.27,DEM,1.0,3578.27
4,311,11/01/2011,23/01/2011,18/01/2011,13265,1,8,SO43752,1,1,...,3578.27,3578.27,0,0,2171.2942,2171.2942,3578.27,DEM,1.0,3578.27


In [48]:
# Drop the source-currency amount and its rate; only "Sales Amount EUR" is kept
df_result = df_result.drop(columns=["Sales Amount", "Exchange Rate"])

# Persist the result; index=False keeps the row index out of the CSV
df_result.to_csv('../input_data/udemy_ms_powerbi/expected_factInternetSales.csv', index=False)

# Preview goes last: a notebook cell only renders its final expression, so a
# head() call placed before to_csv() was evaluated and then discarded.
df_result.head()