In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Obtain the Spark session used by every cell below
# (getOrCreate reuses an already-running session when there is one).
spark = (
    SparkSession.builder
    .appName("GoldLayerCreation")
    .getOrCreate()
)

In [0]:
# Load the four silver-layer Delta tables that feed the gold layer.
silver_sellers = spark.read.load("/mnt/delta/tables/silver/sellers", format="delta")
silver_buyers = spark.read.load("/mnt/delta/tables/silver/buyers", format="delta")
silver_users = spark.read.load("/mnt/delta/tables/silver/users", format="delta")
silver_countries = spark.read.load("/mnt/delta/tables/silver/countries", format="delta")



In [0]:
# Print the column names and types of the silver users table.
silver_users.printSchema()

root
 |-- identifierHash: string (nullable = true)
 |-- type: string (nullable = true)
 |-- countrycode: string (nullable = true)
 |-- country: string (nullable = true)
 |-- language: string (nullable = true)
 |-- socialnbfollowers: integer (nullable = true)
 |-- socialnbfollows: integer (nullable = true)
 |-- socialProductsLiked: string (nullable = true)
 |-- productsListed: string (nullable = true)
 |-- productsSold: string (nullable = true)
 |-- productspassrate: decimal(10,2) (nullable = true)
 |-- productsWished: string (nullable = true)
 |-- productsBought: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- civilityGenderId: string (nullable = true)
 |-- civilityTitle: string (nullable = true)
 |-- hasprofilepicture: boolean (nullable = true)
 |-- seniority: string (nullable = true)
 |-- seniorityasmonths: decimal(10,2) (nullable = true)
 |-- seniorityasyears: decimal(10,2) (nullable = true)
 |-- websiteLongevity: string (nullable = true)
 |-- hasanyapp: boolean 

In [0]:
# Build the gold-layer "one big table": full outer joins of the four silver
# tables on their shared `country` column, followed by a select that gives
# every kept column a source-prefixed, unique name.
joined_silver_tables = (
    silver_users
    .join(silver_countries, ["country"], "outer")
    .join(silver_buyers, ["country"], "outer")
    .join(silver_sellers, ["country"], "outer")
)

# Select and alias columns from each dataframe to ensure uniqueness.
comprehensive_user_table = joined_silver_tables.select(
    # NOTE(review): this is the users-side `country`; because the joins are
    # outer joins it may be null for rows that exist only in the other
    # tables -- confirm whether the merged join key is wanted instead.
    silver_users["country"].alias("Country"),

    # From silver_users
    silver_users["productsSold"].alias("Users_productsSold"),
    silver_users["productsWished"].alias("Users_productsWished"),
    silver_users["account_age_years"].alias("Users_account_age_years"),
    # Fixed copy-paste error: this alias previously re-selected
    # `account_age_years`, so the "group" column duplicated the "years" one.
    # NOTE(review): assumes silver_users has an `account_age_group` column --
    # confirm against the silver layer. If the gold table was already written
    # with the old definition, the column's type may change and the Delta
    # overwrite may need schema overwrite enabled.
    silver_users["account_age_group"].alias("Users_account_age_group"),
    silver_users["hasanyapp"].alias("Users_hasanyapp"),
    silver_users["socialnbfollowers"].alias("Users_socialnbfollowers"),
    silver_users["flag_long_title"].alias("Users_flag_long_title"),

    # From silver_countries
    silver_countries["sellers"].alias("Countries_Sellers"),
    silver_countries["topsellers"].alias("Countries_TopSellers"),
    silver_countries["femalesellers"].alias("Countries_FemaleSellers"),
    silver_countries["malesellers"].alias("Countries_MaleSellers"),
    silver_countries["topfemalesellers"].alias("Countries_TopFemaleSellers"),
    silver_countries["topmalesellers"].alias("Countries_TopMaleSellers"),

    # From silver_buyers
    silver_buyers["buyers"].alias("Buyers_Total"),
    silver_buyers["topbuyers"].alias("Buyers_Top"),
    silver_buyers["femalebuyers"].alias("Buyers_Female"),
    silver_buyers["malebuyers"].alias("Buyers_Male"),
    silver_buyers["topfemalebuyers"].alias("Buyers_TopFemale"),
    silver_buyers["topmalebuyers"].alias("Buyers_TopMale"),

    # From silver_sellers
    silver_sellers["nbsellers"].alias("Sellers_Total"),
    silver_sellers["sex"].alias("Sellers_Sex"),
    silver_sellers["meanproductssold"].alias("Sellers_MeanProductsSold"),
    silver_sellers["meanproductslisted"].alias("Sellers_MeanProductsListed"),
)

In [0]:
# Persist the joined table to the gold layer as Delta, replacing any
# existing data at the target path.
(
    comprehensive_user_table.write
    .format("delta")
    .mode("overwrite")
    .save("/mnt/delta/tables/gold/ecom_one_big_table")
)