In [0]:
%sql
-- External CSV table over the GSM archive files under the gsm/ archive folder.
-- Partition columns p_year, p_month and p_day are declared separately from the
-- year, month and day data columns stored inside each file.
-- The Python cell later in this notebook drops and recreates this same table.
-- NOTE(review): inferSchema should have no effect because the column list is explicit.
-- NOTE(review): pattern is not a documented Spark CSV option and is probably ignored.
--               pathGlobFilter is the documented file-name filter. Confirm intent.
DROP TABLE IF EXISTS martech_archive_gsm;
CREATE EXTERNAL TABLE IF NOT EXISTS martech_archive_gsm (
    year INT,
    month INT,
    day INT,
    cell_identity STRING,
    frequency_band INT,
    site_id INT
)
USING CSV
PARTITIONED BY (
    p_year INT,
    p_month INT,
    p_day INT
)
LOCATION 'gs://martech-archive-data/Archive/gsm/'
OPTIONS (
    header = 'true',
    inferSchema = 'true',
    delimiter = ';',
    pattern = '*.csv'
);

-- A partitioned table created over files that already exist has no partitions
-- registered in the metastore, so queries can return zero rows until the
-- partition directories are recovered.
MSCK REPAIR TABLE martech_archive_gsm;




In [0]:
%sql
-- External CSV table over the site archive files under the site/ archive folder.
-- Partition columns p_year, p_month and p_day are declared separately from the
-- year, month and day data columns stored inside each file.
-- NOTE(review): inferSchema should have no effect because the column list is explicit.
-- NOTE(review): pattern is not a documented Spark CSV option and is probably ignored.
--               pathGlobFilter is the documented file-name filter. Confirm intent.
DROP TABLE IF EXISTS martech_archive_site;
CREATE EXTERNAL TABLE IF NOT EXISTS martech_archive_site (
    year INT,
    month INT,
    day INT,
    site_id INT
)
USING CSV
PARTITIONED BY (
    p_year INT,
    p_month INT,
    p_day INT
)
LOCATION 'gs://martech-archive-data/Archive/site/'
OPTIONS (
    header = 'true',
    inferSchema = 'true',
    delimiter = ';',
    pattern = '*.csv'
);

-- A partitioned table created over files that already exist has no partitions
-- registered in the metastore, so queries can return zero rows until the
-- partition directories are recovered.
MSCK REPAIR TABLE martech_archive_site;

In [0]:
from pyspark.sql import SparkSession

# Statement templates for the per-technology archive tables. `{tech_type}` is
# substituted into both the table name and the storage folder.
drop_external_table_tech_sql = 'DROP TABLE IF EXISTS martech_archive_{tech_type}'

# spark.sql() executes exactly one statement, so the template carries no
# trailing ';' (some Spark parser versions reject it).
# NOTE(review): `inferSchema` should have no effect because the column list is
# explicit, and `pattern` is not a documented Spark CSV option (the documented
# file-name filter is `pathGlobFilter`) - confirm intent before changing.
create_external_table_tech_sql = """
CREATE EXTERNAL TABLE IF NOT EXISTS martech_archive_{tech_type} (
    year INT,
    month INT,
    day INT,
    cell_identity STRING,
    frequency_band INT,
    site_id INT
)
USING CSV
PARTITIONED BY (
    p_year INT,
    p_month INT,
    p_day INT
)
LOCATION 'gs://martech-archive-data/Archive/{tech_type}/'
OPTIONS (
    header = 'true',
    inferSchema = 'true',
    delimiter = ';',
    pattern = '*.csv'
)
"""

# A partitioned table created over files that already exist has no partitions
# registered in the metastore; recovering them makes the data queryable.
repair_external_table_tech_sql = 'MSCK REPAIR TABLE martech_archive_{tech_type}'

spark = SparkSession.builder.appName("CreateExternalTable").getOrCreate()

# Rebuild each technology table from scratch: drop, create, then register the
# partition directories found under the table location.
for tech_type in ['gsm', 'umts', 'lte']:
    for statement_template in (
        drop_external_table_tech_sql,
        create_external_table_tech_sql,
        repair_external_table_tech_sql,
    ):
        spark.sql(statement_template.format(tech_type=tech_type))




In [0]:
%sql
-- Per-site daily summary: for every row of martech_archive_site, report the
-- number of distinct cells per technology and 0/1 flags telling whether a
-- given frequency band is present on that site for that partition day.
-- Sites with no matching rows in a technology table get 0 for its columns.
-- NOTE(review): assumes martech_archive_site holds one row per
--               (p_year, p_month, p_day, site_id). Duplicate site rows would
--               repeat output rows. Confirm against the source data.
WITH site AS (
    -- Only the join keys are needed from the site archive.
    SELECT
        p_year,
        p_month,
        p_day,
        site_id
    FROM martech_archive_site
),

gsm AS (
    -- 2G: distinct cells and band flags per site and day.
    SELECT
        p_year,
        p_month,
        p_day,
        site_id,
        COUNT(DISTINCT cell_identity) AS cell_cnt,
        MAX(CASE WHEN frequency_band = 900 THEN 1 ELSE 0 END) AS frequency_band_G900,
        MAX(CASE WHEN frequency_band = 1800 THEN 1 ELSE 0 END) AS frequency_band_G1800
    FROM martech_archive_gsm
    GROUP BY p_year, p_month, p_day, site_id
),

umts AS (
    -- 3G: distinct cells and band flags per site and day.
    SELECT
        p_year,
        p_month,
        p_day,
        site_id,
        COUNT(DISTINCT cell_identity) AS cell_cnt,
        MAX(CASE WHEN frequency_band = 900 THEN 1 ELSE 0 END) AS frequency_band_U900,
        MAX(CASE WHEN frequency_band = 2100 THEN 1 ELSE 0 END) AS frequency_band_U2100
    FROM martech_archive_umts
    GROUP BY p_year, p_month, p_day, site_id
),

lte AS (
    -- 4G: distinct cells and band flags per site and day.
    SELECT
        p_year,
        p_month,
        p_day,
        site_id,
        COUNT(DISTINCT cell_identity) AS cell_cnt,
        MAX(CASE WHEN frequency_band = 700 THEN 1 ELSE 0 END) AS frequency_band_L700,
        MAX(CASE WHEN frequency_band = 800 THEN 1 ELSE 0 END) AS frequency_band_L800,
        MAX(CASE WHEN frequency_band = 1800 THEN 1 ELSE 0 END) AS frequency_band_L1800,
        MAX(CASE WHEN frequency_band = 2100 THEN 1 ELSE 0 END) AS frequency_band_L2100,
        MAX(CASE WHEN frequency_band = 2600 THEN 1 ELSE 0 END) AS frequency_band_L2600
    FROM martech_archive_lte
    GROUP BY p_year, p_month, p_day, site_id
)

SELECT
    site.p_year,
    site.p_month,
    site.p_day,
    site.site_id,

    -- cell count
    COALESCE(gsm.cell_cnt, 0) AS site_2g_cnt,
    COALESCE(umts.cell_cnt, 0) AS site_3g_cnt,
    COALESCE(lte.cell_cnt, 0) AS site_4g_cnt,

    -- frequency_band: gsm
    COALESCE(gsm.frequency_band_G900, 0) AS frequency_band_G900,
    COALESCE(gsm.frequency_band_G1800, 0) AS frequency_band_G1800,
    -- frequency_band: umts
    COALESCE(umts.frequency_band_U900, 0) AS frequency_band_U900,
    COALESCE(umts.frequency_band_U2100, 0) AS frequency_band_U2100,
    -- frequency_band: lte
    COALESCE(lte.frequency_band_L700, 0) AS frequency_band_L700,
    COALESCE(lte.frequency_band_L800, 0) AS frequency_band_L800,
    COALESCE(lte.frequency_band_L1800, 0) AS frequency_band_L1800,
    COALESCE(lte.frequency_band_L2100, 0) AS frequency_band_L2100,
    COALESCE(lte.frequency_band_L2600, 0) AS frequency_band_L2600
FROM site
LEFT JOIN gsm
    ON site.p_year = gsm.p_year
    AND site.p_month = gsm.p_month
    AND site.p_day = gsm.p_day
    AND site.site_id = gsm.site_id
LEFT JOIN umts
    ON site.p_year = umts.p_year
    AND site.p_month = umts.p_month
    AND site.p_day = umts.p_day
    AND site.site_id = umts.site_id
LEFT JOIN lte
    ON site.p_year = lte.p_year
    AND site.p_month = lte.p_month
    AND site.p_day = lte.p_day
    AND site.site_id = lte.site_id

;