# Description

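Unit test for PySpark: start a local Spark session, read a single part file of the merchant Parquet dataset, and preview a random sample of 10 rows.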

# Unit Test

In [1]:
from pyspark import SparkContext
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql import Window as W 

In [2]:
def init_spark(n_cores=6, driver_memory='10g', app_name="Standard Pyspark"):
    '''
    Create (or reuse) a local SparkSession. Initiate spark with `spark = init_spark()`.

    Note that we only read from local s3fs.

    Parameters
    ----------
    n_cores :
        Number of local worker threads, inserted into the master URL as
        `local[n_cores]`. Defaults to 6. Leave some cores free for other
        users of the machine.
    driver_memory : str
        Value for `spark.driver.memory`. Defaults to '10g'.
    app_name : str
        Spark application name. Defaults to "Standard Pyspark".

    Returns
    -------
    The SparkSession returned by `getOrCreate()`, with eager evaluation
    switched on for notebook display.
    '''

    session_builder = (SparkSession.builder
                       .config('spark.master', f'local[{n_cores}]')
                       .config('spark.driver.memory', driver_memory)
                       .appName(app_name)
                       )

    # getOrCreate() hands back an already-running session when there is one.
    # NOTE(review): in that case master / driver memory may not take effect;
    # restart the kernel to change them -- confirm.
    spark = session_builder.getOrCreate()

    # Notebook display settings: render DataFrames eagerly, showing at most
    # 10 rows per DataFrame.
    spark.conf.set('spark.sql.repl.eagerEval.enabled', True)
    # NOTE(review): -1 presumably disables per-cell truncation -- confirm.
    spark.conf.set('spark.sql.repl.eagerEval.truncate', -1)
    spark.conf.set('spark.sql.repl.eagerEval.maxNumRows', 10)

    return spark

In [3]:
# Start the local Spark session; init_spark() obtains it via getOrCreate().
spark = init_spark()

In [4]:
# Bare expression: shows the SparkSession object as this cell's output.
spark

In [5]:
data_path = '/home/ds_user/s3/adc-ds-data/fact_plain/boost/merchant/latest_data/merchant.parquet/part-00000-b875e077-9a22-4084-adb2-bf7347a3b61e-c000.snappy.parquet'


In [6]:
# Load the Parquet file at `data_path` into a Spark DataFrame.
data_sdf = spark.read.format('parquet').load(data_path)

In [7]:
# Preview 10 randomly ordered rows. A fixed seed makes the preview repeatable
# across runs as long as the input file and its partitioning are unchanged.
# NOTE(review): the saved output of this cell shows merchant names, e-mail
# addresses, phone numbers, ID numbers and bank account numbers -- clear or
# redact the output before sharing or committing the notebook.
data_sdf.orderBy(F.rand(seed=42)).limit(10)

business_account_id,account_description,owner_contact_no,owner_email,owner_fullname,owner_id,owner_id_type,account_type,account_unique_name,business_address1,business_address2,business_city,business_country,business_postalcode,business_state,business_type,mailing_address1,mailing_address2,mailing_city,mailing_country,mailing_postalcode,mailing_state,mailing_type,bank_account_holder_name,bank_account_no,bank_settlement_id,bank_settlement_id_type,bank_swift_code,business_category_id,business_category_level,business_category_name,business_profile_id,created_by,date_created,modified_by,date_modified,merchant_category,merchant_id,original_merchant_id,identifier,q_registered_name,registered_id,registered_id_type,registered_name,status,trading_name,year_of_incoporation,ops_filename,ops_filename_date,ops_loaded_datetime,ops_type
5cf0845db5618f4208d45ca4,,60192628813,yaplipwei@gmail.com,Yap Lip Wei,831019145267.0,NEW_IC,MERCHANT,Yap Lip Wei,Grab Car @ Sepang,,Sepang,Malaysia,64000,Selangor,BUSINESS,No 33 Jalan 1/4 46000 Petaling Jaya Selangor,,Petaling Jaya,Malaysia,46000,Selangor,MAIL,Yap Lip Wei,11403500613574,831019145267,NEW_IC,RHBBMYKL,A1013,1.0,Services,5cf078c3b5618f4208d34ff0,Merchant Ops 6,2018-07-02 09:07:12,Merchant Ops 6,2018-07-02 09:07:12,INDIVIDUAL,16721,16721,MCM0024252,YAP LIP WEI,831019145267,NEW_IC,Yap Lip Wei,ACTIVE,WC8568F,2018,20210208_merchant,20210208,2021-02-09 02:00:58,others
5f4717865dc10c00017384de,,60123037242,luckygirl_8383@yahoo.com,KRISHNA KUMARI A/P KRISHNAN,830103045248.0,NEW_IC,MERCHANT,KRISHNA KUMARI A/P KRISHNAN,"CHENG GARDEN AVENUE FOOD CENTRE JALAN SERI 7, CHENG",,MELAKA,MY,75260,Melaka,BUSINESS,"NO.4, JALAN MURNI 17 TAMAN MALIM JAYA",,MELAKA,MY,75250,Melaka,MAIL,KRISHNA KUMARI A/P KRISHNAN,154062710275,830103045248,NEW_IC,MBBEMYKL,A2028,1.0,Food & Beverage,5f471705f412b20001e95198,firdausnasha,2020-08-27 02:16:38,firdausnasha,2020-08-27 02:16:38,INDIVIDUAL,99937,99937,MCM0107466,KRISHNA KUMARI A/P KRISHNAN,830103045248,NEW_IC,KRISHNA KUMARI A/P KRISHNAN,ACTIVE,NASI LEMAK KUKUS BERLAUK,2015,20210208_merchant,20210208,2021-02-09 02:00:58,others
5cf08467b5618f4208d46227,,6017374165,Awsrafarwen@gmail.com,MOHD KHAREY BIN NOOH,740317125359.0,NEW_IC,MERCHANT,MOHD KHAREY BIN NOOH,JALAN ILHAM U2/14 SEKSYEN U2 TAMAN TTDI JAYA,,Shah Alam,Malaysia,40150,Selangor,BUSINESS,5D-01-35 JALAN ILHAM U2/14 SEKSYEN U2 TAMAN TTDI JAYA,,Shah Alam,Malaysia,40150,Selangor,MAIL,MOHD KHAREY BIN NOOH,162478353117,740317125359,NEW_IC,MBBEMYKL,A1001,1.0,Food & Beverage,5cf078ceb5618f4208d35806,Merchant Ops 1,2018-07-24 09:07:22,Merchant Ops 1,2018-07-24 09:07:22,INDIVIDUAL,18812,18812,MCM0026343,MOHD KHAREY BIN NOOH,740317125359,NEW_IC,MOHD KHAREY BIN NOOH,ACTIVE,NASI AYAM PENYET DAN BAKSO,2018,20210208_merchant,20210208,2021-02-09 02:00:58,others
5cf083dab5618f4208d3cba5,"14/12/2018 - inactive due unreachable, need merchant to provide new account details. as current bank account is closed.",60146667999,uro2m.md@gmail.com,MAS HAMDY BIN RAZALAN,,,MERCHANT,MHS SIZZLING RESOURCES,"NO 1-16 TINGKAT BAWAH RESIDENSI UTM KL, JALAN MAKTAB 54000 KUALA",,KUALA LUMPUR,Malaysia,54000,Wilayah Persekutuan,BUSINESS,NO 15 JALAN TP7/B TAMAN PERINDUSTRIAN UEP 47620 SUBANG JAYA,,SUBANG JAYA,Malaysia,47620,Selangor,MAIL,MHS SIZZLING RESOURCES,8603041669,SA0430084T,BRN,CIBBMYKL,A1001,1.0,Food & Beverage,5cf07880b5618f4208d3182e,,2017-11-30 03:11:08,,2018-12-15 05:12:18,RETAIL,2400,2400,MCM0009931,MHS SIZZLING RESOURCES,SA0430084T,BRN,MHS SIZZLING RESOURCES,INACTIVE,MHS SIZZLING RESOURCES,2016,20210208_merchant,20210208,2021-02-09 02:00:58,others
5cfaf88b0b85a05d89d350b6,,60192469717,esthertfl@gmail.com,TAI PIANG SIN,,,MERCHANT,TPS AUTO ACCESSORIES & AIRCOND SERVICE,"NO.561,JALAN IDAMAN 1,",TAMAN DESA IDAMAN,DURIAN TUNGGAL,Malaysia,76100,Melaka,BUSINESS,"NO.561,JALAN IDAMAN 1,",TAMAN DESA IDAMAN,DURIAN TUNGGAL,Malaysia,76100,Melaka,MAIL,TPS AUTO ACCESSORIES & AIRCOND,504124123080,MA0169362W,BRN,MBBEMYKL,A2008,1.0,Automotive,5cfaf2220b85a05d89d34f08,Sundravarthan Muniandi,2019-06-03 02:06:41,dyleah,2020-10-21 03:32:03,RETAIL,48467,48467,MCM0055998,TPS AUTO ACCESSORIES & AIRCOND SERVICE,MA0169362W,BRN,TPS AUTO ACCESSORIES & AIRCOND SERVICE,ACTIVE,TPS AUTO ACCESSORIES & AIRCOND SERVICE,2012,20210208_merchant,20210208,2021-02-09 02:00:58,others
5e708d4bcde0a700015b9cd0,,601125485006,tsauto28@yahoo.com,NG KIM LENG,701018065139.0,NEW_IC,MERCHANT,TECK SEONG AUTO TRADING SDN. BHD.,"LOT 308-311,JALAN DATO NGAU KEN LOCK",,TEMERLOH,MY,28000,Pahang,BUSINESS,"LOT 308-311,JALAN DATO NGAU KEN LOCK",,TEMERLOH,MY,28000,Pahang,MAIL,TECK SEONG AUTO TRADING SDN. BHD.,556057502862,940871W,CRN,MBBEMYKL,A2008,1.0,Automotive,5e708d0b607d9f00010e7eaf,nurindalilah,2020-03-17 08:41:47,danielahmad,2020-06-25 09:43:50,RETAIL,78737,78737,MCM0086266,TECK SEONG AUTO TRADING SDN. BHD.,940871W,CRN,TECK SEONG AUTO TRADING SDN. BHD.,ACTIVE,TECK SEONG AUTO TRADING SDN. BHD.,2011,20210208_merchant,20210208,2021-02-09 02:00:58,sdn_bhd
5cf083dbb5618f4208d3d3c8,,60167710518,tongjun531@gmail.com,Beh Cheng Huat,,,MERCHANT,Hup Choon Sports,"737, Jalan Kulim,",,Bukit Mertajam,Malaysia,14000,Pulau Pinang,BUSINESS,"737, Jalan Kulim,",,Bukit Mertajam,Malaysia,14000,Pulau Pinang,MAIL,Hup Choon Sports,3109486229,PG0154028D,BRN,PBBEMYKL,A1010,1.0,Others,5cf07890b5618f4208d33202,Merchant Ops 5,2018-04-05 08:04:25,Merchant Ops 5,2018-04-05 08:04:25,RETAIL,9023,9023,MCM0016554,HUP CHOON SPORTS,PG0154028D,BRN,Hup Choon Sports,ACTIVE,Hup Choon Sports,1992,20210208_merchant,20210208,2021-02-09 02:00:58,others
5f3faab3ba11df000157a6f6,,60126299808,waikwanenterprise@gmail.com,CHOO KA WAI,930325105881.0,NEW_IC,MERCHANT,WAI KWAN ENTERPRISE Locked control,"NO. 2, JALAN BAWAL 7, TAMAN MUTIARA",,TANJONG SEPAT,MY,42800,Selangor,BUSINESS,"NO. 2, JALAN BAWAL 7, TAMAN MUTIARA",,TANJONG SEPAT,MY,42800,Selangor,MAIL,WAI KWAN ENTERPRISE,514178638933,002353197V,BRN,MBBEMYKL,A2012,1.0,Buying/Retail Trade,5f3faa46a7be010001999911,bazil,2020-08-21 11:06:27,bazil,2020-08-21 11:10:08,RETAIL,98750,98750,MCM0106279,WAI KWAN ENTERPRISE,002353197V,BRN,WAI KWAN ENTERPRISE,ACTIVE,WAI KWAN ENTERPRISE,2014,20210208_merchant,20210208,2021-02-09 02:00:58,others
5f290619e196980001f893c4,,60176239109,farmasiubatku@gmail.com,NURUL SYAMIMI BINTI SAHAROM,930312065606.0,NEW_IC,MERCHANT,SAMILIN ENTERPRISE,"NO 9 JALAN CENGAL 2, TAMAN CENGAL",,TEMERLOH,MY,28000,Pahang,BUSINESS,"NO 9 JALAN CENGAL 2, TAMAN CENGAL",,TEMERLOH,MY,28000,Pahang,MAIL,SAMILIN ENTERPRISE,6028010070289,CT0071290M,BRN,BIMBMYKL,A2012,1.0,Buying/Retail Trade,5f2905cc3f54770001652c98,allyanaziruddin,2020-08-04 06:54:17,allyanaziruddin,2020-08-04 06:54:43,RETAIL,95332,95332,MCM0102861,SAMILIN ENTERPRISE,CT0071290M,BRN,SAMILIN ENTERPRISE,ACTIVE,FARMASI UBATKU,2020,20210208_merchant,20210208,2021-02-09 02:00:58,others
5cf08405b5618f4208d3fe72,,60163333970,elvisinlove@hotmail.com,TEH YEW FEI,,,MERCHANT,ELVIS BENTO CAFE,PT NO. 140515,MUKIM HULU KINTA,IPOH,Malaysia,31650,Perak,BUSINESS,PT NO. 140515,MUKIM HULU KINTA,IPOH,Malaysia,31650,Perak,MAIL,ELVIS BENTO CAFE,508038615289,IP0500615K,BRN,MBBEMYKL,A2028,1.0,Food & Beverage,5cf07994b5618f4208d38d94,Merchant Ops 1,2018-12-04 04:12:34,firdausnasha,2020-10-27 02:26:54,RETAIL,32748,32748,MCM0040279,ELVIS BENTO CAFE,IP0500615K,BRN,ELVIS BENTO CAFE,ACTIVE,ELVIS BENTO CAFE,2018,20210208_merchant,20210208,2021-02-09 02:00:58,others
