In [1]:
from pyspark.sql import SparkSession

In [2]:
import os

In [3]:
# Current working directory of the kernel; the CSV paths below are built from it.
cwd = os.getcwd()

In [4]:
# Reuse the active Spark session if one exists, otherwise start a new one.
spark = (
    SparkSession.builder
    .appName("how to read csv file")
    .getOrCreate()
)

# Load Case.csv from the working directory: comma-separated, first row is the
# header, and column types are inferred from the data.
cases = (
    spark.read
    .format('csv')
    .options(sep=',', inferSchema="true", header="true")
    .load(cwd + '/Case.csv')
)

In [5]:
# Preview the first 10 rows of the loaded data.
cases.show(10)

+--------+--------+---------------+-----+--------------------+---------+---------+----------+
| case_id|province|           city|group|      infection_case|confirmed| latitude| longitude|
+--------+--------+---------------+-----+--------------------+---------+---------+----------+
| 1000001|   Seoul|     Yongsan-gu| true|       Itaewon Clubs|      139|37.538621|126.992652|
| 1000002|   Seoul|      Gwanak-gu| true|             Richway|      119| 37.48208|126.901384|
| 1000003|   Seoul|        Guro-gu| true| Guro-gu Call Center|       95|37.508163|126.884387|
| 1000004|   Seoul|   Yangcheon-gu| true|Yangcheon Table T...|       43|37.546061|126.874209|
| 1000005|   Seoul|      Dobong-gu| true|     Day Care Center|       43|37.679422|127.044374|
| 1000006|   Seoul|        Guro-gu| true|Manmin Central Ch...|       41|37.481059|126.894343|
| 1000007|   Seoul|from other city| true|SMR Newly Planted...|       36|        -|         -|
| 1000008|   Seoul|  Dongdaemun-gu| true|       Dongan Churc

In [6]:
# Drop the 'group' and 'latitude' columns.
cases= cases.drop('group','latitude')

In [7]:
# Convert only the first 10 rows to a pandas DataFrame for display.
cases.limit(10).toPandas()

Unnamed: 0,case_id,province,city,infection_case,confirmed,longitude
0,1000001,Seoul,Yongsan-gu,Itaewon Clubs,139,126.992652
1,1000002,Seoul,Gwanak-gu,Richway,119,126.901384
2,1000003,Seoul,Guro-gu,Guro-gu Call Center,95,126.884387
3,1000004,Seoul,Yangcheon-gu,Yangcheon Table Tennis Club,43,126.874209
4,1000005,Seoul,Dobong-gu,Day Care Center,43,127.044374
5,1000006,Seoul,Guro-gu,Manmin Central Church,41,126.894343
6,1000007,Seoul,from other city,SMR Newly Planted Churches Group,36,-
7,1000008,Seoul,Dongdaemun-gu,Dongan Church,17,127.056766
8,1000009,Seoul,from other city,Coupang Logistics Center,25,-
9,1000010,Seoul,Gwanak-gu,Wangsung Church,30,126.930121


In [8]:
# Rename a single column; every other column is carried over unchanged.
cases1 = cases.withColumnRenamed(
    existing="infection_case",
    new="infection_source",
)

In [9]:
from pyspark.sql import functions as f
from pyspark.sql.types import DoubleType, IntegerType, StringType


In [10]:
# Re-select every column under a name with all spaces removed.
cases1 = cases1.select(
    *(f.col(name).alias(name.replace(' ', '')) for name in cases1.columns)
)

In [11]:
# List the column names after the rename and space removal.
cases1.columns

['case_id', 'province', 'city', 'infection_source', 'confirmed', 'longitude']

In [12]:
# Cast both numeric columns to integers in one chained expression.
cases1 = (
    cases1
    .withColumn('confirmed', f.col('confirmed').cast(IntegerType()))
    .withColumn('case_id', f.col('case_id').cast(IntegerType()))
)

In [13]:
# Check each column's type after the casts.
cases1.dtypes

[('case_id', 'int'),
 ('province', 'string'),
 ('city', 'string'),
 ('infection_source', 'string'),
 ('confirmed', 'int'),
 ('longitude', 'string')]

# Adding zeros in front of an int

In [14]:
# Show the first 5 rows of cases1.
cases1.show(5)

+-------+--------+------------+--------------------+---------+----------+
|case_id|province|        city|    infection_source|confirmed| longitude|
+-------+--------+------------+--------------------+---------+----------+
|1000001|   Seoul|  Yongsan-gu|       Itaewon Clubs|      139|126.992652|
|1000002|   Seoul|   Gwanak-gu|             Richway|      119|126.901384|
|1000003|   Seoul|     Guro-gu| Guro-gu Call Center|       95|126.884387|
|1000004|   Seoul|Yangcheon-gu|Yangcheon Table T...|       43|126.874209|
|1000005|   Seoul|   Dobong-gu|     Day Care Center|       43|127.044374|
+-------+--------+------------+--------------------+---------+----------+
only showing top 5 rows



In [15]:
# cases1 = cases1.withColumn('confirmed1', f.col('confirmed').cast(StringType()))

In [16]:
def change_type_geokey(sdf):
    """Return ``sdf`` with its 'confirmed' column cast to a string.

    Args:
        sdf: A Spark DataFrame that has a 'confirmed' column.

    Returns:
        A new DataFrame in which 'confirmed' has been replaced by its
        string-typed version.
    """
    confirmed_as_text = f.col('confirmed').cast(StringType())
    return sdf.withColumn('confirmed', confirmed_as_text)

In [17]:
@f.udf(returnType=StringType())
def get_string_to_length(text):
    """Left-pad ``text`` with '0' characters to a width of four.

    Registered as a Spark UDF that returns a string column.

    Args:
        text: The string value of one row, or ``None`` for a SQL NULL.

    Returns:
        ``text`` prefixed with enough zeros to reach four characters.
        Values that are already four or more characters long are returned
        unchanged, and ``None`` is passed through as ``None``.
    """
    # NULL cells reach a Python UDF as None; len(None) would raise a
    # TypeError and fail the whole job, so propagate the null instead.
    if text is None:
        return None
    # rjust never truncates, which matches the earlier '0' * n + text form
    # where a non-positive n contributed no padding at all.
    return text.rjust(4, '0')

# cases2 = cases1.withColumn('confirmed', get_string_to_length(f.col('confirmed')))

In [18]:
# Cast 'confirmed' to a string, then zero-pad it with the UDF.
cases2 = (
    change_type_geokey(cases1)
    .withColumn('confirmed', get_string_to_length('confirmed'))
)

In [19]:
# Confirm that 'confirmed' is now a string column.
cases2.dtypes

[('case_id', 'int'),
 ('province', 'string'),
 ('city', 'string'),
 ('infection_source', 'string'),
 ('confirmed', 'string'),
 ('longitude', 'string')]

In [20]:
# NOTE(review): cases2 was already padded by get_string_to_length when it was
# created; this second pass adds no zeros to values that are already 4 characters long.
cases2 = cases2.withColumn('confirmed', get_string_to_length(f.col('confirmed')))

In [21]:
# Display the zero-padded 'confirmed' values.
cases2.show()

+-------+--------+---------------+--------------------+---------+----------+
|case_id|province|           city|    infection_source|confirmed| longitude|
+-------+--------+---------------+--------------------+---------+----------+
|1000001|   Seoul|     Yongsan-gu|       Itaewon Clubs|     0139|126.992652|
|1000002|   Seoul|      Gwanak-gu|             Richway|     0119|126.901384|
|1000003|   Seoul|        Guro-gu| Guro-gu Call Center|     0095|126.884387|
|1000004|   Seoul|   Yangcheon-gu|Yangcheon Table T...|     0043|126.874209|
|1000005|   Seoul|      Dobong-gu|     Day Care Center|     0043|127.044374|
|1000006|   Seoul|        Guro-gu|Manmin Central Ch...|     0041|126.894343|
|1000007|   Seoul|from other city|SMR Newly Planted...|     0036|         -|
|1000008|   Seoul|  Dongdaemun-gu|       Dongan Church|     0017|127.056766|
|1000009|   Seoul|from other city|Coupang Logistics...|     0025|         -|
|1000010|   Seoul|      Gwanak-gu|     Wangsung Church|     0030|126.930121|

In [22]:
# Derive case_id_1 = confirmed * 100000 + case_id; the result is only
# displayed, cases1 itself is not reassigned.
cases1.withColumn(
    'case_id_1',
    f.col('case_id') + f.col('confirmed') * 100000,
).show(5)

+-------+--------+------------+--------------------+---------+----------+---------+
|case_id|province|        city|    infection_source|confirmed| longitude|case_id_1|
+-------+--------+------------+--------------------+---------+----------+---------+
|1000001|   Seoul|  Yongsan-gu|       Itaewon Clubs|      139|126.992652| 14900001|
|1000002|   Seoul|   Gwanak-gu|             Richway|      119|126.901384| 12900002|
|1000003|   Seoul|     Guro-gu| Guro-gu Call Center|       95|126.884387| 10500003|
|1000004|   Seoul|Yangcheon-gu|Yangcheon Table T...|       43|126.874209|  5300004|
|1000005|   Seoul|   Dobong-gu|     Day Care Center|       43|127.044374|  5300005|
+-------+--------+------------+--------------------+---------+----------+---------+
only showing top 5 rows



In [23]:
# cases1 is unchanged: the withColumn result above was displayed, not assigned.
cases1.show(5)

+-------+--------+------------+--------------------+---------+----------+
|case_id|province|        city|    infection_source|confirmed| longitude|
+-------+--------+------------+--------------------+---------+----------+
|1000001|   Seoul|  Yongsan-gu|       Itaewon Clubs|      139|126.992652|
|1000002|   Seoul|   Gwanak-gu|             Richway|      119|126.901384|
|1000003|   Seoul|     Guro-gu| Guro-gu Call Center|       95|126.884387|
|1000004|   Seoul|Yangcheon-gu|Yangcheon Table T...|       43|126.874209|
|1000005|   Seoul|   Dobong-gu|     Day Care Center|       43|127.044374|
+-------+--------+------------+--------------------+---------+----------+
only showing top 5 rows



# Changing Multiple Columns Together

In [25]:
# Display the cases DataFrame before its columns are renamed.
cases.show()

+--------+--------+---------------+--------------------+---------+----------+
| case_id|province|           city|      infection_case|confirmed| longitude|
+--------+--------+---------------+--------------------+---------+----------+
| 1000001|   Seoul|     Yongsan-gu|       Itaewon Clubs|      139|126.992652|
| 1000002|   Seoul|      Gwanak-gu|             Richway|      119|126.901384|
| 1000003|   Seoul|        Guro-gu| Guro-gu Call Center|       95|126.884387|
| 1000004|   Seoul|   Yangcheon-gu|Yangcheon Table T...|       43|126.874209|
| 1000005|   Seoul|      Dobong-gu|     Day Care Center|       43|127.044374|
| 1000006|   Seoul|        Guro-gu|Manmin Central Ch...|       41|126.894343|
| 1000007|   Seoul|from other city|SMR Newly Planted...|       36|         -|
| 1000008|   Seoul|  Dongdaemun-gu|       Dongan Church|       17|127.056766|
| 1000009|   Seoul|from other city|Coupang Logistics...|       25|         -|
| 1000010|   Seoul|      Gwanak-gu|     Wangsung Church|       3

In [26]:
# Rename multiple columns at once: toDF assigns the given names by position.
cases = cases.toDF(
    'case_id', 'province', 'city',
    'infection_case', 'confirmed', 'longitude',
)

In [27]:
# Display cases with the new column names.
cases.show()

+-------+--------+---------------+--------------------+---------+----------+
|case_id|province|           city|      infection_case|confirmed| longitude|
+-------+--------+---------------+--------------------+---------+----------+
|1000001|   Seoul|     Yongsan-gu|       Itaewon Clubs|      139|126.992652|
|1000002|   Seoul|      Gwanak-gu|             Richway|      119|126.901384|
|1000003|   Seoul|        Guro-gu| Guro-gu Call Center|       95|126.884387|
|1000004|   Seoul|   Yangcheon-gu|Yangcheon Table T...|       43|126.874209|
|1000005|   Seoul|      Dobong-gu|     Day Care Center|       43|127.044374|
|1000006|   Seoul|        Guro-gu|Manmin Central Ch...|       41|126.894343|
|1000007|   Seoul|from other city|SMR Newly Planted...|       36|         -|
|1000008|   Seoul|  Dongdaemun-gu|       Dongan Church|       17|127.056766|
|1000009|   Seoul|from other city|Coupang Logistics...|       25|         -|
|1000010|   Seoul|      Gwanak-gu|     Wangsung Church|       30|126.930121|

# Sort

In [28]:
from pyspark.sql import functions as f
from pyspark.sql.types import DoubleType, IntegerType, StringType

In [29]:
# Largest 'confirmed' first; ties are broken by the larger case_id.
cases.sort("confirmed", "case_id", ascending=False).show(5)
# cases.sort("case_id").show(5)

+-------+----------------+---------------+--------------------+---------+---------+
|case_id|        province|           city|      infection_case|confirmed|longitude|
+-------+----------------+---------------+--------------------+---------+---------+
|1200001|           Daegu|         Nam-gu|  Shincheonji Church|     4511| 128.5667|
|1200009|           Daegu|              -|contact with patient|      917|        -|
|1200010|           Daegu|              -|                 etc|      747|        -|
|6000001|Gyeongsangbuk-do|from other city|  Shincheonji Church|      566|        -|
|2000020|     Gyeonggi-do|              -|     overseas inflow|      305|        -|
+-------+----------------+---------------+--------------------+---------+---------+
only showing top 5 rows



In [30]:
# Keep three columns and list the rows with the most confirmed cases first.
(
    cases
    .select('case_id', 'city', 'confirmed')
    .sort('confirmed', ascending=False)
    .show(5)
)

+-------+---------------+---------+
|case_id|           city|confirmed|
+-------+---------------+---------+
|1200001|         Nam-gu|     4511|
|1200009|              -|      917|
|1200010|              -|      747|
|6000001|from other city|      566|
|2000020|              -|      305|
+-------+---------------+---------+
only showing top 5 rows



In [31]:
# Keep only the five columns used in the rest of the notebook.
cases = cases.select(
    ['case_id', 'province', 'city', 'infection_case', 'confirmed']
)

# Cast

In [32]:
# Replace 'confirmed' with its integer-typed version.
cases = cases.withColumn(
    'confirmed',
    cases['confirmed'].cast(IntegerType()),
)

In [33]:
# Show the first 5 rows after the cast.
cases.show(5)

+-------+--------+------------+--------------------+---------+
|case_id|province|        city|      infection_case|confirmed|
+-------+--------+------------+--------------------+---------+
|1000001|   Seoul|  Yongsan-gu|       Itaewon Clubs|      139|
|1000002|   Seoul|   Gwanak-gu|             Richway|      119|
|1000003|   Seoul|     Guro-gu| Guro-gu Call Center|       95|
|1000004|   Seoul|Yangcheon-gu|Yangcheon Table T...|       43|
|1000005|   Seoul|   Dobong-gu|     Day Care Center|       43|
+-------+--------+------------+--------------------+---------+
only showing top 5 rows



In [35]:
# Add a 'category' column holding the same literal string in every row.
cases = cases.withColumn('category', f.lit('not defined'))

In [37]:
# Show the first 5 rows including the new 'category' column.
cases.show(5)

+-------+--------+------------+--------------------+---------+-----------+
|case_id|province|        city|      infection_case|confirmed|   category|
+-------+--------+------------+--------------------+---------+-----------+
|1000001|   Seoul|  Yongsan-gu|       Itaewon Clubs|      139|not defined|
|1000002|   Seoul|   Gwanak-gu|             Richway|      119|not defined|
|1000003|   Seoul|     Guro-gu| Guro-gu Call Center|       95|not defined|
|1000004|   Seoul|Yangcheon-gu|Yangcheon Table T...|       43|not defined|
|1000005|   Seoul|   Dobong-gu|     Day Care Center|       43|not defined|
+-------+--------+------------+--------------------+---------+-----------+
only showing top 5 rows



In [38]:
# Collect the distinct values of 'category' to the driver as a list of Rows.
cases.select('category').distinct().collect()

[Row(category='not defined')]

In [39]:
# Collect the distinct province values to the driver as a list of Rows.
# NOTE(review): 'proveince' is a misspelling of 'province'; the name is kept
# because the following cell reads it.
proveince = cases.select('province').distinct().collect()


In [49]:
# Print each collected province next to its position in the list.
for position, row in enumerate(proveince):
    print(position, row['province'])

0 Sejong
1 Ulsan
2 Chungcheongbuk-do
3 Gangwon-do
4 Gwangju
5 Gyeongsangbuk-do
6 Daegu
7 Gyeongsangnam-do
8 Incheon
9 Jeju-do
10 Gyeonggi-do
11 Busan
12 Daejeon
13 Seoul
14 Chungcheongnam-do
15 Jeollabuk-do
16 Jeollanam-do


# Filter

In [None]:
# Rows from Seoul with more than 100 confirmed cases, using f.col references.
(
    cases
    .filter(f.col("confirmed") > 100)
    .filter(f.col("province") == "Seoul")
    .show(5)
)

In [None]:
# Same filter, referring to the columns through the DataFrame itself.
is_large = cases["confirmed"] > 100
in_seoul = cases["province"] == "Seoul"
cases.filter(is_large & in_seoul).show(5)

# Group By

In [None]:
from pyspark.sql import functions as f

In [None]:
# The cell previously held an unfinished expression ("cases."), which is a
# SyntaxError and stops Restart & Run All. Replaced with the simplest group-by:
# the number of rows per province.
cases.groupBy("province").count().show(5)

In [21]:
# Sum, mean and max of 'confirmed' for each province.
(
    cases
    .groupBy("province")
    .agg(
        f.sum("confirmed"),
        f.mean("confirmed"),
        f.max("confirmed"),
    )
    .show(5)
)

+-----------------+--------------+-----------------+--------------+
|         province|sum(confirmed)|   avg(confirmed)|max(confirmed)|
+-----------------+--------------+-----------------+--------------+
|           Sejong|            49|8.166666666666666|            31|
|            Ulsan|            51|            12.75|            25|
|Chungcheongbuk-do|            60|8.571428571428571|            13|
|       Gangwon-do|            62|             7.75|            17|
|          Gwangju|            43|              8.6|            23|
+-----------------+--------------+-----------------+--------------+
only showing top 5 rows



In [22]:
# Same three aggregates, grouped by province and city together.
(
    cases
    .groupBy("province", "city")
    .agg(
        f.sum("confirmed"),
        f.mean("confirmed"),
        f.max("confirmed"),
    )
    .show(5)
)

+----------------+---------------+--------------+------------------+--------------+
|        province|           city|sum(confirmed)|    avg(confirmed)|max(confirmed)|
+----------------+---------------+--------------+------------------+--------------+
|Gyeongsangnam-do|       Jinju-si|             9|               9.0|             9|
|           Seoul|        Guro-gu|           139|46.333333333333336|            95|
|           Seoul|     Gangnam-gu|            18|               4.5|             7|
|         Daejeon|              -|           100|              25.0|            55|
|    Jeollabuk-do|from other city|             6|               2.0|             3|
+----------------+---------------+--------------+------------------+--------------+
only showing top 5 rows



In [23]:
# With aliases: the sum and mean columns get explicit names, while the max
# column keeps its generated name.
(
    cases
    .groupBy("province", "city")
    .agg(
        f.sum("confirmed").alias("TotalConfirmed"),
        f.mean("confirmed").alias("AverageConfirmed"),
        f.max("confirmed"),
    )
    .show(5)
)

+----------------+---------------+--------------+------------------+--------------+
|        province|           city|TotalConfirmed|  AverageConfirmed|max(confirmed)|
+----------------+---------------+--------------+------------------+--------------+
|Gyeongsangnam-do|       Jinju-si|             9|               9.0|             9|
|           Seoul|        Guro-gu|           139|46.333333333333336|            95|
|           Seoul|     Gangnam-gu|            18|               4.5|             7|
|         Daejeon|              -|           100|              25.0|            55|
|    Jeollabuk-do|from other city|             6|               2.0|             3|
+----------------+---------------+--------------+------------------+--------------+
only showing top 5 rows



# Joins

In [24]:
# Load Region.csv from the working directory with the same CSV options as
# the cases file: comma separator, header row, inferred column types.
regions = (
    spark.read
    .format('csv')
    .options(sep=',', inferSchema="true", header="true")
    .load(cwd + '/Region.csv')
)

In [25]:
# List the column names of the regions DataFrame.
regions.columns

['code',
 'province',
 'city',
 'latitude',
 'longitude',
 'elementary_school_count',
 'kindergarten_count',
 'university_count',
 'academy_ratio',
 'elderly_population_ratio',
 'elderly_alone_ratio',
 'nursing_home_count']

In [26]:
# Summary statistics for the 'code' and 'latitude' columns only.
regions.describe(['code','latitude']).show()

+-------+------------------+------------------+
|summary|              code|          latitude|
+-------+------------------+------------------+
|  count|               244|               244|
|   mean| 32912.09016393442| 36.39699581147539|
| stddev|19373.349735535565|1.0603044400519337|
|    min|             10000|         33.488936|
|    max|             80000|         38.380571|
+-------+------------------+------------------+



In [27]:
# Column types of the regions DataFrame.
regions.dtypes

[('code', 'int'),
 ('province', 'string'),
 ('city', 'string'),
 ('latitude', 'double'),
 ('longitude', 'double'),
 ('elementary_school_count', 'int'),
 ('kindergarten_count', 'int'),
 ('university_count', 'int'),
 ('academy_ratio', 'double'),
 ('elderly_population_ratio', 'double'),
 ('elderly_alone_ratio', 'double'),
 ('nursing_home_count', 'int')]

In [36]:
%%time
cases_regions = cases.join(regions, ['province', 'city'], how = 'left').toPandas()

Wall time: 460 ms


In [29]:
# Row count of the joined pandas DataFrame.
len(cases_regions)

372

# Broadcast Join

In [32]:
# When you have to join a large table (~1 billion rows) with a much smaller table (~100-200 rows),
# a small optimization you can apply (assuming the other table is small)
# is to broadcast the small table to each machine/node when you perform the join.

In [43]:
from pyspark.sql.functions import broadcast

In [44]:
%%time
cases_reg_broad = cases.join(broadcast(regions), ['province','city'],how='left').toPandas()

Wall time: 330 ms


# Use SQL with Dataframes

In [54]:
# Register cases as a temporary view so SQL queries can refer to it by name.
cases.createOrReplaceTempView('cases_table')

In [55]:
# Query the temporary view with SQL: rows with more than 100 confirmed cases.
newDF = spark.sql("select * from cases_table where confirmed>100")

In [56]:
# Display the result of the SQL query.
newDF.show()

+-------+-----------------+---------------+--------------------+---------+
|case_id|         province|           city|      infection_case|confirmed|
+-------+-----------------+---------------+--------------------+---------+
|1000001|            Seoul|     Yongsan-gu|       Itaewon Clubs|      139|
|1000002|            Seoul|      Gwanak-gu|             Richway|      119|
|1000036|            Seoul|              -|     overseas inflow|      298|
|1000037|            Seoul|              -|contact with patient|      162|
|1200001|            Daegu|         Nam-gu|  Shincheonji Church|     4511|
|1200002|            Daegu|   Dalseong-gun|Second Mi-Ju Hosp...|      196|
|1200003|            Daegu|         Seo-gu|Hansarang Convale...|      124|
|1200004|            Daegu|   Dalseong-gun|Daesil Convalesce...|      101|
|1200009|            Daegu|              -|contact with patient|      917|
|1200010|            Daegu|              -|                 etc|      747|
|2000020|      Gyeonggi-d

# Create New Columns

In [57]:
# New column computed from an existing one: confirmed plus a constant 100.
casesWithNewConfirmed = cases.withColumn(
    "NewConfirmed",
    f.col("confirmed") + 100,
)
casesWithNewConfirmed.show()

+-------+--------+---------------+--------------------+---------+------------+
|case_id|province|           city|      infection_case|confirmed|NewConfirmed|
+-------+--------+---------------+--------------------+---------+------------+
|1000001|   Seoul|     Yongsan-gu|       Itaewon Clubs|      139|         239|
|1000002|   Seoul|      Gwanak-gu|             Richway|      119|         219|
|1000003|   Seoul|        Guro-gu| Guro-gu Call Center|       95|         195|
|1000004|   Seoul|   Yangcheon-gu|Yangcheon Table T...|       43|         143|
|1000005|   Seoul|      Dobong-gu|     Day Care Center|       43|         143|
|1000006|   Seoul|        Guro-gu|Manmin Central Ch...|       41|         141|
|1000007|   Seoul|from other city|SMR Newly Planted...|       36|         136|
|1000008|   Seoul|  Dongdaemun-gu|       Dongan Church|       17|         117|
|1000009|   Seoul|from other city|Coupang Logistics...|       25|         125|
|1000010|   Seoul|      Gwanak-gu|     Wangsung Chur

In [72]:
# casesWithNewConfirmed.withColumn("confirmed_just", f.col('confirmed')).show()

# Spark UDFs

We use Spark UDFs when we need more mature Python functionality.

In [73]:
import pyspark.sql.functions as F
from pyspark.sql.types import *

In [74]:
def casesHighLow(confirmed, threshold=50):
    """Label a confirmed-case count as 'low' or 'high'.

    Args:
        confirmed: Number of confirmed cases, or ``None`` for a missing value.
        threshold: Counts strictly below this value are labelled 'low'.
            Defaults to 50.

    Returns:
        'low' when ``confirmed < threshold``, 'high' otherwise, and ``None``
        when ``confirmed`` is ``None``.
    """
    # A null cell reaches a Python UDF as None; comparing None with an int
    # raises TypeError on Python 3, so report the label as missing instead.
    if confirmed is None:
        return None
    return 'low' if confirmed < threshold else 'high'

In [75]:
# Wrap the plain Python function as a Spark UDF, declaring its return type.
casesHighLowUDF = F.udf(f=casesHighLow, returnType=StringType())
# Apply the UDF to 'confirmed' and store the label in a new 'HighLow' column.
CasesWithHighLow = cases.withColumn(
    "HighLow",
    casesHighLowUDF(F.col("confirmed")),
)
CasesWithHighLow.show()

+-------+--------+---------------+--------------------+---------+-------+
|case_id|province|           city|      infection_case|confirmed|HighLow|
+-------+--------+---------------+--------------------+---------+-------+
|1000001|   Seoul|     Yongsan-gu|       Itaewon Clubs|      139|   high|
|1000002|   Seoul|      Gwanak-gu|             Richway|      119|   high|
|1000003|   Seoul|        Guro-gu| Guro-gu Call Center|       95|   high|
|1000004|   Seoul|   Yangcheon-gu|Yangcheon Table T...|       43|    low|
|1000005|   Seoul|      Dobong-gu|     Day Care Center|       43|    low|
|1000006|   Seoul|        Guro-gu|Manmin Central Ch...|       41|    low|
|1000007|   Seoul|from other city|SMR Newly Planted...|       36|    low|
|1000008|   Seoul|  Dongdaemun-gu|       Dongan Church|       17|    low|
|1000009|   Seoul|from other city|Coupang Logistics...|       25|    low|
|1000010|   Seoul|      Gwanak-gu|     Wangsung Church|       30|    low|
|1000011|   Seoul|   Eunpyeong-gu|Eunp

# Using RDD (Resilient Distributed Dataset)

In [21]:
# The integers 1 through 10 as a plain Python list.
s = list(range(1, 11))

In [23]:
# Every second element of s, starting with the first.
t = s[::2]

In [24]:
# Step through s with a stride of len(t) - 1.
s[::len(t)-1]

[1, 5, 9]

In [71]:
# Derive 'case-c' = case_id * 100 + confirmed; displayed only, not assigned.
cases.withColumn(
    'case-c',
    f.col('confirmed') + f.col('case_id') * 100,
).show()

+-------+--------+---------------+--------------------+---------+---------+
|case_id|province|           city|      infection_case|confirmed|   case-c|
+-------+--------+---------------+--------------------+---------+---------+
|1000001|   Seoul|     Yongsan-gu|       Itaewon Clubs|      139|100000239|
|1000002|   Seoul|      Gwanak-gu|             Richway|      119|100000319|
|1000003|   Seoul|        Guro-gu| Guro-gu Call Center|       95|100000395|
|1000004|   Seoul|   Yangcheon-gu|Yangcheon Table T...|       43|100000443|
|1000005|   Seoul|      Dobong-gu|     Day Care Center|       43|100000543|
|1000006|   Seoul|        Guro-gu|Manmin Central Ch...|       41|100000641|
|1000007|   Seoul|from other city|SMR Newly Planted...|       36|100000736|
|1000008|   Seoul|  Dongdaemun-gu|       Dongan Church|       17|100000817|
|1000009|   Seoul|from other city|Coupang Logistics...|       25|100000925|
|1000010|   Seoul|      Gwanak-gu|     Wangsung Church|       30|100001030|
|1000011|   

In [61]:
# cases is unchanged: the 'case-c' column above was displayed, not assigned.
cases.show()

+-------+--------+---------------+--------------------+---------+
|case_id|province|           city|      infection_case|confirmed|
+-------+--------+---------------+--------------------+---------+
|1000001|   Seoul|     Yongsan-gu|       Itaewon Clubs|      139|
|1000002|   Seoul|      Gwanak-gu|             Richway|      119|
|1000003|   Seoul|        Guro-gu| Guro-gu Call Center|       95|
|1000004|   Seoul|   Yangcheon-gu|Yangcheon Table T...|       43|
|1000005|   Seoul|      Dobong-gu|     Day Care Center|       43|
|1000006|   Seoul|        Guro-gu|Manmin Central Ch...|       41|
|1000007|   Seoul|from other city|SMR Newly Planted...|       36|
|1000008|   Seoul|  Dongdaemun-gu|       Dongan Church|       17|
|1000009|   Seoul|from other city|Coupang Logistics...|       25|
|1000010|   Seoul|      Gwanak-gu|     Wangsung Church|       30|
|1000011|   Seoul|   Eunpyeong-gu|Eunpyeong St. Mar...|       14|
|1000012|   Seoul|   Seongdong-gu|    Seongdong-gu APT|       13|
|1000013| 