In [1]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


## Install Pyspark

In [2]:
!pip install pyspark

Collecting pyspark
  Downloading pyspark-3.5.0.tar.gz (316.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.9/316.9 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.5.0-py2.py3-none-any.whl size=317425344 sha256=3f3517ca2a7d877534fe9d4ec192e99329663a78911fc9e8d04d88e55929eebd
  Stored in directory: /root/.cache/pip/wheels/41/4e/10/c2cf2467f71c678cfc8a6b9ac9241e5e44a01940da8fbb17fc
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.5.0


## Set up the environment for SparkSession and SparkContext

In [3]:
from pyspark import SparkContext

In [4]:
from pyspark.sql import SparkSession

# Create a SparkSession
spark = SparkSession.builder.appName("PySpark1").getOrCreate()

# Get the SparkContext from the SparkSession
sc = spark.sparkContext

In [5]:
# Spark version
sc.version

'3.5.0'

In [6]:
# PySpark version
sc.pythonVer

'3.10'

In [7]:
# Master location
sc.master

'local[*]'

Currently, the SparkSession is running in local (it can be either **Local** or Cluster)

## Get familiar with PySpark DataFrame





#### Create a dataframe using SparkSession

In [8]:
# Define animals
animals = [
    ("Cat", 3, 12, "Eat meat", "30 kg"),
    ("Dog", 3, 15, "Eat meat and vegetables", "70 kg"),
    ("Mouse", 5, 2, "Eat grass", "800 grams"),
    ("Camel", 5, 30, "Eat dry vegetables", "400 kg"),
    ("Elephant", 8, 50, "Eat grass", "4 tons"),
]

# Define header (schema)
headers = ("Animal name", "Number of letters", "Age", "Food", "Max Weight")

In [9]:
# Create dataframe using Spark
animal_df = spark.createDataFrame(animals, schema=headers)

In [10]:
animal_df.collect()

[Row(Animal name='Cat', Number of letters=3, Age=12, Food='Eat meat', Max Weight='30 kg'),
 Row(Animal name='Dog', Number of letters=3, Age=15, Food='Eat meat and vegetables', Max Weight='70 kg'),
 Row(Animal name='Mouse', Number of letters=5, Age=2, Food='Eat grass', Max Weight='800 grams'),
 Row(Animal name='Camel', Number of letters=5, Age=30, Food='Eat dry vegetables', Max Weight='400 kg'),
 Row(Animal name='Elephant', Number of letters=8, Age=50, Food='Eat grass', Max Weight='4 tons')]

In [11]:
type(animal_df)

pyspark.sql.dataframe.DataFrame

#### Create a dataframe using CSV/JSON/TXT file

In [12]:
file_path = '/content/drive/My Drive/Research Data/Insect_PT.csv'
insect_df = spark.read.csv(file_path, header=True, inferSchema=True)

In [13]:
insect_df.printSchema()

root
 |-- insectID: integer (nullable = true)
 |-- insect_name: string (nullable = true)
 |-- price: integer (nullable = true)
 |-- crown: integer (nullable = true)
 |-- location: string (nullable = true)
 |-- date_caught: string (nullable = true)
 |-- pole: string (nullable = true)



In [14]:
insect_df.take(6)

[Row(insectID=1, insect_name='nhen thay ma', price=30, crown=None, location='plaza', date_caught='10/17/2023 12:12', pole='DARK'),
 Row(insectID=2, insect_name='buom trang', price=68, crown=None, location='plaza', date_caught='10/17/2023 12:12', pole='DARK'),
 Row(insectID=3, insect_name='hop tien sao', price=100, crown=None, location='camp', date_caught='10/17/2023 14:42', pole='DARK'),
 Row(insectID=4, insect_name='bo hung thay ma', price=62, crown=None, location='camp', date_caught='10/17/2023 14:48', pole='DARK'),
 Row(insectID=5, insect_name='ran', price=20, crown=None, location='camp', date_caught='10/17/2023 14:48', pole='DARK'),
 Row(insectID=6, insect_name='buom vang', price=51, crown=1, location='camp', date_caught='10/17/2023 14:49', pole='DARK')]

#### show() - **Print the Dataframe**

In [15]:
# Show top 20 rows (default = 20)
insect_df.show()

+--------+--------------------+-----+-----+--------+----------------+----+
|insectID|         insect_name|price|crown|location|     date_caught|pole|
+--------+--------------------+-----+-----+--------+----------------+----+
|       1|        nhen thay ma|   30| NULL|   plaza|10/17/2023 12:12|DARK|
|       2|          buom trang|   68| NULL|   plaza|10/17/2023 12:12|DARK|
|       3|        hop tien sao|  100| NULL|    camp|10/17/2023 14:42|DARK|
|       4|     bo hung thay ma|   62| NULL|    camp|10/17/2023 14:48|DARK|
|       5|                 ran|   20| NULL|    camp|10/17/2023 14:48|DARK|
|       6|           buom vang|   51|    1|    camp|10/17/2023 14:49|DARK|
|       7|           buom vang|   26| NULL|    camp|10/17/2023 14:49|DARK|
|       8|              ve sau|   64| NULL|    camp|10/17/2023 14:49|DARK|
|       9|chuon chuon duoi ...|   60| NULL|    camp|10/17/2023 14:49|DARK|
|      10|          buom trang|   69| NULL|    camp|10/17/2023 14:49|DARK|
|      11|          ve sa

In [16]:
# Show 40 rows
insect_df.show(40)

+--------+--------------------+-----+-----+--------+----------------+----+
|insectID|         insect_name|price|crown|location|     date_caught|pole|
+--------+--------------------+-----+-----+--------+----------------+----+
|       1|        nhen thay ma|   30| NULL|   plaza|10/17/2023 12:12|DARK|
|       2|          buom trang|   68| NULL|   plaza|10/17/2023 12:12|DARK|
|       3|        hop tien sao|  100| NULL|    camp|10/17/2023 14:42|DARK|
|       4|     bo hung thay ma|   62| NULL|    camp|10/17/2023 14:48|DARK|
|       5|                 ran|   20| NULL|    camp|10/17/2023 14:48|DARK|
|       6|           buom vang|   51|    1|    camp|10/17/2023 14:49|DARK|
|       7|           buom vang|   26| NULL|    camp|10/17/2023 14:49|DARK|
|       8|              ve sau|   64| NULL|    camp|10/17/2023 14:49|DARK|
|       9|chuon chuon duoi ...|   60| NULL|    camp|10/17/2023 14:49|DARK|
|      10|          buom trang|   69| NULL|    camp|10/17/2023 14:49|DARK|
|      11|          ve sa

#### select() - **Just like SELECT in SQL**

In [17]:
# Store all 'insect_name' from dataset to variable "insect_type"
insect_type = insect_df.select('insect_name')

In [18]:
# Show 20 (default) rows of insect_name
insect_type.show()

+--------------------+
|         insect_name|
+--------------------+
|        nhen thay ma|
|          buom trang|
|        hop tien sao|
|     bo hung thay ma|
|                 ran|
|           buom vang|
|           buom vang|
|              ve sau|
|chuon chuon duoi ...|
|          buom trang|
|          ve sau dau|
|                 ran|
|      oc sen thay ma|
|           chau chau|
|           buom vang|
|chuon chuon duoi ...|
|      chuon chuon ot|
|      chuon chuon ot|
|             cao cao|
|                 ong|
+--------------------+
only showing top 20 rows



In [19]:
# Store all 'price' from dataset to variable "insect_price"
insect_price = insect_df.select('price')

In [20]:
# Show 7 rows of 'price'
insect_price.show(7)

+-----+
|price|
+-----+
|   30|
|   68|
|  100|
|   62|
|   20|
|   51|
|   26|
+-----+
only showing top 7 rows



In [21]:
# Store more than 1 column
insect_df_3col = insect_df.select('insect_name', 'price', 'crown')
insect_df_3col.show(6)

+---------------+-----+-----+
|    insect_name|price|crown|
+---------------+-----+-----+
|   nhen thay ma|   30| NULL|
|     buom trang|   68| NULL|
|   hop tien sao|  100| NULL|
|bo hung thay ma|   62| NULL|
|            ran|   20| NULL|
|      buom vang|   51|    1|
+---------------+-----+-----+
only showing top 6 rows



#### filter() - **Like WHERE in SQL, it is used to get rows based on conditions**

In [22]:
# Get all the insect records with 'price' is exactly 28
insect_df_price28 = insect_df.filter(insect_df.price == 28)
insect_df_price28.show(4)

+--------+------------------+-----+-----+--------+----------------+----+
|insectID|       insect_name|price|crown|location|     date_caught|pole|
+--------+------------------+-----+-----+--------+----------------+----+
|      35|chau chau sung dai|   28| NULL|    city|10/17/2023 15:01|DARK|
|      70|  than lan bao dom|   28| NULL|    city|10/17/2023 16:35|DARK|
|     164|      nhen thay ma|   28| NULL|    camp|10/17/2023 17:14|DARK|
|     217|chau chau sung dai|   28| NULL|    city|10/17/2023 23:33|DARK|
+--------+------------------+-----+-----+--------+----------------+----+
only showing top 4 rows



In [23]:
# Get all the insect records with 'price' is exactly 56 or 500
insect_df_pricex = insect_df.filter((insect_df.price == 56) | (insect_df.price == 500))
insect_df_pricex.show()

+--------+--------------------+-----+-----+--------+----------------+----+
|insectID|         insect_name|price|crown|location|     date_caught|pole|
+--------+--------------------+-----+-----+--------+----------------+----+
|     221|    than lan bao dom|   56| NULL|    city|10/17/2023 23:34|DARK|
|     549|    than lan bao dom|   56| NULL|   plaza|10/18/2023 15:07|NULL|
|     704|chau chau canh thang|   56|    1|hometown|10/18/2023 19:15|NULL|
|     799|    than lan bao dom|   56| NULL|    city|10/18/2023 23:58|NULL|
|    3203|          chuot cong|   56| NULL|   sewer|10/24/2023 17:53|NULL|
|    3271|chuon chuon ngo h...|  500|    1|    city|10/25/2023 15:24|NULL|
+--------+--------------------+-----+-----+--------+----------------+----+



In [24]:
# Get all the insect records with 'price' in one of these range: 54 to 55, 238 to 240, 499 to 501
insect_df_price_range = insect_df.filter(
    ((insect_df.price >= 54) & (insect_df.price <= 55)) | ((insect_df.price >= 238) & (insect_df.price <= 240)) | ((insect_df.price >= 499) & (insect_df.price <= 501)))
insect_df_price_range.show()

+--------+--------------------+-----+-----+--------+----------------+----+
|insectID|         insect_name|price|crown|location|     date_caught|pole|
+--------+--------------------+-----+-----+--------+----------------+----+
|     785|        nhen thay ma|   54|    1|   sewer|10/18/2023 23:50|NULL|
|     924|          chuot cong|   54| NULL|   sewer| 10/19/2023 2:08|NULL|
|     925|          chuot cong|   54| NULL|   sewer| 10/19/2023 2:09|NULL|
|    1001|    than lan bao dom|   55| NULL|   plaza| 10/19/2023 2:38|NULL|
|    1200| buom phuong machaon|  238| NULL|hometown|10/19/2023 16:11|NULL|
|    1802|chau chau canh thang|   55|    1|hometown|10/21/2023 22:46|NULL|
|    2113|    than lan bao dom|   54| NULL|    city|10/22/2023 10:30|NULL|
|    2230|          tac ke mao|  238|    1|    city|10/22/2023 13:31|NULL|
|    3000|    than lan bao dom|   55|    1|    city|10/24/2023 12:28|NULL|
|    3039|chau chau canh thang|   55|    1|hometown|10/24/2023 12:47|NULL|
|    3271|chuon chuon ngo

#### groupby() + count() – **Like COUNT() + GROUP BY in SQL, but stored in a variable**

In [25]:
# Group the table by location, then get the value counts for each group
insect_location_group = insect_df.groupby('location')
insect_location_group.count().show()

+--------+-----+
|location|count|
+--------+-----+
|hometown| 1235|
|   sewer|  419|
|    camp|  785|
|    city| 1233|
|   plaza|  813|
+--------+-----+



In [26]:
# Group the table by insect_name, the get value counts for each group
insect_name_group = insect_df.groupby('insect_name')
insect_name_group.count().show(3)

+------------------+-----+
|       insect_name|count|
+------------------+-----+
|                �e|    2|
|chau chau sung dai|  169|
|    buom xanh holy|  188|
+------------------+-----+
only showing top 3 rows



#### dropDuplicates() – **Remove Duplications based on 1 or more column**

In [27]:
# Remove all records that exist their duplicated "price"
insect_no_price = insect_df.select('price').dropDuplicates()
insect_no_price.show()

+-----+
|price|
+-----+
|  148|
|   31|
|  516|
|   85|
|  137|
|   65|
|   53|
|  133|
|   78|
|  513|
|  108|
|  155|
|   34|
|  211|
|  193|
|  126|
|  115|
|  101|
|   81|
|   28|
+-----+
only showing top 20 rows



In [28]:
# Remove all records that exists their duplicated in all the field "insect_name", "price" and "crown"
insect_no_fields = insect_df.select('insect_name', 'price', 'crown').dropDuplicates()
insect_no_fields.show(4)

+----------------+-----+-----+
|     insect_name|price|crown|
+----------------+-----+-----+
|than lan bao dom|   93|    1|
|      buom trang|  121|    1|
|       chau chau|  135|    1|
|      tac ke mao|  206|    1|
+----------------+-----+-----+
only showing top 4 rows



In [29]:
# Remove all records that exists their duplicated in general
insect_no_dup = insect_df.dropDuplicates()
insect_no_dup.show(6)

+--------+--------------------+-----+-----+--------+----------------+----+
|insectID|         insect_name|price|crown|location|     date_caught|pole|
+--------+--------------------+-----+-----+--------+----------------+----+
|     261|           chau chau|  122|    1|hometown|10/17/2023 23:48|DARK|
|     255|             cao cao|  129|    1|hometown|10/17/2023 23:46|DARK|
|     296|             de nhat|   44|    1|    city| 10/18/2023 0:05|DARK|
|      98|chau chau canh thang|   62|    1|hometown|10/17/2023 16:47|DARK|
|      80|             bo hung|  173|    1|   plaza|10/17/2023 16:38|DARK|
|     291|     buom dem van ho|   49|    1|   plaza| 10/18/2023 0:03|DARK|
+--------+--------------------+-----+-----+--------+----------------+----+
only showing top 6 rows



#### withColumnRenamed() – **Rename a column (Transformations)**

In [30]:
# Insect dataset but with 'insect_name' changed to 'type_of_insect' and 'pole' changed to 'stick'
insect_renamed = insect_df.withColumnRenamed('insect_name', 'type_of_insect')
insect_renamed = insect_renamed.withColumnRenamed('pole', 'stick')
insect_renamed.show(3)

+--------+--------------+-----+-----+--------+----------------+-----+
|insectID|type_of_insect|price|crown|location|     date_caught|stick|
+--------+--------------+-----+-----+--------+----------------+-----+
|       1|  nhen thay ma|   30| NULL|   plaza|10/17/2023 12:12| DARK|
|       2|    buom trang|   68| NULL|   plaza|10/17/2023 12:12| DARK|
|       3|  hop tien sao|  100| NULL|    camp|10/17/2023 14:42| DARK|
+--------+--------------+-----+-----+--------+----------------+-----+
only showing top 3 rows



#### printSchema() - **Print the type and name of all columns**

In [31]:
insect_df.printSchema()

root
 |-- insectID: integer (nullable = true)
 |-- insect_name: string (nullable = true)
 |-- price: integer (nullable = true)
 |-- crown: integer (nullable = true)
 |-- location: string (nullable = true)
 |-- date_caught: string (nullable = true)
 |-- pole: string (nullable = true)



#### orderby() – **Like ORDER BY, but stored in a variable (Transformations)**

In [32]:
# Show the dataset but in ascending 'price' order
insect_price_ascending = insect_df.orderBy('price')
insect_price_ascending.show(3)

+--------+--------------------+-----+-----+--------+----------------+----+
|insectID|         insect_name|price|crown|location|     date_caught|pole|
+--------+--------------------+-----+-----+--------+----------------+----+
|    1138|          buom trang|    0| NULL|    camp|10/19/2023 12:50|NULL|
|      22|              oc sen|   20| NULL|hometown|10/17/2023 14:54|DARK|
|      33|bo canh cung xanh la|   20| NULL|   plaza|10/17/2023 15:00|DARK|
+--------+--------------------+-----+-----+--------+----------------+----+
only showing top 3 rows



In [33]:
# Show the dataset but in descending 'price' order
insect_price_descending = insect_df.orderBy('price', ascending=False)
insect_price_descending.show(3)

+--------+--------------------+-----+-----+--------+----------------+----+
|insectID|         insect_name|price|crown|location|     date_caught|pole|
+--------+--------------------+-----+-----+--------+----------------+----+
|     715| buom phuong machaon|  532|    1|hometown|10/18/2023 23:16|NULL|
|    2639|bo ngua chua thay ma|  520|    1|hometown|10/23/2023 15:44|NULL|
|    4448|bo canh cung khon...|  516| NULL|    camp|11/27/2023 18:49|NULL|
+--------+--------------------+-----+-----+--------+----------------+----+
only showing top 3 rows



#### columns – **Return the name of all columns**

In [34]:
# Show column names
insect_df.columns

['insectID',
 'insect_name',
 'price',
 'crown',
 'location',
 'date_caught',
 'pole']

In [35]:
insect_renamed.columns

['insectID',
 'type_of_insect',
 'price',
 'crown',
 'location',
 'date_caught',
 'stick']

#### describe() - **Exactly the same as "describe" in Python Pandas, which shows summary of numeric columns**

In [36]:
# Show numeric descriptions
insect_df.describe()

DataFrame[summary: string, insectID: string, insect_name: string, price: string, crown: string, location: string, date_caught: string, pole: string]

In this case, all the columns are *string-typed* due to the lack of data cleaning, so the "**describe**" display nothing here.

#### Executing SQL queries using sql()

*Method 1: Execute the query directly*

In [38]:
insect_df.createOrReplaceTempView("insect_table")

In [40]:
# Select the entire table
insect_sql1 = spark.sql("SELECT * FROM insect_table")
insect_sql1.show(5)

+--------+---------------+-----+-----+--------+----------------+----+
|insectID|    insect_name|price|crown|location|     date_caught|pole|
+--------+---------------+-----+-----+--------+----------------+----+
|       1|   nhen thay ma|   30| NULL|   plaza|10/17/2023 12:12|DARK|
|       2|     buom trang|   68| NULL|   plaza|10/17/2023 12:12|DARK|
|       3|   hop tien sao|  100| NULL|    camp|10/17/2023 14:42|DARK|
|       4|bo hung thay ma|   62| NULL|    camp|10/17/2023 14:48|DARK|
|       5|            ran|   20| NULL|    camp|10/17/2023 14:48|DARK|
+--------+---------------+-----+-----+--------+----------------+----+
only showing top 5 rows



In [43]:
# Select 10 records with highest price
insect_sql2 = spark.sql("SELECT * FROM insect_table ORDER BY price DESC")
insect_sql2.show(10)

+--------+--------------------+-----+-----+--------+----------------+----+
|insectID|         insect_name|price|crown|location|     date_caught|pole|
+--------+--------------------+-----+-----+--------+----------------+----+
|     715| buom phuong machaon|  532|    1|hometown|10/18/2023 23:16|NULL|
|    2639|bo ngua chua thay ma|  520|    1|hometown|10/23/2023 15:44|NULL|
|    4448|bo canh cung khon...|  516| NULL|    camp|11/27/2023 18:49|NULL|
|    3444| buom phuong machaon|  513|    1|   plaza| 10/26/2023 2:40|NULL|
|    1060|          tac ke hoa|  512| NULL|    camp|10/19/2023 12:20|NULL|
|    2073|chuon chuon ngo h...|  510|    1|hometown|10/22/2023 10:15|NULL|
|    3171|      bo odontolabis|  509|    1|    camp|10/24/2023 17:28|NULL|
|    4049|        bo ngua chua|  508|    1|    camp|10/31/2023 17:31|NULL|
|     182|        buom thay ma|  504| NULL|   plaza|10/17/2023 17:20|DARK|
|     877|        bo ngua chua|  503|    1|hometown| 10/19/2023 1:46|NULL|
+--------+---------------

In [45]:
# Select all records that has location "camp", with price higher than 400 but lower than 450
insect_sql3 = spark.sql("SELECT * FROM insect_table WHERE location = 'camp' and price > 400 and price < 450")
insect_sql3.show()

+--------+----------------+-----+-----+--------+----------------+----+
|insectID|     insect_name|price|crown|location|     date_caught|pole|
+--------+----------------+-----+-----+--------+----------------+----+
|    4470|than lan co diem|  422| NULL|    camp|11/27/2023 19:12|NULL|
+--------+----------------+-----+-----+--------+----------------+----+



In [52]:
# Select the sum, average, max, min of price for each location group
insect_sql4 = spark.sql("SELECT location, SUM(price), ROUND(AVG(price), 3), MAX(price), MIN(price) FROM insect_table GROUP BY location")
insect_sql4.show()

+--------+----------+--------------------+----------+----------+
|location|sum(price)|round(avg(price), 3)|max(price)|min(price)|
+--------+----------+--------------------+----------+----------+
|hometown|     78065|              63.211|       532|        20|
|   sewer|     24998|              59.661|       170|        20|
|    camp|     49938|              63.615|       516|         0|
|    city|     77453|              62.817|       500|        20|
|   plaza|     53072|              65.279|       513|        20|
+--------+----------+--------------------+----------+----------+



*Method 2: Execute the query as a variable*

In [54]:
insect_df.createOrReplaceTempView("insect_table") #-> this is a MUST, before write any SQL queries in Spark

In [55]:
# Select all records that has location "camp", with price higher than 400 but lower than 450
query1 = '''SELECT * FROM insect_table WHERE location = 'camp' and price > 400 and price < 450'''
spark.sql(query1).show()

+--------+----------------+-----+-----+--------+----------------+----+
|insectID|     insect_name|price|crown|location|     date_caught|pole|
+--------+----------------+-----+-----+--------+----------------+----+
|    4470|than lan co diem|  422| NULL|    camp|11/27/2023 19:12|NULL|
+--------+----------------+-----+-----+--------+----------------+----+



In [56]:
# Select the sum, average, max, min of price for each location group
query2 = '''SELECT location, SUM(price), ROUND(AVG(price), 3), MAX(price), MIN(price) FROM insect_table GROUP BY location'''
spark.sql(query2).show()

+--------+----------+--------------------+----------+----------+
|location|sum(price)|round(avg(price), 3)|max(price)|min(price)|
+--------+----------+--------------------+----------+----------+
|hometown|     78065|              63.211|       532|        20|
|   sewer|     24998|              59.661|       170|        20|
|    camp|     49938|              63.615|       516|         0|
|    city|     77453|              62.817|       500|        20|
|   plaza|     53072|              65.279|       513|        20|
+--------+----------+--------------------+----------+----------+



In [57]:
# Select the top 3 highest price records from every location group
query3 = '''WITH tab AS(
  SELECT RANK() OVER (PARTITION BY location ORDER BY price DESC) AS Ranking, * FROM insect_table
)
SELECT * FROM tab
WHERE Ranking <= 3
'''
spark.sql(query3).show()

+-------+--------+--------------------+-----+-----+--------+----------------+----+
|Ranking|insectID|         insect_name|price|crown|location|     date_caught|pole|
+-------+--------+--------------------+-----+-----+--------+----------------+----+
|      1|    4448|bo canh cung khon...|  516| NULL|    camp|11/27/2023 18:49|NULL|
|      2|    1060|          tac ke hoa|  512| NULL|    camp|10/19/2023 12:20|NULL|
|      3|    3171|      bo odontolabis|  509|    1|    camp|10/24/2023 17:28|NULL|
|      1|    3271|chuon chuon ngo h...|  500|    1|    city|10/25/2023 15:24|NULL|
|      2|    4007|bo ngua hoa phong...|  487| NULL|    city|10/31/2023 17:27|NULL|
|      3|    2663|    than lan co diem|  363| NULL|    city|10/23/2023 15:52|NULL|
|      1|     715| buom phuong machaon|  532|    1|hometown|10/18/2023 23:16|NULL|
|      2|    2639|bo ngua chua thay ma|  520|    1|hometown|10/23/2023 15:44|NULL|
|      3|    2073|chuon chuon ngo h...|  510|    1|hometown|10/22/2023 10:15|NULL|
|   