## 1- Establishing Connection and Reading Topics from S3

In [0]:
# pyspark functions
from pyspark.sql.functions import *
# URL processing
import urllib

In [0]:
# Define the path to the Delta table
delta_table_path = "dbfs:/user/hive/warehouse/authentication_credentials"

# Read the Delta table to a Spark DataFrame
aws_keys_df = spark.read.format("delta").load(delta_table_path)

In [0]:
# Get the AWS access key and secret key from the spark dataframe
ACCESS_KEY = aws_keys_df.select('Access key ID').collect()[0]['Access key ID']
SECRET_KEY = aws_keys_df.select('Secret access key').collect()[0]['Secret access key']

# Encode the secret key
ENCODED_SECRET_KEY = urllib.parse.quote(string=SECRET_KEY, safe="")

In [0]:

# AWS S3 bucket name
AWS_S3_BUCKET = "user-0affec486183-bucket"
# Mount name for the bucket
MOUNT_PATH = "/mnt/mount_path"
# Source url
SOURCE_URL = "s3n://{0}:{1}@{2}".format(ACCESS_KEY, ENCODED_SECRET_KEY, AWS_S3_BUCKET)
# Mount the drive
try:
  dbutils.fs.unmount(MOUNT_PATH)
except:
  print("Exception: Directory is not mounted!")
finally:
  print("Finally: Mount Directory anyway!")
  dbutils.fs.mount(SOURCE_URL, MOUNT_PATH)

In [0]:
print(SOURCE_URL)

In [0]:
display(dbutils.fs.ls("/mnt/mount_path/topics/../.."))

path,name,size,modificationTime
dbfs:/mnt/ 0abf7f0cd605_Bucket/,0abf7f0cd605_Bucket/,0,0
dbfs:/mnt/0a1667ad2f7f/,0a1667ad2f7f/,0,0
dbfs:/mnt/0a1e5630c127/,0a1e5630c127/,0,1723234171640
dbfs:/mnt/0a3c6c045333/,0a3c6c045333/,0,0
dbfs:/mnt/0a3db223d459_storage/,0a3db223d459_storage/,0,1723234171640
dbfs:/mnt/0aa58e5ad07d-aws-s3-bucket/,0aa58e5ad07d-aws-s3-bucket/,0,0
dbfs:/mnt/0abf7f0cd605/,0abf7f0cd605/,0,0
dbfs:/mnt/0abf7f0cd605_Bucket/,0abf7f0cd605_Bucket/,0,0
dbfs:/mnt/0abf7f0cd605_mount/,0abf7f0cd605_mount/,0,0
dbfs:/mnt/0affc011d3cf-mount/,0affc011d3cf-mount/,0,0


In [0]:
%sql
--Disable format checks during the reading of Delta tables
SET spark.databricks.delta.formatCheck.enabled=false

key,value
spark.databricks.delta.formatCheck.enabled,False


In [0]:
# File location and type
# Asterisk(*) indicates reading all the content of the specified file that have .json extension
file_location = "/mnt/mount_path/topics/0affec486183.pin/partition=0/*.json"
file_type = "json"
# Ask Spark to infer the schema
infer_schema = "true"
# Read in JSONs from mounted S3 bucket
df_pin = spark.read.format(file_type).option("inferSchema", infer_schema).load(file_location)
# Display Spark dataframe to check its content
display(df_pin)

category,description,downloaded,follower_count,image_src,index,is_image_or_video,poster_name,save_location,tag_list,title,unique_id
home-decor,"Традиционные шведские коттеджи, обычно с красным фасадом — это настоящее воплощением идеального зимнего уюта. Они обычно оформлены очень просто и ✌PUFIK. Beautiful Interiors. On…",1,136k,https://i.pinimg.com/originals/32/eb/72/32eb72e4fd8654c115a64528bd1f34b4.png,6717,image,PUFIK Interiors & Inspirations,Local save in /data/home-decor,"Scandinavian Cottage,Swedish Cottage,Swedish Home Decor,Swedish Farmhouse,Swedish Style,Swedish Kitchen,Kitchen Black,Swedish House,Cozy Cottage",〚 Уютные шведские коттеджи от Carina Olander 〛 ◾ Фото ◾ Идеи ◾ Дизайн,bc5ab9ee-505e-44f6-92ba-677fe4fdf3e3
christmas,"Features: Material:Lint Size:48ｘ18cm Quantity:1 pc Shape:Santa Claus, snowman. Elk Occasion:Christmas Description: 1. Fashion design, high quality 2. Santa Claus, snowman. Elk C…",1,5k,https://i.pinimg.com/originals/b5/7f/21/b57f219fa89c1165b57525b8eae711da.jpg,1706,image,Wear24-7,Local save in /data/christmas,"Merry Christmas To You,Christmas Toys,Great Christmas Gifts,Christmas Snowman,Christmas Ornaments,Holiday,Christmas Party Decorations,Christmas Themes,Decoration Party",Standing Figurine Toys Xmas Santa Claus Snowman Reindeer Figure Plush Dolls Christmas Decorations Ornaments Home Indoor Table Ornaments Christmas Party Tree Hanging Decor Toys Gifts for Kids Friends…,b5c8a1b5-9e90-4522-9bec-2477b698d5b7
christmas,"Christmas decorating ideas for porches. Beautiful holiday decor ideas for front porches both small and large. Outdoor decorations like sleds, lanterns, Christmas trees, wreaths,…",1,46k,https://i.pinimg.com/videos/thumbnails/originals/40/83/f5/4083f5b4971bf235f89a4784ab87271e.0000001.jpg,2482,video,"Life on Summerhill | Home, Holiday Decor & DIY Website",Local save in /data/christmas,"Diy Christmas Decorations For Home,Farmhouse Christmas Decor,Christmas Home,Christmas Holidays,Christmas Front Porches,How To Decorate For Christmas,Christmas Porch Ideas,Christmas Decorating Ideas,Large Outdoor Christmas Decorations",FORNT PORCH CHRISTMAS DECORATING IDEAS,08604f20-fa17-4b9a-9949-781717eca6cd
vehicles,"By David Crane ; defrev (at) gmail (dot) com All photos contained in this article were shot by DefenseReview.com (DR), and are copyrighted. DefenseReview.com owns the copyright…",1,709,https://i.pinimg.com/originals/36/63/12/366312d747da1358397610a86bf21b20.jpg,10538,image,Ricky Lee,Local save in /data/vehicles,"Army Vehicles,Armored Vehicles,Cool Trucks,Cool Cars,Amphibious Vehicle,Offroader,Bug Out Vehicle,Vehicle Wraps,Terrain Vehicle",BC Customs (BCC) Search and Rescue Tactical Vehicle-5 (SRTV-5) Baja Racing-Type All-Terrain Combat Vehicle Armed/Weaponized with 7.62mm NATO Garwood Industries (GI) M134G Minigun/Gatling Gun: SXOR…,5d9fa7e2-2118-4442-99b6-537d60463a6a
art,Marble Wall Art Modern Abstract Canvas Artwork Contemporary Home Decor Canvas Wall Art Ready to Hang Canvas Each canvas is professionally printed and hand-stretched in the USA.…,1,305,https://i.pinimg.com/originals/b2/6e/95/b26e950a283805d09ef9a4a279781217.jpg,527,image,Wall Canvas Mall,Local save in /data/art,"Modern Art Paintings,Modern Artwork,Modern Wall Art,Blue Artwork,Modern Canvas Art,Contemporary Home Decor,Modern Art Prints,Framed Canvas Prints,Wall Art Prints","Blue Gold Marble Canvas , Luxury Wall Art, Abstract Wall Decor, Navy Blue Abstract, Modern Artwork, Oversize Canvas Art, Contemporary Art - 1 Panel 12x9 / Gallery Wrap",ed8af037-ee87-4a80-97ac-99f5b153cf7e
event-planning,"Updated: January 25, 2017 You’ve organized some events for your family, friends or community and you have gained a budding reputation for knowing how put events together. You’ve…",1,4k,https://i.pinimg.com/originals/c3/2b/c6/c32bc6ad263857cb0eea19f9cd12beb9.jpg,4357,image,EventPlanning.com | Learn How To Become An Event Planner,Local save in /data/event-planning,"Event Planning Quotes,Event Planning Checklist,Event Planning Business,Business Events,Business Ideas,Business Names,Business Opportunities,Corporate Events,Wedding Event Planner",First Steps in Launching Your Own Event Business - Learn About Event Planning,ccf116e9-9096-4943-a344-1960ce216445
art,If I could only choose one paint brush it would be the angled brush! I am going to break down four separate Techniques I like to use an Angled Paint Brush with a video painting…,1,20k,https://i.pinimg.com/originals/cc/8e/81/cc8e8190f773d5e3bb7d86890b566da7.png,25,image,The Social Easel Online Paint Studio | Video Painting Tutorials,Local save in /data/art,"Fall Canvas Painting,Basic Painting,Acrylic Painting Flowers,Canvas Painting Tutorials,Autumn Painting,Painting Techniques,Diy Painting,Painting & Drawing,Canvas Art",How to use an Angled Paint Brush! Painting Techniques with The Social Easel Online Paint Studio,f19b91c7-2a58-41ae-a013-3806d248baec
christmas,15 unique Christmas porch ideas that will leave you feeling inspired and help you tackle decorating your own entryway for the holidays! It’s almost time to start decorating for…,1,19k,https://i.pinimg.com/originals/ff/f8/3b/fff83b02aeb29e2e9341a56fc5e63345.png,1967,image,Ashley - Modern Glam,Local save in /data/christmas,"Exterior Christmas Lights,Front Door Christmas Decorations,Christmas Lights Outside,Christmas House Lights,Decorating With Christmas Lights,Porch Decorating,Christmas Porch Decorations,Front Porch Ideas For Christmas,Christmas Lights Outdoor Trees",15 Fun & Festive Christmas Porch Ideas,0b9d5b95-51a6-465e-ae4a-2cb68ceada29
finance,"Passive income is a great goal, and nothing is more passive than dividends from stocks. Here's how much money you will need to make $1,000 a year in dividends.",1,9k,https://i.pinimg.com/originals/fa/88/3f/fa883f38e875a08d90f8b37ad46acbd1.png,5130,image,Young Adult Money,Local save in /data/finance,"Financial Tips,Financial Literacy,Financial Planning,Stock Ticker,High Interest Savings Account,Dividend Investing,Energy Saving Tips,Dow Jones Industrial Average,Dividend Stocks","How Much Money You Will Need to Make How Much Money You Will Need to Make $1,000 in Dividends,000 in Dividends | Young Adult Money",82578274-6d14-4c0c-a755-aee515556bdc
christmas,"My favorite 75+ Neutral Christmas Home Decor for decorating your house during the Holiday Season in earth tones and a farmhouse, rustic style all winter. I love this modern, sim…",1,31k,https://i.pinimg.com/originals/86/84/39/868439dd894969e3abd6a2a8a9fe1e9c.jpg,2604,image,Everyday Wholesome,Local save in /data/christmas,"Colorful Christmas Decorations,Colorful Christmas Tree,Christmas Centerpieces,Christmas Colors,Xmas Colors,Winter Decorations,Christmas Trends,Christmas Inspiration,Christmas Home",75+ Neutral Christmas Home Decor for the Holiday Season in Farmhouse Style using Earth Tones Modern,087b0fa9-f901-4262-aa0a-6caf234d1b35


In [0]:
# File location and type
# Asterisk(*) indicates reading all the content of the specified file that have .json extension
file_location = "/mnt/mount_path/topics/0affec486183.user/partition=0/*.json"
file_type = "json"
# Ask Spark to infer the schema
infer_schema = "true"
# Read in JSONs from mounted S3 bucket
df_user= spark.read.format(file_type).option("inferSchema", infer_schema).load(file_location)
# Display Spark dataframe to check its content
display(df_user)

age,date_joined,first_name,ind,last_name
27,2016-03-08 13:38:37,Christopher,2015,Bradshaw
59,2017-05-12 21:22:17,Alexander,10673,Cervantes
48,2016-02-27 16:57:44,Christopher,1857,Hamilton
45,2016-09-15 06:02:53,Christopher,10020,Hawkins
39,2016-06-29 20:43:59,Christina,6398,Davenport
20,2015-10-23 04:13:23,Alexandria,3599,Alvarado
20,2015-10-23 04:13:23,Alexandria,4256,Alvarado
44,2016-12-18 16:05:39,Michelle,1901,Richardson
20,2015-10-23 04:13:23,Alexandria,3831,Alvarado
23,2015-10-31 19:20:09,Alexandria,6602,Anderson


In [0]:
# File location and type
# Asterisk(*) indicates reading all the content of the specified file that have .json extension
file_location = "/mnt/mount_path/topics/0affec486183.geo/partition=0/*.json"
file_type = "json"
# Ask Spark to infer the schema
infer_schema = "true"
# Read in JSONs from mounted S3 bucket
df_geo = spark.read.format(file_type).option("inferSchema", infer_schema).load(file_location)
# Display Spark dataframe to check its content
display(df_geo)

country,ind,latitude,longitude,timestamp
Antarctica (the territory South of 60 deg S),5162,-71.6607,-149.206,2019-09-27 19:06:43
Antarctica (the territory South of 60 deg S),1335,-77.9931,-175.682,2022-03-19 17:29:42
Antarctica (the territory South of 60 deg S),9185,-10.3764,-22.9809,2019-10-06 18:12:55
Antarctica (the territory South of 60 deg S),9335,-88.4642,-171.061,2020-11-14 23:42:22
Saint Vincent and the Grenadines,2301,13.4683,51.7244,2020-11-14 00:25:28
Holy See (Vatican City State),7180,-22.7118,-167.739,2018-04-22 21:33:50
Svalbard & Jan Mayen Islands,10204,-14.3257,26.9087,2020-04-06 02:56:29
French Southern Territories,6014,-26.6026,155.206,2019-04-30 12:33:13
Bouvet Island (Bouvetoya),8677,-84.3984,-144.933,2020-02-12 23:24:01
Bouvet Island (Bouvetoya),9270,-84.3984,-144.933,2022-05-23 03:24:24


## 2- Cleaning Dataframes

In [0]:
# Clean df_pin

In [0]:
type(df_pin)

In [0]:
df_pin.dtypes

In [0]:
Dict_Null = {col:df_pin.filter(df_pin[col].isNull()).count() for col in df_pin.columns}
Dict_Null

In [0]:
# Option 1 - Faster

replace_dict = {
  'tag_list': 'N,o, ,T,a,g,s, ,A,v,a,i,l,a,b,l,e',
  'image_src': 'Image src error.',
  'follower_count': 'User Info Error:',
  'description': 'No description available%',
  'title': 'No Title Data Available',
  'poster_name': 'User Info Error'
}

from pyspark.sql import functions as F
from pyspark.sql.functions import when, col

for key, value in replace_dict.items():
  df_pin = df_pin.withColumn(key, when(col(key).like(value), None).otherwise(col(key)))

In [0]:
# Option 2 - Slower

from pyspark.sql import functions as F

df_pin_clean = df_pin.replace({'N,o, ,T,a,g,s, ,A,v,a,i,l,a,b,l,e': None}, subset=['tag_list']) \
                    .replace({'Image src error.': None}, subset=['image_src']) \
                    .replace({'User Info Error:': None}, subset=['follower_count']) \
                    .replace({'No description available Story format': None}, subset=['description']) \
                    .replace({'No Title Data Available': None}, subset=['title']) \
                    .replace({'User Info Error': None}, subset=['Poster_name'])

def replaceEmptyCols(df, columns:[]):
    for c in columns:
        df = df.withColumn(c, F.when((F.col(c) == '')  | (F.col(c).isNull()), None).otherwise(F.col(c)))
    return df

df_pin_clean = replaceEmptyCols(df_pin_clean, [c for c in df_pin_clean.columns if c not in ['unique_id']]) #.show()
display(df_pin_clean)

category,description,downloaded,follower_count,image_src,index,is_image_or_video,poster_name,save_location,tag_list,title,unique_id
home-decor,"Традиционные шведские коттеджи, обычно с красным фасадом — это настоящее воплощением идеального зимнего уюта. Они обычно оформлены очень просто и ✌PUFIK. Beautiful Interiors. On…",1,136k,https://i.pinimg.com/originals/32/eb/72/32eb72e4fd8654c115a64528bd1f34b4.png,6717,image,PUFIK Interiors & Inspirations,Local save in /data/home-decor,"Scandinavian Cottage,Swedish Cottage,Swedish Home Decor,Swedish Farmhouse,Swedish Style,Swedish Kitchen,Kitchen Black,Swedish House,Cozy Cottage",〚 Уютные шведские коттеджи от Carina Olander 〛 ◾ Фото ◾ Идеи ◾ Дизайн,bc5ab9ee-505e-44f6-92ba-677fe4fdf3e3
christmas,"Features: Material:Lint Size:48ｘ18cm Quantity:1 pc Shape:Santa Claus, snowman. Elk Occasion:Christmas Description: 1. Fashion design, high quality 2. Santa Claus, snowman. Elk C…",1,5k,https://i.pinimg.com/originals/b5/7f/21/b57f219fa89c1165b57525b8eae711da.jpg,1706,image,Wear24-7,Local save in /data/christmas,"Merry Christmas To You,Christmas Toys,Great Christmas Gifts,Christmas Snowman,Christmas Ornaments,Holiday,Christmas Party Decorations,Christmas Themes,Decoration Party",Standing Figurine Toys Xmas Santa Claus Snowman Reindeer Figure Plush Dolls Christmas Decorations Ornaments Home Indoor Table Ornaments Christmas Party Tree Hanging Decor Toys Gifts for Kids Friends…,b5c8a1b5-9e90-4522-9bec-2477b698d5b7
christmas,"Christmas decorating ideas for porches. Beautiful holiday decor ideas for front porches both small and large. Outdoor decorations like sleds, lanterns, Christmas trees, wreaths,…",1,46k,https://i.pinimg.com/videos/thumbnails/originals/40/83/f5/4083f5b4971bf235f89a4784ab87271e.0000001.jpg,2482,video,"Life on Summerhill | Home, Holiday Decor & DIY Website",Local save in /data/christmas,"Diy Christmas Decorations For Home,Farmhouse Christmas Decor,Christmas Home,Christmas Holidays,Christmas Front Porches,How To Decorate For Christmas,Christmas Porch Ideas,Christmas Decorating Ideas,Large Outdoor Christmas Decorations",FORNT PORCH CHRISTMAS DECORATING IDEAS,08604f20-fa17-4b9a-9949-781717eca6cd
vehicles,"By David Crane ; defrev (at) gmail (dot) com All photos contained in this article were shot by DefenseReview.com (DR), and are copyrighted. DefenseReview.com owns the copyright…",1,709,https://i.pinimg.com/originals/36/63/12/366312d747da1358397610a86bf21b20.jpg,10538,image,Ricky Lee,Local save in /data/vehicles,"Army Vehicles,Armored Vehicles,Cool Trucks,Cool Cars,Amphibious Vehicle,Offroader,Bug Out Vehicle,Vehicle Wraps,Terrain Vehicle",BC Customs (BCC) Search and Rescue Tactical Vehicle-5 (SRTV-5) Baja Racing-Type All-Terrain Combat Vehicle Armed/Weaponized with 7.62mm NATO Garwood Industries (GI) M134G Minigun/Gatling Gun: SXOR…,5d9fa7e2-2118-4442-99b6-537d60463a6a
art,Marble Wall Art Modern Abstract Canvas Artwork Contemporary Home Decor Canvas Wall Art Ready to Hang Canvas Each canvas is professionally printed and hand-stretched in the USA.…,1,305,https://i.pinimg.com/originals/b2/6e/95/b26e950a283805d09ef9a4a279781217.jpg,527,image,Wall Canvas Mall,Local save in /data/art,"Modern Art Paintings,Modern Artwork,Modern Wall Art,Blue Artwork,Modern Canvas Art,Contemporary Home Decor,Modern Art Prints,Framed Canvas Prints,Wall Art Prints","Blue Gold Marble Canvas , Luxury Wall Art, Abstract Wall Decor, Navy Blue Abstract, Modern Artwork, Oversize Canvas Art, Contemporary Art - 1 Panel 12x9 / Gallery Wrap",ed8af037-ee87-4a80-97ac-99f5b153cf7e
event-planning,"Updated: January 25, 2017 You’ve organized some events for your family, friends or community and you have gained a budding reputation for knowing how put events together. You’ve…",1,4k,https://i.pinimg.com/originals/c3/2b/c6/c32bc6ad263857cb0eea19f9cd12beb9.jpg,4357,image,EventPlanning.com | Learn How To Become An Event Planner,Local save in /data/event-planning,"Event Planning Quotes,Event Planning Checklist,Event Planning Business,Business Events,Business Ideas,Business Names,Business Opportunities,Corporate Events,Wedding Event Planner",First Steps in Launching Your Own Event Business - Learn About Event Planning,ccf116e9-9096-4943-a344-1960ce216445
art,If I could only choose one paint brush it would be the angled brush! I am going to break down four separate Techniques I like to use an Angled Paint Brush with a video painting…,1,20k,https://i.pinimg.com/originals/cc/8e/81/cc8e8190f773d5e3bb7d86890b566da7.png,25,image,The Social Easel Online Paint Studio | Video Painting Tutorials,Local save in /data/art,"Fall Canvas Painting,Basic Painting,Acrylic Painting Flowers,Canvas Painting Tutorials,Autumn Painting,Painting Techniques,Diy Painting,Painting & Drawing,Canvas Art",How to use an Angled Paint Brush! Painting Techniques with The Social Easel Online Paint Studio,f19b91c7-2a58-41ae-a013-3806d248baec
christmas,15 unique Christmas porch ideas that will leave you feeling inspired and help you tackle decorating your own entryway for the holidays! It’s almost time to start decorating for…,1,19k,https://i.pinimg.com/originals/ff/f8/3b/fff83b02aeb29e2e9341a56fc5e63345.png,1967,image,Ashley - Modern Glam,Local save in /data/christmas,"Exterior Christmas Lights,Front Door Christmas Decorations,Christmas Lights Outside,Christmas House Lights,Decorating With Christmas Lights,Porch Decorating,Christmas Porch Decorations,Front Porch Ideas For Christmas,Christmas Lights Outdoor Trees",15 Fun & Festive Christmas Porch Ideas,0b9d5b95-51a6-465e-ae4a-2cb68ceada29
finance,"Passive income is a great goal, and nothing is more passive than dividends from stocks. Here's how much money you will need to make $1,000 a year in dividends.",1,9k,https://i.pinimg.com/originals/fa/88/3f/fa883f38e875a08d90f8b37ad46acbd1.png,5130,image,Young Adult Money,Local save in /data/finance,"Financial Tips,Financial Literacy,Financial Planning,Stock Ticker,High Interest Savings Account,Dividend Investing,Energy Saving Tips,Dow Jones Industrial Average,Dividend Stocks","How Much Money You Will Need to Make How Much Money You Will Need to Make $1,000 in Dividends,000 in Dividends | Young Adult Money",82578274-6d14-4c0c-a755-aee515556bdc
christmas,"My favorite 75+ Neutral Christmas Home Decor for decorating your house during the Holiday Season in earth tones and a farmhouse, rustic style all winter. I love this modern, sim…",1,31k,https://i.pinimg.com/originals/86/84/39/868439dd894969e3abd6a2a8a9fe1e9c.jpg,2604,image,Everyday Wholesome,Local save in /data/christmas,"Colorful Christmas Decorations,Colorful Christmas Tree,Christmas Centerpieces,Christmas Colors,Xmas Colors,Winter Decorations,Christmas Trends,Christmas Inspiration,Christmas Home",75+ Neutral Christmas Home Decor for the Holiday Season in Farmhouse Style using Earth Tones Modern,087b0fa9-f901-4262-aa0a-6caf234d1b35


In [0]:
# pd.__version__
# import pandas as pd
# Replace empty entries and entries with no relevant data in each column with Nones
#df_pin_clean = df_pin.copy() #pandas
#newDataFrame = oldDataFrame.select('*') #spark


# for series_name, series in df_pin_clean.items():
#   if series.dtype == 'string':
#     df_pin_clean[series_name] = df_pin_clean[series_name].mask(pd.isna(df_pin_clean[series_name]), None).replace({np.inf: None})
#   elif series.dtype == 'bigint':
#     df_pin_clean[series_name] = df_pin_clean.replace({float('nan'): None}).replace({np.inf: None})
#     print(series_name)
#     print(series)
# df_pin_clean = df_pin.where(pd.notnull(df_pin), None).replace({np.inf: None})

# replace_dict = {
#   'tag_list':{'N,o, ,T,a,g,s, ,A,v,a,i,l,a,b,l,e': None},
#   'image_src':{'Image src error.': None},
#   'follower_count':{'User Info Error:': None},
#   'description':{'No description available%': None},
#   'title':{'No Title Data Available': None},
#   'poster_name':{'User Info Error': None}
# }

#df_pin_clean.replace(replace_dict, inplace=True)




In [0]:
# replace k and M with zeros
df_pin = df_pin.withColumn("follower_count", regexp_replace("follower_count", "k", "000"))
df_pin = df_pin.withColumn("follower_count", regexp_replace("follower_count", "M", "000000"))

# convert numeric columns to integer type
df_pin = df_pin.withColumn("follower_count", col("follower_count").cast('int'))
df_pin = df_pin.withColumn("downloaded", col("downloaded").cast('int'))
df_pin = df_pin.withColumn("index", col("index").cast('int'))

In [0]:
display(df_pin)

category,description,downloaded,follower_count,image_src,index,is_image_or_video,poster_name,save_location,tag_list,title,unique_id
home-decor,"Традиционные шведские коттеджи, обычно с красным фасадом — это настоящее воплощением идеального зимнего уюта. Они обычно оформлены очень просто и ✌PUFIK. Beautiful Interiors. On…",1,136k,https://i.pinimg.com/originals/32/eb/72/32eb72e4fd8654c115a64528bd1f34b4.png,6717,image,PUFIK Interiors & Inspirations,Local save in /data/home-decor,"Scandinavian Cottage,Swedish Cottage,Swedish Home Decor,Swedish Farmhouse,Swedish Style,Swedish Kitchen,Kitchen Black,Swedish House,Cozy Cottage",〚 Уютные шведские коттеджи от Carina Olander 〛 ◾ Фото ◾ Идеи ◾ Дизайн,bc5ab9ee-505e-44f6-92ba-677fe4fdf3e3
christmas,"Features: Material:Lint Size:48ｘ18cm Quantity:1 pc Shape:Santa Claus, snowman. Elk Occasion:Christmas Description: 1. Fashion design, high quality 2. Santa Claus, snowman. Elk C…",1,5k,https://i.pinimg.com/originals/b5/7f/21/b57f219fa89c1165b57525b8eae711da.jpg,1706,image,Wear24-7,Local save in /data/christmas,"Merry Christmas To You,Christmas Toys,Great Christmas Gifts,Christmas Snowman,Christmas Ornaments,Holiday,Christmas Party Decorations,Christmas Themes,Decoration Party",Standing Figurine Toys Xmas Santa Claus Snowman Reindeer Figure Plush Dolls Christmas Decorations Ornaments Home Indoor Table Ornaments Christmas Party Tree Hanging Decor Toys Gifts for Kids Friends…,b5c8a1b5-9e90-4522-9bec-2477b698d5b7
christmas,"Christmas decorating ideas for porches. Beautiful holiday decor ideas for front porches both small and large. Outdoor decorations like sleds, lanterns, Christmas trees, wreaths,…",1,46k,https://i.pinimg.com/videos/thumbnails/originals/40/83/f5/4083f5b4971bf235f89a4784ab87271e.0000001.jpg,2482,video,"Life on Summerhill | Home, Holiday Decor & DIY Website",Local save in /data/christmas,"Diy Christmas Decorations For Home,Farmhouse Christmas Decor,Christmas Home,Christmas Holidays,Christmas Front Porches,How To Decorate For Christmas,Christmas Porch Ideas,Christmas Decorating Ideas,Large Outdoor Christmas Decorations",FORNT PORCH CHRISTMAS DECORATING IDEAS,08604f20-fa17-4b9a-9949-781717eca6cd
vehicles,"By David Crane ; defrev (at) gmail (dot) com All photos contained in this article were shot by DefenseReview.com (DR), and are copyrighted. DefenseReview.com owns the copyright…",1,709,https://i.pinimg.com/originals/36/63/12/366312d747da1358397610a86bf21b20.jpg,10538,image,Ricky Lee,Local save in /data/vehicles,"Army Vehicles,Armored Vehicles,Cool Trucks,Cool Cars,Amphibious Vehicle,Offroader,Bug Out Vehicle,Vehicle Wraps,Terrain Vehicle",BC Customs (BCC) Search and Rescue Tactical Vehicle-5 (SRTV-5) Baja Racing-Type All-Terrain Combat Vehicle Armed/Weaponized with 7.62mm NATO Garwood Industries (GI) M134G Minigun/Gatling Gun: SXOR…,5d9fa7e2-2118-4442-99b6-537d60463a6a
art,Marble Wall Art Modern Abstract Canvas Artwork Contemporary Home Decor Canvas Wall Art Ready to Hang Canvas Each canvas is professionally printed and hand-stretched in the USA.…,1,305,https://i.pinimg.com/originals/b2/6e/95/b26e950a283805d09ef9a4a279781217.jpg,527,image,Wall Canvas Mall,Local save in /data/art,"Modern Art Paintings,Modern Artwork,Modern Wall Art,Blue Artwork,Modern Canvas Art,Contemporary Home Decor,Modern Art Prints,Framed Canvas Prints,Wall Art Prints","Blue Gold Marble Canvas , Luxury Wall Art, Abstract Wall Decor, Navy Blue Abstract, Modern Artwork, Oversize Canvas Art, Contemporary Art - 1 Panel 12x9 / Gallery Wrap",ed8af037-ee87-4a80-97ac-99f5b153cf7e
event-planning,"Updated: January 25, 2017 You’ve organized some events for your family, friends or community and you have gained a budding reputation for knowing how put events together. You’ve…",1,4k,https://i.pinimg.com/originals/c3/2b/c6/c32bc6ad263857cb0eea19f9cd12beb9.jpg,4357,image,EventPlanning.com | Learn How To Become An Event Planner,Local save in /data/event-planning,"Event Planning Quotes,Event Planning Checklist,Event Planning Business,Business Events,Business Ideas,Business Names,Business Opportunities,Corporate Events,Wedding Event Planner",First Steps in Launching Your Own Event Business - Learn About Event Planning,ccf116e9-9096-4943-a344-1960ce216445
art,If I could only choose one paint brush it would be the angled brush! I am going to break down four separate Techniques I like to use an Angled Paint Brush with a video painting…,1,20k,https://i.pinimg.com/originals/cc/8e/81/cc8e8190f773d5e3bb7d86890b566da7.png,25,image,The Social Easel Online Paint Studio | Video Painting Tutorials,Local save in /data/art,"Fall Canvas Painting,Basic Painting,Acrylic Painting Flowers,Canvas Painting Tutorials,Autumn Painting,Painting Techniques,Diy Painting,Painting & Drawing,Canvas Art",How to use an Angled Paint Brush! Painting Techniques with The Social Easel Online Paint Studio,f19b91c7-2a58-41ae-a013-3806d248baec
christmas,15 unique Christmas porch ideas that will leave you feeling inspired and help you tackle decorating your own entryway for the holidays! It’s almost time to start decorating for…,1,19k,https://i.pinimg.com/originals/ff/f8/3b/fff83b02aeb29e2e9341a56fc5e63345.png,1967,image,Ashley - Modern Glam,Local save in /data/christmas,"Exterior Christmas Lights,Front Door Christmas Decorations,Christmas Lights Outside,Christmas House Lights,Decorating With Christmas Lights,Porch Decorating,Christmas Porch Decorations,Front Porch Ideas For Christmas,Christmas Lights Outdoor Trees",15 Fun & Festive Christmas Porch Ideas,0b9d5b95-51a6-465e-ae4a-2cb68ceada29
finance,"Passive income is a great goal, and nothing is more passive than dividends from stocks. Here's how much money you will need to make $1,000 a year in dividends.",1,9k,https://i.pinimg.com/originals/fa/88/3f/fa883f38e875a08d90f8b37ad46acbd1.png,5130,image,Young Adult Money,Local save in /data/finance,"Financial Tips,Financial Literacy,Financial Planning,Stock Ticker,High Interest Savings Account,Dividend Investing,Energy Saving Tips,Dow Jones Industrial Average,Dividend Stocks","How Much Money You Will Need to Make How Much Money You Will Need to Make $1,000 in Dividends,000 in Dividends | Young Adult Money",82578274-6d14-4c0c-a755-aee515556bdc
christmas,"My favorite 75+ Neutral Christmas Home Decor for decorating your house during the Holiday Season in earth tones and a farmhouse, rustic style all winter. I love this modern, sim…",1,31k,https://i.pinimg.com/originals/86/84/39/868439dd894969e3abd6a2a8a9fe1e9c.jpg,2604,image,Everyday Wholesome,Local save in /data/christmas,"Colorful Christmas Decorations,Colorful Christmas Tree,Christmas Centerpieces,Christmas Colors,Xmas Colors,Winter Decorations,Christmas Trends,Christmas Inspiration,Christmas Home",75+ Neutral Christmas Home Decor for the Holiday Season in Farmhouse Style using Earth Tones Modern,087b0fa9-f901-4262-aa0a-6caf234d1b35


In [0]:
df_pin.dtypes

In [0]:
# Clean the data in the save_location
df_pin = df_pin.withColumn("save_location", regexp_replace("save_location", "Local save in ", ""))

In [0]:
# Rename inde to ind
df_pin = df_pin.withColumnRenamed("index", "ind")

In [0]:
# Reorder the DataFrame columns
new_column_order_pin = [
    "ind",
    "unique_id",
    "title",
    "description",
    "follower_count",
    "poster_name",
    "tag_list",
    "is_image_or_video",
    "image_src",
    "save_location",
    "category"
]
df_pin = df_pin.select(new_column_order_pin)

In [0]:
display(df_pin)

ind,unique_id,title,description,follower_count,poster_name,tag_list,is_image_or_video,image_src,save_location,category
6717,bc5ab9ee-505e-44f6-92ba-677fe4fdf3e3,〚 Уютные шведские коттеджи от Carina Olander 〛 ◾ Фото ◾ Идеи ◾ Дизайн,"Традиционные шведские коттеджи, обычно с красным фасадом — это настоящее воплощением идеального зимнего уюта. Они обычно оформлены очень просто и ✌PUFIK. Beautiful Interiors. On…",136000.0,PUFIK Interiors & Inspirations,"Scandinavian Cottage,Swedish Cottage,Swedish Home Decor,Swedish Farmhouse,Swedish Style,Swedish Kitchen,Kitchen Black,Swedish House,Cozy Cottage",image,https://i.pinimg.com/originals/32/eb/72/32eb72e4fd8654c115a64528bd1f34b4.png,/data/home-decor,home-decor
1706,b5c8a1b5-9e90-4522-9bec-2477b698d5b7,Standing Figurine Toys Xmas Santa Claus Snowman Reindeer Figure Plush Dolls Christmas Decorations Ornaments Home Indoor Table Ornaments Christmas Party Tree Hanging Decor Toys Gifts for Kids Friends…,"Features: Material:Lint Size:48ｘ18cm Quantity:1 pc Shape:Santa Claus, snowman. Elk Occasion:Christmas Description: 1. Fashion design, high quality 2. Santa Claus, snowman. Elk C…",5000.0,Wear24-7,"Merry Christmas To You,Christmas Toys,Great Christmas Gifts,Christmas Snowman,Christmas Ornaments,Holiday,Christmas Party Decorations,Christmas Themes,Decoration Party",image,https://i.pinimg.com/originals/b5/7f/21/b57f219fa89c1165b57525b8eae711da.jpg,/data/christmas,christmas
2482,08604f20-fa17-4b9a-9949-781717eca6cd,FORNT PORCH CHRISTMAS DECORATING IDEAS,"Christmas decorating ideas for porches. Beautiful holiday decor ideas for front porches both small and large. Outdoor decorations like sleds, lanterns, Christmas trees, wreaths,…",46000.0,"Life on Summerhill | Home, Holiday Decor & DIY Website","Diy Christmas Decorations For Home,Farmhouse Christmas Decor,Christmas Home,Christmas Holidays,Christmas Front Porches,How To Decorate For Christmas,Christmas Porch Ideas,Christmas Decorating Ideas,Large Outdoor Christmas Decorations",video,https://i.pinimg.com/videos/thumbnails/originals/40/83/f5/4083f5b4971bf235f89a4784ab87271e.0000001.jpg,/data/christmas,christmas
10538,5d9fa7e2-2118-4442-99b6-537d60463a6a,BC Customs (BCC) Search and Rescue Tactical Vehicle-5 (SRTV-5) Baja Racing-Type All-Terrain Combat Vehicle Armed/Weaponized with 7.62mm NATO Garwood Industries (GI) M134G Minigun/Gatling Gun: SXOR…,"By David Crane ; defrev (at) gmail (dot) com All photos contained in this article were shot by DefenseReview.com (DR), and are copyrighted. DefenseReview.com owns the copyright…",709.0,Ricky Lee,"Army Vehicles,Armored Vehicles,Cool Trucks,Cool Cars,Amphibious Vehicle,Offroader,Bug Out Vehicle,Vehicle Wraps,Terrain Vehicle",image,https://i.pinimg.com/originals/36/63/12/366312d747da1358397610a86bf21b20.jpg,/data/vehicles,vehicles
527,ed8af037-ee87-4a80-97ac-99f5b153cf7e,"Blue Gold Marble Canvas , Luxury Wall Art, Abstract Wall Decor, Navy Blue Abstract, Modern Artwork, Oversize Canvas Art, Contemporary Art - 1 Panel 12x9 / Gallery Wrap",Marble Wall Art Modern Abstract Canvas Artwork Contemporary Home Decor Canvas Wall Art Ready to Hang Canvas Each canvas is professionally printed and hand-stretched in the USA.…,305.0,Wall Canvas Mall,"Modern Art Paintings,Modern Artwork,Modern Wall Art,Blue Artwork,Modern Canvas Art,Contemporary Home Decor,Modern Art Prints,Framed Canvas Prints,Wall Art Prints",image,https://i.pinimg.com/originals/b2/6e/95/b26e950a283805d09ef9a4a279781217.jpg,/data/art,art
4357,ccf116e9-9096-4943-a344-1960ce216445,First Steps in Launching Your Own Event Business - Learn About Event Planning,"Updated: January 25, 2017 You’ve organized some events for your family, friends or community and you have gained a budding reputation for knowing how put events together. You’ve…",4000.0,EventPlanning.com | Learn How To Become An Event Planner,"Event Planning Quotes,Event Planning Checklist,Event Planning Business,Business Events,Business Ideas,Business Names,Business Opportunities,Corporate Events,Wedding Event Planner",image,https://i.pinimg.com/originals/c3/2b/c6/c32bc6ad263857cb0eea19f9cd12beb9.jpg,/data/event-planning,event-planning
25,f19b91c7-2a58-41ae-a013-3806d248baec,How to use an Angled Paint Brush! Painting Techniques with The Social Easel Online Paint Studio,If I could only choose one paint brush it would be the angled brush! I am going to break down four separate Techniques I like to use an Angled Paint Brush with a video painting…,20000.0,The Social Easel Online Paint Studio | Video Painting Tutorials,"Fall Canvas Painting,Basic Painting,Acrylic Painting Flowers,Canvas Painting Tutorials,Autumn Painting,Painting Techniques,Diy Painting,Painting & Drawing,Canvas Art",image,https://i.pinimg.com/originals/cc/8e/81/cc8e8190f773d5e3bb7d86890b566da7.png,/data/art,art
1967,0b9d5b95-51a6-465e-ae4a-2cb68ceada29,15 Fun & Festive Christmas Porch Ideas,15 unique Christmas porch ideas that will leave you feeling inspired and help you tackle decorating your own entryway for the holidays! It’s almost time to start decorating for…,19000.0,Ashley - Modern Glam,"Exterior Christmas Lights,Front Door Christmas Decorations,Christmas Lights Outside,Christmas House Lights,Decorating With Christmas Lights,Porch Decorating,Christmas Porch Decorations,Front Porch Ideas For Christmas,Christmas Lights Outdoor Trees",image,https://i.pinimg.com/originals/ff/f8/3b/fff83b02aeb29e2e9341a56fc5e63345.png,/data/christmas,christmas
5130,82578274-6d14-4c0c-a755-aee515556bdc,"How Much Money You Will Need to Make How Much Money You Will Need to Make $1,000 in Dividends,000 in Dividends | Young Adult Money","Passive income is a great goal, and nothing is more passive than dividends from stocks. Here's how much money you will need to make $1,000 a year in dividends.",9000.0,Young Adult Money,"Financial Tips,Financial Literacy,Financial Planning,Stock Ticker,High Interest Savings Account,Dividend Investing,Energy Saving Tips,Dow Jones Industrial Average,Dividend Stocks",image,https://i.pinimg.com/originals/fa/88/3f/fa883f38e875a08d90f8b37ad46acbd1.png,/data/finance,finance
2604,087b0fa9-f901-4262-aa0a-6caf234d1b35,75+ Neutral Christmas Home Decor for the Holiday Season in Farmhouse Style using Earth Tones Modern,"My favorite 75+ Neutral Christmas Home Decor for decorating your house during the Holiday Season in earth tones and a farmhouse, rustic style all winter. I love this modern, sim…",31000.0,Everyday Wholesome,"Colorful Christmas Decorations,Colorful Christmas Tree,Christmas Centerpieces,Christmas Colors,Xmas Colors,Winter Decorations,Christmas Trends,Christmas Inspiration,Christmas Home",image,https://i.pinimg.com/originals/86/84/39/868439dd894969e3abd6a2a8a9fe1e9c.jpg,/data/christmas,christmas


In [0]:
df_pin.printSchema()

In [0]:
## df_geo

In [0]:
df_geo.dtypes

In [0]:
# New coordinates column, array with lat,long
from pyspark.sql.functions import array
df_geo = df_geo.withColumn("coordinates", array("latitude", "longitude"))

In [0]:
display(df_geo)

country,ind,latitude,longitude,timestamp,coordinates
Antarctica (the territory South of 60 deg S),5162,-71.6607,-149.206,2019-09-27 19:06:43,"List(-71.6607, -149.206)"
Antarctica (the territory South of 60 deg S),1335,-77.9931,-175.682,2022-03-19 17:29:42,"List(-77.9931, -175.682)"
Antarctica (the territory South of 60 deg S),9185,-10.3764,-22.9809,2019-10-06 18:12:55,"List(-10.3764, -22.9809)"
Antarctica (the territory South of 60 deg S),9335,-88.4642,-171.061,2020-11-14 23:42:22,"List(-88.4642, -171.061)"
Saint Vincent and the Grenadines,2301,13.4683,51.7244,2020-11-14 00:25:28,"List(13.4683, 51.7244)"
Holy See (Vatican City State),7180,-22.7118,-167.739,2018-04-22 21:33:50,"List(-22.7118, -167.739)"
Svalbard & Jan Mayen Islands,10204,-14.3257,26.9087,2020-04-06 02:56:29,"List(-14.3257, 26.9087)"
French Southern Territories,6014,-26.6026,155.206,2019-04-30 12:33:13,"List(-26.6026, 155.206)"
Bouvet Island (Bouvetoya),8677,-84.3984,-144.933,2020-02-12 23:24:01,"List(-84.3984, -144.933)"
Bouvet Island (Bouvetoya),9270,-84.3984,-144.933,2022-05-23 03:24:24,"List(-84.3984, -144.933)"


In [0]:
# drop lat, long
df_geo = df_geo.drop('latitude', 'longitude')

In [0]:
df_geo.printSchema()

In [0]:
# reorder the df)geo
new_column_order_geo = [
    "ind",
    "country",
    "coordinates",
    "timestamp",
]
df_geo = df_geo.select(new_column_order_geo)

In [0]:
display(df_geo)

ind,country,coordinates,timestamp
5162,Antarctica (the territory South of 60 deg S),"List(-71.6607, -149.206)",2019-09-27 19:06:43
1335,Antarctica (the territory South of 60 deg S),"List(-77.9931, -175.682)",2022-03-19 17:29:42
9185,Antarctica (the territory South of 60 deg S),"List(-10.3764, -22.9809)",2019-10-06 18:12:55
9335,Antarctica (the territory South of 60 deg S),"List(-88.4642, -171.061)",2020-11-14 23:42:22
2301,Saint Vincent and the Grenadines,"List(13.4683, 51.7244)",2020-11-14 00:25:28
7180,Holy See (Vatican City State),"List(-22.7118, -167.739)",2018-04-22 21:33:50
10204,Svalbard & Jan Mayen Islands,"List(-14.3257, 26.9087)",2020-04-06 02:56:29
6014,French Southern Territories,"List(-26.6026, 155.206)",2019-04-30 12:33:13
8677,Bouvet Island (Bouvetoya),"List(-84.3984, -144.933)",2020-02-12 23:24:01
9270,Bouvet Island (Bouvetoya),"List(-84.3984, -144.933)",2022-05-23 03:24:24


In [0]:
# Clean df_user

In [0]:
df_user.printSchema()

In [0]:
# create new user_name from first_name and last_name
df_user = df_user.withColumn("user_name", concat("first_name", "last_name"))

In [0]:
# drop first_name and last_name
df_user = df_user.drop('first_name', 'last_name')

In [0]:
# convert date_joined to timestamp
df_user = df_user.withColumn("date_joined", to_timestamp("date_joined"))

In [0]:
# reorder the df)geo
new_column_order_user = [
    "ind",
    "user_name",
    "age",
    "date_joined",
]
df_user = df_user.select(new_column_order_user)

In [0]:
df_user.printSchema()

In [0]:
display(df_user)

ind,user_name,age,date_joined
2015,ChristopherBradshaw,27,2016-03-08T13:38:37.000+0000
10673,AlexanderCervantes,59,2017-05-12T21:22:17.000+0000
1857,ChristopherHamilton,48,2016-02-27T16:57:44.000+0000
10020,ChristopherHawkins,45,2016-09-15T06:02:53.000+0000
6398,ChristinaDavenport,39,2016-06-29T20:43:59.000+0000
3599,AlexandriaAlvarado,20,2015-10-23T04:13:23.000+0000
4256,AlexandriaAlvarado,20,2015-10-23T04:13:23.000+0000
1901,MichelleRichardson,44,2016-12-18T16:05:39.000+0000
3831,AlexandriaAlvarado,20,2015-10-23T04:13:23.000+0000
6602,AlexandriaAnderson,23,2015-10-31T19:20:09.000+0000


In [0]:
## Analysis of the data

In [0]:
# Join to carry out analytics on combined data
df = df_pin.join(df_geo, df_geo['ind'] == df_pin['ind'], how='inner').drop(df_geo['ind'])
df = df.join(df_user, df['ind'] == df_user['ind'], how='inner').drop(df_user['ind'])

In [0]:
df.printSchema()

In [0]:
display(df)

ind,unique_id,title,description,follower_count,poster_name,tag_list,is_image_or_video,image_src,save_location,category,country,coordinates,timestamp,user_name,age,date_joined
6717,bc5ab9ee-505e-44f6-92ba-677fe4fdf3e3,〚 Уютные шведские коттеджи от Carina Olander 〛 ◾ Фото ◾ Идеи ◾ Дизайн,"Традиционные шведские коттеджи, обычно с красным фасадом — это настоящее воплощением идеального зимнего уюта. Они обычно оформлены очень просто и ✌PUFIK. Beautiful Interiors. On…",136000.0,PUFIK Interiors & Inspirations,"Scandinavian Cottage,Swedish Cottage,Swedish Home Decor,Swedish Farmhouse,Swedish Style,Swedish Kitchen,Kitchen Black,Swedish House,Cozy Cottage",image,https://i.pinimg.com/originals/32/eb/72/32eb72e4fd8654c115a64528bd1f34b4.png,/data/home-decor,home-decor,Netherlands,"List(-58.7512, -162.324)",2022-09-07 16:35:00,ElizabethGreen,20,2016-06-21T10:08:09.000+0000
1706,b5c8a1b5-9e90-4522-9bec-2477b698d5b7,Standing Figurine Toys Xmas Santa Claus Snowman Reindeer Figure Plush Dolls Christmas Decorations Ornaments Home Indoor Table Ornaments Christmas Party Tree Hanging Decor Toys Gifts for Kids Friends…,"Features: Material:Lint Size:48ｘ18cm Quantity:1 pc Shape:Santa Claus, snowman. Elk Occasion:Christmas Description: 1. Fashion design, high quality 2. Santa Claus, snowman. Elk C…",5000.0,Wear24-7,"Merry Christmas To You,Christmas Toys,Great Christmas Gifts,Christmas Snowman,Christmas Ornaments,Holiday,Christmas Party Decorations,Christmas Themes,Decoration Party",image,https://i.pinimg.com/originals/b5/7f/21/b57f219fa89c1165b57525b8eae711da.jpg,/data/christmas,christmas,Aruba,"List(-71.5025, -179.257)",2017-11-24 23:36:46,AmyAdams,20,2015-10-24T05:05:28.000+0000
2482,08604f20-fa17-4b9a-9949-781717eca6cd,FORNT PORCH CHRISTMAS DECORATING IDEAS,"Christmas decorating ideas for porches. Beautiful holiday decor ideas for front porches both small and large. Outdoor decorations like sleds, lanterns, Christmas trees, wreaths,…",46000.0,"Life on Summerhill | Home, Holiday Decor & DIY Website","Diy Christmas Decorations For Home,Farmhouse Christmas Decor,Christmas Home,Christmas Holidays,Christmas Front Porches,How To Decorate For Christmas,Christmas Porch Ideas,Christmas Decorating Ideas,Large Outdoor Christmas Decorations",video,https://i.pinimg.com/videos/thumbnails/originals/40/83/f5/4083f5b4971bf235f89a4784ab87271e.0000001.jpg,/data/christmas,christmas,Bermuda,"List(63.4563, -164.709)",2019-09-13 08:20:13,DavidMoss,22,2016-03-01T07:11:48.000+0000
10538,5d9fa7e2-2118-4442-99b6-537d60463a6a,BC Customs (BCC) Search and Rescue Tactical Vehicle-5 (SRTV-5) Baja Racing-Type All-Terrain Combat Vehicle Armed/Weaponized with 7.62mm NATO Garwood Industries (GI) M134G Minigun/Gatling Gun: SXOR…,"By David Crane ; defrev (at) gmail (dot) com All photos contained in this article were shot by DefenseReview.com (DR), and are copyrighted. DefenseReview.com owns the copyright…",709.0,Ricky Lee,"Army Vehicles,Armored Vehicles,Cool Trucks,Cool Cars,Amphibious Vehicle,Offroader,Bug Out Vehicle,Vehicle Wraps,Terrain Vehicle",image,https://i.pinimg.com/originals/36/63/12/366312d747da1358397610a86bf21b20.jpg,/data/vehicles,vehicles,Belarus,"List(-82.4354, 59.1527)",2018-01-29 03:49:45,JordanGrant,42,2016-07-29T00:56:59.000+0000
527,ed8af037-ee87-4a80-97ac-99f5b153cf7e,"Blue Gold Marble Canvas , Luxury Wall Art, Abstract Wall Decor, Navy Blue Abstract, Modern Artwork, Oversize Canvas Art, Contemporary Art - 1 Panel 12x9 / Gallery Wrap",Marble Wall Art Modern Abstract Canvas Artwork Contemporary Home Decor Canvas Wall Art Ready to Hang Canvas Each canvas is professionally printed and hand-stretched in the USA.…,305.0,Wall Canvas Mall,"Modern Art Paintings,Modern Artwork,Modern Wall Art,Blue Artwork,Modern Canvas Art,Contemporary Home Decor,Modern Art Prints,Framed Canvas Prints,Wall Art Prints",image,https://i.pinimg.com/originals/b2/6e/95/b26e950a283805d09ef9a4a279781217.jpg,/data/art,art,Aruba,"List(-75.3, -169.77)",2020-11-15 18:35:20,CarlosDixon,24,2015-11-09T03:47:45.000+0000
4357,ccf116e9-9096-4943-a344-1960ce216445,First Steps in Launching Your Own Event Business - Learn About Event Planning,"Updated: January 25, 2017 You’ve organized some events for your family, friends or community and you have gained a budding reputation for knowing how put events together. You’ve…",4000.0,EventPlanning.com | Learn How To Become An Event Planner,"Event Planning Quotes,Event Planning Checklist,Event Planning Business,Business Events,Business Ideas,Business Names,Business Opportunities,Corporate Events,Wedding Event Planner",image,https://i.pinimg.com/originals/c3/2b/c6/c32bc6ad263857cb0eea19f9cd12beb9.jpg,/data/event-planning,event-planning,Bahamas,"List(-75.4909, -179.908)",2020-03-06 09:56:43,AllisonAdams,22,2015-11-30T10:51:59.000+0000
25,f19b91c7-2a58-41ae-a013-3806d248baec,How to use an Angled Paint Brush! Painting Techniques with The Social Easel Online Paint Studio,If I could only choose one paint brush it would be the angled brush! I am going to break down four separate Techniques I like to use an Angled Paint Brush with a video painting…,20000.0,The Social Easel Online Paint Studio | Video Painting Tutorials,"Fall Canvas Painting,Basic Painting,Acrylic Painting Flowers,Canvas Painting Tutorials,Autumn Painting,Painting Techniques,Diy Painting,Painting & Drawing,Canvas Art",image,https://i.pinimg.com/originals/cc/8e/81/cc8e8190f773d5e3bb7d86890b566da7.png,/data/art,art,Ecuador,"List(-81.3019, 63.8961)",2021-12-02 12:40:33,AmberGray,24,2017-07-01T07:56:15.000+0000
1967,0b9d5b95-51a6-465e-ae4a-2cb68ceada29,15 Fun & Festive Christmas Porch Ideas,15 unique Christmas porch ideas that will leave you feeling inspired and help you tackle decorating your own entryway for the holidays! It’s almost time to start decorating for…,19000.0,Ashley - Modern Glam,"Exterior Christmas Lights,Front Door Christmas Decorations,Christmas Lights Outside,Christmas House Lights,Decorating With Christmas Lights,Porch Decorating,Christmas Porch Decorations,Front Porch Ideas For Christmas,Christmas Lights Outdoor Trees",image,https://i.pinimg.com/originals/ff/f8/3b/fff83b02aeb29e2e9341a56fc5e63345.png,/data/christmas,christmas,Australia,"List(32.74, -179.581)",2021-02-05 10:37:28,JayFlynn,24,2016-05-12T16:28:27.000+0000
5130,82578274-6d14-4c0c-a755-aee515556bdc,"How Much Money You Will Need to Make How Much Money You Will Need to Make $1,000 in Dividends,000 in Dividends | Young Adult Money","Passive income is a great goal, and nothing is more passive than dividends from stocks. Here's how much money you will need to make $1,000 a year in dividends.",9000.0,Young Adult Money,"Financial Tips,Financial Literacy,Financial Planning,Stock Ticker,High Interest Savings Account,Dividend Investing,Energy Saving Tips,Dow Jones Industrial Average,Dividend Stocks",image,https://i.pinimg.com/originals/fa/88/3f/fa883f38e875a08d90f8b37ad46acbd1.png,/data/finance,finance,Grenada,"List(-86.6858, -160.5)",2020-10-24 23:43:15,LindaBlackwell,20,2015-12-11T03:23:37.000+0000
2604,087b0fa9-f901-4262-aa0a-6caf234d1b35,75+ Neutral Christmas Home Decor for the Holiday Season in Farmhouse Style using Earth Tones Modern,"My favorite 75+ Neutral Christmas Home Decor for decorating your house during the Holiday Season in earth tones and a farmhouse, rustic style all winter. I love this modern, sim…",31000.0,Everyday Wholesome,"Colorful Christmas Decorations,Colorful Christmas Tree,Christmas Centerpieces,Christmas Colors,Xmas Colors,Winter Decorations,Christmas Trends,Christmas Inspiration,Christmas Home",image,https://i.pinimg.com/originals/86/84/39/868439dd894969e3abd6a2a8a9fe1e9c.jpg,/data/christmas,christmas,Antigua and Barbuda,"List(-80.8933, -104.972)",2018-12-01 09:23:35,AshleyEvans,30,2016-02-21T12:54:01.000+0000


In [0]:
# Find the most popular Pinterest category people post to based on their country?

from pyspark.sql.window import Window
window_spec = Window.partitionBy("country").orderBy(col('category_count').desc())
#df_grouped = df.withColumn("category_count", count('category').over(window_spec))

df_grouped = df.groupBy("country", "category").agg(count("category").alias("category_count")) \
    .withColumn("rank", row_number().over(window_spec)).filter("rank == 1").drop("rank")


In [0]:
display(df_grouped)

country,category,category_count
Afghanistan,education,5
Albania,mens-fashion,46
Algeria,quotes,8
American Samoa,tattoos,3
Andorra,tattoos,4
Angola,art,1
Anguilla,diy-and-crafts,2
Antarctica (the territory South of 60 deg S),tattoos,2
Antigua and Barbuda,christmas,9
Argentina,tattoos,3


In [0]:
# Find how many posts each category had between 2018 and 2022?

window_spec = Window.partitionBy("post_year").orderBy(col('category_count').desc())
#df_grouped = df.withColumn("category_count", count('category').over(window_spec))

df_grouped = df.withColumn("post_year", year("timestamp")).filter((col("post_year") >= 2018) & (col("post_year") <= 2022)) \
    .groupBy("post_year","category").agg(count("category").alias("category_count"))  \
    .withColumn("rank", row_number().over(window_spec)).filter("rank == 1").drop("rank")

In [0]:
display(df_grouped)

post_year,category,category_count
2018,education,10
2019,travel,12
2020,mens-fashion,52
2021,quotes,9
2022,beauty,16


In [0]:
# For each country find the user with the most followers.
window_spec = Window.partitionBy("country").orderBy(col('follower_count').desc())

df_grouped = df.withColumn("rank", row_number().over(window_spec)) \
    .filter(col("rank") == 1) \
    .select("country", "poster_name", "follower_count")


In [0]:
display(df_grouped)

country,poster_name,follower_count
Afghanistan,9GAG,3000000
Albania,The Minds Journal,5000000
Algeria,YourTango,942000
American Samoa,Mamas Uncut,8000000
Andorra,Teachers Pay Teachers,1000000
Angola,Tastemade,8000000
Anguilla,Thank Your Body,85000
Antarctica (the territory South of 60 deg S),Refinery29,1000000
Antigua and Barbuda,Country Living Magazine,1000000
Argentina,Next Luxury,800000


In [0]:
# Based on the above query, find the country with the user with most followers.
# frist find the highest follower_count across all records. This is the coutn for the user with highest follower_count
max_followers =  df_grouped.select(max("follower_count")).collect()[0][0]
max_followers

In [0]:
# find the user with the highest follower_count by filtering the df_grouped by the max_followers
df_grouped.filter(df_grouped.follower_count == max_followers).show()
# .select("*").where(col("follower_count") == max_followers_all_countries)

#two countries hold a user with the highest follower_count

In [0]:
# What is the most popular category people post to based on the following age groups:

# 18-24
# 25-35
# 36-50
# +50

# first construct age groupings column
df = df.withColumn("age_group", when(col("age") <= 24, "18-24") \
    .when((col("age") > 24) & (col("age") <= 35), "25-35") \
        .when((col("age") > 35) & (col("age") <= 50), "36-50") \
            .when(col("age") > 50, "+50") \
                .otherwise(None))


In [0]:
display(df)

ind,unique_id,title,description,follower_count,poster_name,tag_list,is_image_or_video,image_src,save_location,category,country,coordinates,timestamp,user_name,age,date_joined,age_group
6717,bc5ab9ee-505e-44f6-92ba-677fe4fdf3e3,〚 Уютные шведские коттеджи от Carina Olander 〛 ◾ Фото ◾ Идеи ◾ Дизайн,"Традиционные шведские коттеджи, обычно с красным фасадом — это настоящее воплощением идеального зимнего уюта. Они обычно оформлены очень просто и ✌PUFIK. Beautiful Interiors. On…",136000.0,PUFIK Interiors & Inspirations,"Scandinavian Cottage,Swedish Cottage,Swedish Home Decor,Swedish Farmhouse,Swedish Style,Swedish Kitchen,Kitchen Black,Swedish House,Cozy Cottage",image,https://i.pinimg.com/originals/32/eb/72/32eb72e4fd8654c115a64528bd1f34b4.png,/data/home-decor,home-decor,Netherlands,"List(-58.7512, -162.324)",2022-09-07 16:35:00,ElizabethGreen,20,2016-06-21T10:08:09.000+0000,18-24
1706,b5c8a1b5-9e90-4522-9bec-2477b698d5b7,Standing Figurine Toys Xmas Santa Claus Snowman Reindeer Figure Plush Dolls Christmas Decorations Ornaments Home Indoor Table Ornaments Christmas Party Tree Hanging Decor Toys Gifts for Kids Friends…,"Features: Material:Lint Size:48ｘ18cm Quantity:1 pc Shape:Santa Claus, snowman. Elk Occasion:Christmas Description: 1. Fashion design, high quality 2. Santa Claus, snowman. Elk C…",5000.0,Wear24-7,"Merry Christmas To You,Christmas Toys,Great Christmas Gifts,Christmas Snowman,Christmas Ornaments,Holiday,Christmas Party Decorations,Christmas Themes,Decoration Party",image,https://i.pinimg.com/originals/b5/7f/21/b57f219fa89c1165b57525b8eae711da.jpg,/data/christmas,christmas,Aruba,"List(-71.5025, -179.257)",2017-11-24 23:36:46,AmyAdams,20,2015-10-24T05:05:28.000+0000,18-24
2482,08604f20-fa17-4b9a-9949-781717eca6cd,FORNT PORCH CHRISTMAS DECORATING IDEAS,"Christmas decorating ideas for porches. Beautiful holiday decor ideas for front porches both small and large. Outdoor decorations like sleds, lanterns, Christmas trees, wreaths,…",46000.0,"Life on Summerhill | Home, Holiday Decor & DIY Website","Diy Christmas Decorations For Home,Farmhouse Christmas Decor,Christmas Home,Christmas Holidays,Christmas Front Porches,How To Decorate For Christmas,Christmas Porch Ideas,Christmas Decorating Ideas,Large Outdoor Christmas Decorations",video,https://i.pinimg.com/videos/thumbnails/originals/40/83/f5/4083f5b4971bf235f89a4784ab87271e.0000001.jpg,/data/christmas,christmas,Bermuda,"List(63.4563, -164.709)",2019-09-13 08:20:13,DavidMoss,22,2016-03-01T07:11:48.000+0000,18-24
10538,5d9fa7e2-2118-4442-99b6-537d60463a6a,BC Customs (BCC) Search and Rescue Tactical Vehicle-5 (SRTV-5) Baja Racing-Type All-Terrain Combat Vehicle Armed/Weaponized with 7.62mm NATO Garwood Industries (GI) M134G Minigun/Gatling Gun: SXOR…,"By David Crane ; defrev (at) gmail (dot) com All photos contained in this article were shot by DefenseReview.com (DR), and are copyrighted. DefenseReview.com owns the copyright…",709.0,Ricky Lee,"Army Vehicles,Armored Vehicles,Cool Trucks,Cool Cars,Amphibious Vehicle,Offroader,Bug Out Vehicle,Vehicle Wraps,Terrain Vehicle",image,https://i.pinimg.com/originals/36/63/12/366312d747da1358397610a86bf21b20.jpg,/data/vehicles,vehicles,Belarus,"List(-82.4354, 59.1527)",2018-01-29 03:49:45,JordanGrant,42,2016-07-29T00:56:59.000+0000,36-50
527,ed8af037-ee87-4a80-97ac-99f5b153cf7e,"Blue Gold Marble Canvas , Luxury Wall Art, Abstract Wall Decor, Navy Blue Abstract, Modern Artwork, Oversize Canvas Art, Contemporary Art - 1 Panel 12x9 / Gallery Wrap",Marble Wall Art Modern Abstract Canvas Artwork Contemporary Home Decor Canvas Wall Art Ready to Hang Canvas Each canvas is professionally printed and hand-stretched in the USA.…,305.0,Wall Canvas Mall,"Modern Art Paintings,Modern Artwork,Modern Wall Art,Blue Artwork,Modern Canvas Art,Contemporary Home Decor,Modern Art Prints,Framed Canvas Prints,Wall Art Prints",image,https://i.pinimg.com/originals/b2/6e/95/b26e950a283805d09ef9a4a279781217.jpg,/data/art,art,Aruba,"List(-75.3, -169.77)",2020-11-15 18:35:20,CarlosDixon,24,2015-11-09T03:47:45.000+0000,18-24
4357,ccf116e9-9096-4943-a344-1960ce216445,First Steps in Launching Your Own Event Business - Learn About Event Planning,"Updated: January 25, 2017 You’ve organized some events for your family, friends or community and you have gained a budding reputation for knowing how put events together. You’ve…",4000.0,EventPlanning.com | Learn How To Become An Event Planner,"Event Planning Quotes,Event Planning Checklist,Event Planning Business,Business Events,Business Ideas,Business Names,Business Opportunities,Corporate Events,Wedding Event Planner",image,https://i.pinimg.com/originals/c3/2b/c6/c32bc6ad263857cb0eea19f9cd12beb9.jpg,/data/event-planning,event-planning,Bahamas,"List(-75.4909, -179.908)",2020-03-06 09:56:43,AllisonAdams,22,2015-11-30T10:51:59.000+0000,18-24
25,f19b91c7-2a58-41ae-a013-3806d248baec,How to use an Angled Paint Brush! Painting Techniques with The Social Easel Online Paint Studio,If I could only choose one paint brush it would be the angled brush! I am going to break down four separate Techniques I like to use an Angled Paint Brush with a video painting…,20000.0,The Social Easel Online Paint Studio | Video Painting Tutorials,"Fall Canvas Painting,Basic Painting,Acrylic Painting Flowers,Canvas Painting Tutorials,Autumn Painting,Painting Techniques,Diy Painting,Painting & Drawing,Canvas Art",image,https://i.pinimg.com/originals/cc/8e/81/cc8e8190f773d5e3bb7d86890b566da7.png,/data/art,art,Ecuador,"List(-81.3019, 63.8961)",2021-12-02 12:40:33,AmberGray,24,2017-07-01T07:56:15.000+0000,18-24
1967,0b9d5b95-51a6-465e-ae4a-2cb68ceada29,15 Fun & Festive Christmas Porch Ideas,15 unique Christmas porch ideas that will leave you feeling inspired and help you tackle decorating your own entryway for the holidays! It’s almost time to start decorating for…,19000.0,Ashley - Modern Glam,"Exterior Christmas Lights,Front Door Christmas Decorations,Christmas Lights Outside,Christmas House Lights,Decorating With Christmas Lights,Porch Decorating,Christmas Porch Decorations,Front Porch Ideas For Christmas,Christmas Lights Outdoor Trees",image,https://i.pinimg.com/originals/ff/f8/3b/fff83b02aeb29e2e9341a56fc5e63345.png,/data/christmas,christmas,Australia,"List(32.74, -179.581)",2021-02-05 10:37:28,JayFlynn,24,2016-05-12T16:28:27.000+0000,18-24
5130,82578274-6d14-4c0c-a755-aee515556bdc,"How Much Money You Will Need to Make How Much Money You Will Need to Make $1,000 in Dividends,000 in Dividends | Young Adult Money","Passive income is a great goal, and nothing is more passive than dividends from stocks. Here's how much money you will need to make $1,000 a year in dividends.",9000.0,Young Adult Money,"Financial Tips,Financial Literacy,Financial Planning,Stock Ticker,High Interest Savings Account,Dividend Investing,Energy Saving Tips,Dow Jones Industrial Average,Dividend Stocks",image,https://i.pinimg.com/originals/fa/88/3f/fa883f38e875a08d90f8b37ad46acbd1.png,/data/finance,finance,Grenada,"List(-86.6858, -160.5)",2020-10-24 23:43:15,LindaBlackwell,20,2015-12-11T03:23:37.000+0000,18-24
2604,087b0fa9-f901-4262-aa0a-6caf234d1b35,75+ Neutral Christmas Home Decor for the Holiday Season in Farmhouse Style using Earth Tones Modern,"My favorite 75+ Neutral Christmas Home Decor for decorating your house during the Holiday Season in earth tones and a farmhouse, rustic style all winter. I love this modern, sim…",31000.0,Everyday Wholesome,"Colorful Christmas Decorations,Colorful Christmas Tree,Christmas Centerpieces,Christmas Colors,Xmas Colors,Winter Decorations,Christmas Trends,Christmas Inspiration,Christmas Home",image,https://i.pinimg.com/originals/86/84/39/868439dd894969e3abd6a2a8a9fe1e9c.jpg,/data/christmas,christmas,Antigua and Barbuda,"List(-80.8933, -104.972)",2018-12-01 09:23:35,AshleyEvans,30,2016-02-21T12:54:01.000+0000,25-35


In [0]:
# now we can find popular category per age group
window_spec = Window.partitionBy("age_group").orderBy(desc("category_count"))
df_grouped = df.groupBy("age_group", "category").agg(count("category").alias("category_count")) \
    .withColumn("rank", row_number().over(window_spec)).filter(col("rank") == 1).drop("rank")


In [0]:
display(df_grouped)

age_group,category,category_count
+50,beauty,5
18-24,mens-fashion,58
25-35,art,11
36-50,travel,12


In [0]:
# median follower count for users in their age groups
df.groupBy("age_group").agg(percentile_approx("follower_count", 0.5).alias("median_follower_count")).show()

In [0]:
# Find how many users have joined between 2015 and 2020

window_spec = Window.partitionBy("post_year").orderBy(col('category_count').desc())
#df_grouped = df.withColumn("category_count", count('category').over(window_spec))

df_grouped = df.withColumn("post_year", year("date_joined")).filter((col("post_year") >= 2015) & (col("post_year") <= 2020)) \
    .groupBy("post_year").agg(countDistinct("poster_name").alias("number_users_joined"))

In [0]:
display(df_grouped)

post_year,number_users_joined
2015,91
2016,163
2017,58


In [0]:
# Find the median follower count of users have joined between 2015 and 2020.

df.select('poster_name','date_joined', 'follower_count').distinct() \
    .withColumn("post_year", year("date_joined")).filter((col("post_year") >= 2015) & (col("post_year") <= 2020)) \
    .groupBy("post_year").agg(percentile_approx("follower_count", 0.5).alias("median_follower_count")).show()

In [0]:
df.select('poster_name','date_joined', 'follower_count', 'age_group').distinct() \
    .withColumn("post_year", year("date_joined")).filter((col("post_year") >= 2015) & (col("post_year") <= 2020)) \
    .groupBy("post_year","age_group").agg(percentile_approx("follower_count", 0.5).alias("median_follower_count")).show()

In [0]:
dbutils.fs.unmount(MOUNT_PATH)