In [1]:
# Loading the libraries
library(tidyverse)
library(httr)
library(jsonlite)
library(lubridate)

Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang
Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
-- Attaching packages --------------------------------------- tidyverse 1.2.1 --
v ggplot2 3.1.1       v purrr   0.3.2  
v tibble  2.1.1       v dplyr   0.8.0.1
v tidyr   0.8.3       v stringr 1.4.0  
v readr   1.3.1       v forcats 0.4.0  
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

Attaching package: 'jsonlite'

The following object is masked from 'package:purrr':

    flatten


Attaching package: 'lubridate'

The following object is masked from 'package:base':

    date



In [2]:
# Request the first page (page index 0) of the category listing; the parsed
# response also carries the listing-wide counters read below.
url <- paste0(
  "https://api.pasarnow.com/api/appProductListNotLoggedIn",
  "?page=0&category=CAT-20-06-0000004&subcategory="
)
json <- fromJSON(url) # downloads the URL and parses the JSON response
total_pages <- json$pages # "pages" field; presumably the number of pages - confirm against the API
total_products <- json$total # "total" field; presumably the number of products - confirm against the API
pages <- ceiling(total_products / total_pages) # products per page, rounded up

In [3]:
# Download every page of the category listing and stack the product rows
# into `sayur_pasarnow`.
#
# Page indices are derived from `total_pages` (the `pages` field of the first
# response) rather than from the per-page product count, which is a different
# quantity. Numbering is zero-based, matching the `page=0` request used for
# the first page.

# Fetch one listing page and return its products as a four-column data frame.
fetch_sayur_page <- function(page) {
  print(paste0("Getting data from page:", page)) # progress message per page
  page_url <- paste0(
    "https://api.pasarnow.com/api/appProductListNotLoggedIn?page=", page,
    "&category=CAT-20-06-0000004&subcategory="
  )
  products <- fromJSON(page_url)$products
  # Keep only the four fields of interest. A page without products yields
  # NULL columns and therefore an empty data frame, which adds no rows.
  data.frame(
    product_name = products[["product_name"]],
    description = products[["description"]],
    original_price = products[["original_price"]],
    resale_price = products[["resale_price"]],
    stringsAsFactors = FALSE # keep text as character so pages bind cleanly
  )
}

# Fail early with a clear error if the page count is missing or malformed.
stopifnot(is.numeric(total_pages), length(total_pages) == 1L)

page_indices <- seq_len(total_pages) - 1L # 0, 1, ..., total_pages - 1
# Collect one data frame per page, then bind once instead of growing the
# result with rbind() on every iteration.
sayur_pasarnow <- bind_rows(lapply(page_indices, fetch_sayur_page))

[1] "Getting data from page:0"
[1] "Getting data from page:1"
[1] "Getting data from page:2"
[1] "Getting data from page:3"
[1] "Getting data from page:4"
[1] "Getting data from page:5"
[1] "Getting data from page:6"
[1] "Getting data from page:7"
[1] "Getting data from page:8"
[1] "Getting data from page:9"
[1] "Getting data from page:10"


In [4]:
# Capture the moment of the scrape and derive a compact stamp from it:
# two-digit year, month, day (yymmdd, e.g. "200820"), used in file names.
scrape_date <- now()
date_stamp <- format(scrape_date, "%y%m%d")

In [5]:
# Save the scraped table as a date-stamped CSV in the working directory.
# The file name is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works with both.
csv_name <- paste0("scrape_sayur_pasarnow_", date_stamp, ".csv")
write_csv(sayur_pasarnow, csv_name)

In [6]:
# Read the date-stamped CSV back in to check its contents. The name is
# rebuilt from `date_stamp` instead of a hard-coded date, so it follows the
# stamp computed for the current run.
read_csv(paste0("scrape_sayur_pasarnow_", date_stamp, ".csv"))

Parsed with column specification:
cols(
  product_name = col_character(),
  description = col_character(),
  original_price = col_double(),
  resale_price = col_double()
)


product_name,description,original_price,resale_price
Cabe Merah Keriting 1kg,Cabe Merah Keriting 1kg Harga dapat berubah sewaktu-waktu,26800,22000
Timun Lalap 1kg,Timun Lalap 1kg Harga dapat berubah sewaktu-waktu,14800,8900
Cabe Rawit Merah 1kg,Cabe Rawit Merah 1kg Harga dapat berubah sewaktu-waktu,35000,22400
Tomat Reguler 500gr,Tomat Reguler 500gr Harga dapat berubah sewaktu-waktu,10900,7500
Oyong 1kg,Oyong 1kg Harga dapat berubah sewaktu-waktu,17000,14000
Paprika Hijau 1 Pack 3pcs,Paprika Hijau 1 Pack - 3pcs Harga dapat berubah sewaktu-waktu,27700,19900
Cabe Merah Keriting 250gr,Cabe Merah Keriting 250gr Harga dapat berubah sewaktu-waktu,7500,5700
Bawang Putih Bonggol 500gr,Bawang Putih Bonggol/kating utuh 500gr Harga dapat berubah swwaktu-waktu,16000,11800
Kacang Panjang 500gr,Kacang Panjang 500gr Harga dapat berubah sewaktu-waktu,13000,10200
Bawang Putih Kating 500gr,Bawang Putih Kating 500gr Harga dapat berubah sewaktu-waktu,18900,15700
