#5 Add partial logging to skelbiu_lt_scraper
zexa committed Oct 12, 2021
1 parent 9ee57e1 · commit 61835aa
Showing 7 changed files with 50 additions and 8 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

6 changes: 3 additions & 3 deletions common-scraper/src/common_scraper.rs
@@ -60,15 +60,15 @@ where
             if let Ok(mut potential_listing_share) = potential_listing_share_mutex.lock() {
                 match potential_listing_share.get() {
                     SemaphoreShareResult::Red => {
-                        println!("Found RED on thread {}", thread_index);
+                        // println!("Found RED on thread {}", thread_index);
                         break;
                     }
                     SemaphoreShareResult::Green(potential_listing) => {
-                        println!("Found GREEN on thread {}", thread_index);
+                        // println!("Found GREEN on thread {}", thread_index);
                         potential_listing_option = Some(potential_listing);
                     }
                     SemaphoreShareResult::Yellow => {
-                        println!("Found YELLOW on thread {}", thread_index);
+                        // println!("Found YELLOW on thread {}", thread_index);
                         continue;
                     }
                 }
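Note: the println! calls above are commented out rather than converted, because common-scraper itself has no logger yet; the commit title calls the logging "partial". Purely as a sketch of a possible follow-up, assuming a cloned slog::Logger were made available to the worker threads and slog added to common-scraper's dependencies (nothing in this commit does either), the match arms could log like this:

    // Hypothetical follow-up, not part of this commit.
    use slog::{debug, Logger};

    enum SemaphoreShareResult {
        Red,
        Green(String), // PotentialListing in the real code; String keeps the sketch self-contained
        Yellow,
    }

    fn poll_share(logger: &Logger, share: SemaphoreShareResult, thread_index: usize) {
        match share {
            SemaphoreShareResult::Red => {
                debug!(logger, "Found RED on thread {}", thread_index);
            }
            SemaphoreShareResult::Green(potential_listing) => {
                debug!(logger, "Found GREEN on thread {}", thread_index);
                let _ = potential_listing;
            }
            SemaphoreShareResult::Yellow => {
                debug!(logger, "Found YELLOW on thread {}", thread_index);
            }
        }
    }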
2 changes: 1 addition & 1 deletion common-scraper/src/potential_listing.rs
@@ -1,6 +1,6 @@
 use url::Url;
 
-#[derive(Clone)]
+#[derive(Debug, Clone)]
 pub struct PotentialListing {
     listing_url: Url,
     listing_page_url: Url,
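The new Debug derive is what lets the listing scraper below log a PotentialListing with the {:?} formatter (debug!(self.logger, "Started logging {:?}", &potential_listing)). A minimal sketch of the mechanism, with String standing in for url::Url so it runs on its own:

    #[derive(Debug, Clone)]
    struct PotentialListing {
        listing_url: String,      // url::Url in the real code
        listing_page_url: String, // url::Url in the real code
    }

    fn main() {
        let listing = PotentialListing {
            listing_url: "https://skelbiu.lt/1".into(),
            listing_page_url: "https://skelbiu.lt".into(),
        };
        // `{:?}` requires Debug, which is why the derive was extended.
        println!("{:?}", listing);
    }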
1 change: 1 addition & 0 deletions skelbiu-lt-scraper/Cargo.toml
@@ -12,3 +12,4 @@ reqwest = { version = "0.11.5", features = ["blocking"]}
 scraper = "0.12.0"
 serde = { version = "1.0.117", features = ["derive"] }
 serde_json = "1.0"
+slog = "2.7.0"
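slog on its own provides the Logger type, the logging macros, and drain plumbing, but writes nothing anywhere; to actually see the new debug! records, a binary typically wires up a drain. A sketch assuming slog-term and slog-async were also added as dependencies (this commit adds only slog itself):

    use slog::{o, Drain, Logger};

    // Assumes extra crates not in this commit: slog-term = "2", slog-async = "2".
    fn root_logger() -> Logger {
        let decorator = slog_term::TermDecorator::new().build();
        let drain = slog_term::FullFormat::new(decorator).build().fuse();
        // Async drain so formatting does not block the scraper threads.
        let drain = slog_async::Async::new(drain).build().fuse();
        Logger::root(drain, o!("crate" => "skelbiu-lt-scraper"))
    }

Note that slog filters debug-level records out of release builds by default; the release_max_level_* cargo features control this.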
3 changes: 3 additions & 0 deletions skelbiu-lt-scraper/src/lib.rs
@@ -1,3 +1,6 @@
+#[macro_use]
+extern crate slog;
+
 mod skelbiu_lt_listing;
 mod skelbiu_lt_listing_scraper;
 mod skelbiu_lt_scraper;
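The crate-wide #[macro_use] makes debug! (and the other slog macros) visible in every module, which matches how they are used in the files below. On the 2018 edition the same macros can instead be imported locally where needed, e.g.:

    // Equivalent, more local alternative to the crate-wide attribute.
    use slog::debug;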
29 changes: 27 additions & 2 deletions skelbiu-lt-scraper/src/skelbiu_lt_listing_scraper.rs
@@ -1,9 +1,10 @@
 use crate::skelbiu_lt_listing::SkelbiuLtListing;
 use common_scraper::{ListingScraper, PotentialListing};
 use scraper::Selector;
-use std::ops::Index;
+use slog::Logger;
 
 pub struct SkelbiuLtListingScraper {
+    logger: Logger,
     id_selector: Selector,
     title_selector: Selector,
     description_selector: Selector,
@@ -17,6 +18,7 @@ pub struct SkelbiuLtListingScraper {

 impl SkelbiuLtListingScraper {
     pub fn new(
+        logger: Logger,
         id_selector: &str,
         title_selector: &str,
         description_selector: &str,
@@ -38,6 +40,7 @@ impl SkelbiuLtListingScraper {
         let price_selector = Selector::parse(price_selector).unwrap();
 
         Self {
+            logger,
             id_selector,
             title_selector,
             description_selector,
@@ -53,9 +56,14 @@

 impl ListingScraper<SkelbiuLtListing> for SkelbiuLtListingScraper {
     fn scrape_listing(&self, potential_listing: &PotentialListing) -> Option<SkelbiuLtListing> {
+        debug!(self.logger, "Started logging {:?}", &potential_listing);
+
         let listing_url = potential_listing.listing_url().to_string();
         if let Ok(response) = reqwest::blocking::get(&listing_url) {
+            debug!(self.logger, "Got response from {}", &listing_url);
+
             let html = scraper::Html::parse_document(response.text().unwrap().as_str());
+            debug!(self.logger, "Parsed html for {}", &listing_url);
 
             let title = html
                 .select(&self.title_selector)
@@ -65,6 +73,7 @@ impl ListingScraper<SkelbiuLtListing> for SkelbiuLtListingScraper {
                 .collect::<String>()
                 .trim()
                 .to_string();
+            debug!(self.logger, "Found title for {}", &listing_url);
 
             let description = html
                 .select(&self.description_selector)
@@ -74,6 +83,7 @@ impl ListingScraper<SkelbiuLtListing> for SkelbiuLtListingScraper {
                 .collect::<String>()
                 .trim()
                 .to_string();
+            debug!(self.logger, "Found description for {}", &listing_url);
 
             let id = html
                 .select(&self.id_selector)
@@ -84,6 +94,7 @@ impl ListingScraper<SkelbiuLtListing> for SkelbiuLtListingScraper {
                 .replace("ID: ", "")
                 .trim()
                 .to_string();
+            debug!(self.logger, "Found id for {}", &listing_url);
 
             let views = html
                 .select(&self.view_selector)
@@ -93,6 +104,7 @@ impl ListingScraper<SkelbiuLtListing> for SkelbiuLtListingScraper {
                 .collect::<String>()
                 .trim()
                 .to_string();
+            debug!(self.logger, "Found views for {}", &listing_url);
 
             let updated_at = html
                 .select(&self.updated_at_selector)
@@ -102,6 +114,7 @@ impl ListingScraper<SkelbiuLtListing> for SkelbiuLtListingScraper {
                 .collect::<String>()
                 .trim()
                 .replace("Atnaujintas ", "");
+            debug!(self.logger, "Found updated_at for {}", &listing_url);
 
             let liked_amount = html
                 .select(&self.liked_amount_selector)
@@ -111,25 +124,37 @@ impl ListingScraper<SkelbiuLtListing> for SkelbiuLtListingScraper {
                 .collect::<String>()
                 .trim()
                 .replace("Įsimintas ", "");
+            debug!(self.logger, "Found liked_amount for {}", &listing_url);
 
             let mut location = html
                 .select(&self.location_selector)
                 .next()
                 .unwrap_or_else(|| panic!("Could not find location for {}", &listing_url))
                 .text()
                 .collect::<String>();
-            location.truncate(location.find("Siųsti siuntą vos nuo").unwrap());
+            if let Some(send_index) = location.find("Siųsti siuntą vos nuo") {
+                location.truncate(send_index);
+            }
             location = location.trim().to_string();
+            debug!(self.logger, "Found location for {}", &listing_url);
 
             let quality = if let Some(quality) = html.select(&self.quality_selector).next() {
+                debug!(self.logger, "Found quality for {}", &listing_url);
+
                 Some(quality.text().collect::<String>().trim().to_string())
             } else {
+                debug!(self.logger, "Could not find quality for {}", &listing_url);
+
                 None
             };
 
             let price = if let Some(price) = html.select(&self.price_selector).next() {
+                debug!(self.logger, "Found price for {}", &listing_url);
+
                 Some(price.text().collect::<String>().trim().to_string())
             } else {
+                debug!(self.logger, "Could not find price for {}", &listing_url);
+
                 None
             };
 
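The location change in the hunk above swaps an unconditional unwrap() on str::find for an if let, so listings whose location block lacks the "Siųsti siuntą vos nuo" shipping-offer marker no longer panic. A standalone illustration:

    fn main() {
        // A location string without the shipping-offer marker.
        let mut location = String::from("Vilnius\n");

        // Old code would panic here, because `find` returns None:
        // location.truncate(location.find("Siųsti siuntą vos nuo").unwrap());

        // New code truncates only when the marker is present.
        if let Some(send_index) = location.find("Siųsti siuntą vos nuo") {
            location.truncate(send_index);
        }
        let location = location.trim().to_string();
        assert_eq!(location, "Vilnius");
    }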
10 changes: 8 additions & 2 deletions skelbiu-lt-scraper/src/skelbiu_lt_scraper.rs
@@ -3,14 +3,19 @@ use crate::skelbiu_lt_listing_scraper::SkelbiuLtListingScraper;
 use common_scraper::{
     CommonPageScraper, CommonScrapper, ListingScraper, PageScraper, ScraperSettings,
 };
+use slog::Logger;
 
 pub struct SkelbiuLtScraper {
+    logger: Logger,
     scraper_settings: ScraperSettings,
 }
 
 impl SkelbiuLtScraper {
-    pub fn new(scraper_settings: ScraperSettings) -> Self {
-        Self { scraper_settings }
+    pub fn new(logger: Logger, scraper_settings: ScraperSettings) -> Self {
+        Self {
+            logger,
+            scraper_settings,
+        }
     }
 }

@@ -26,6 +31,7 @@ impl CommonScrapper<SkelbiuLtListing> for SkelbiuLtScraper {
     fn get_listing_scraper(&self) -> Box<dyn ListingScraper<SkelbiuLtListing>> {
         // TODO: Refactor this to use DI & clone
         Box::new(SkelbiuLtListingScraper::new(
+            self.logger.clone(),
             ".id",
             "h1[itemprop=name]",
             ".description",
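With the new signature, callers supply a Logger alongside the settings. A usage sketch: ScraperSettings construction is not shown in this commit, the import paths are assumptions, and slog::Discard is used so no extra drain crates are needed:

    use common_scraper::ScraperSettings;
    use skelbiu_lt_scraper::SkelbiuLtScraper; // re-export path is an assumption
    use slog::{o, Logger};

    fn build_scraper(settings: ScraperSettings) -> SkelbiuLtScraper {
        // Discard drain: records go nowhere; swap in a real drain to see the
        // debug! output added by this commit.
        let logger = Logger::root(slog::Discard, o!("scraper" => "skelbiu.lt"));
        SkelbiuLtScraper::new(logger, settings)
    }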
