In [1]:
# Practice web scraping using BS4
# Website: Craigslist - Because I want to get a new PC :)

import requests
from bs4 import BeautifulSoup

In [4]:
# get the data
url = "https://newyork.craigslist.org/d/computers/search/sya"
data = requests.get(url)

# load data into bs4
soup = BeautifulSoup(data.text, "html.parser")

In [35]:
# Look for the result-info <p> tags
result_info = soup.find_all('p', {'class':'result-info'})

# Each result-info tag has one listing
for result in result_info:
    # Title of the post item
    title = result.find('a', {'class':"result-title hdrlnk"}).text
    
    # Price of the post item
    price = result.find('span', {'class':'result-price'}).text
    
    # Use regular expression to get the Year of machine
    import re
    
    # Find the year (20XX)
    year_re = re.compile(r'.*(20\d{2}).*')
    match = year_re.match(title)

    if match:
        year = match.group(1)
    
    # Only want MAC Machines
    if 'mac' in title.lower():
        print('|'.join([price.ljust(10), year.ljust(10), title]))

$300      |2010      |Mid 2010 Apple iMac 27" I7 2.93GHz 16GB 1TB 1GB Video High Sierra
$750      |2016      |Unused 12" MacBook (2016, 1.2 m5/8GB/500GB/1 Battery cycle), MS Office
$750      |2015      |12” Apple Macbook Retina (Early 2015) Space Gray SSD
$550      |2017      |15" Retina MacBook Pro (2.6 QUAD Core i7/8GB/500GB/73 Battery Cycles)
$700      |2015      |2015 MacBook Pro (13"), 2.7GHz i5/8GB/256GB, Model with all PORTS
$750      |2016      |Unused 12" MacBook (2016, 1.2 m5/8GB/500GB/1 Battery cycle), MS Office
$60       |2016      |Apple Macbook Adapters
$375      |2015      |2015 Apple MacBook Air Core i5 1.6GHz 8GB RAM 128GB SSD 13”
$300      |2012      |MacBook Pro 13-Inch Core i5 Retina 2012 - Very good condition
$899      |2012      |Mac mini
$350      |2010      |27 inch Imac (2010)
$655      |2015      |MacBook 2015 - 500GB, Retina Display
$500      |2010      |Mid-2010 21.5" iMac 3.6GHz i5, 8Gb RAM, 600Gb SSD
$65       |2010      |iMac 7.1 20" Apple computer
$1800 

In [49]:
# list to store all the result
search_result = []

def Read_Craigslist(page_url):
    '''
        Helper function
        Put it in a function and loop for a couple of results
        Take a string page_url and check for new mac listings
    '''
    data = requests.get(page_url)

    # load data into bs4
    soup = BeautifulSoup(data.text, "html.parser")
    
    # look for the result-info <p> tags
    result_info = soup.find_all('p', {'class':'result-info'})

    # each result-info tag has one listing
    for result in result_info:
        # title of the post item
        title = result.find('a', {'class':"result-title hdrlnk"}).text

        # price of the post item
        price = result.find('span', {'class':'result-price'}).text
        price = int(price.replace('$', ''))

        # use regular expression to get the Year of machine
        import re

        # find the year (20XX)
        year_re = re.compile(r'.*(20\d{2}).*')
        match = year_re.match(title)

        # only want MAC Machines
        # from experience, usually if it is cheaper than $100
        # It is usually parts, box, or questionable listing.
        title_str = title.lower()
        if 'mac' in title_str and price > 100:
            if match:
                # Extract the year from title
                year = match.group(1)
            else:
                year = "NA"
                
            # determine if what type of machine it is
            if 'imac' in title_str:
                cat = 'desktop'
            elif 'macbook' in title_str:
                cat = 'laptop'
            elif 'mac pro' in title_str:
                cat = 'desktop'
            
            # display result with piple delimiter
            new_item = [cat, price, year, title]
            
            # print the information to the terminal as feedback
            for information in new_item:
                print(str(information).ljust(10), end='')
            # print a new line to space out
            print()
            
            search_result.append(new_item)
            
# get the data
Read_Craigslist("https://newyork.craigslist.org/d/computers/search/sya")
Read_Craigslist("https://newyork.craigslist.org/search/syp?s=120")
Read_Craigslist("https://newyork.craigslist.org/search/syp?s=240")
Read_Craigslist("https://newyork.craigslist.org/search/syp?s=360")
Read_Craigslist("https://newyork.craigslist.org/search/syp?s=480")

laptop    450       2012      Apple MacBook Pro 15" Mid 2012 Laptop Intel I7 2.3ghz 8gb RAM 500gb
desktop   2000      2013      Apple Mac Pro 2013
desktop   220       NA        3  MACPRO MACHINES 1 iMAC 1MACBOOK PRO
laptop    240       2011      13" MacBook Pro 2011 - 2.7GHz Intel Core i7 - 250GB Storage + Charger
laptop    1300      2016      2016 MacBook Pro 15inch
desktop   300       2010      Mid 2010 Apple iMac 27" I7 2.93GHz 16GB 1TB 1GB Video High Sierra
laptop    750       2016      Unused 12" MacBook (2016, 1.2 m5/8GB/500GB/1 Battery cycle), MS Office
laptop    750       2015      12” Apple Macbook Retina (Early 2015) Space Gray SSD
laptop    550       NA        15" Retina MacBook Pro (2.6 QUAD Core i7/8GB/500GB/73 Battery Cycles)
laptop    700       2015      2015 MacBook Pro (13"), 2.7GHz i5/8GB/256GB, Model with all PORTS
laptop    750       2016      Unused 12" MacBook (2016, 1.2 m5/8GB/500GB/1 Battery cycle), MS Office
laptop    375       2015      2015 Apple MacBook Air 

In [50]:
import pandas as pd

header = ['cat', 'price', 'year', 'listing_title']
apple_df = pd.DataFrame(search_result,
                        columns = header)

In [51]:
apple_df

Unnamed: 0,cat,price,year,listing_title
0,laptop,450,2012.0,"Apple MacBook Pro 15"" Mid 2012 Laptop Intel I7..."
1,desktop,2000,2013.0,Apple Mac Pro 2013
2,desktop,220,,3 MACPRO MACHINES 1 iMAC 1MACBOOK PRO
3,laptop,240,2011.0,"13"" MacBook Pro 2011 - 2.7GHz Intel Core i7 - ..."
4,laptop,1300,2016.0,2016 MacBook Pro 15inch
5,desktop,300,2010.0,"Mid 2010 Apple iMac 27"" I7 2.93GHz 16GB 1TB 1G..."
6,laptop,750,2016.0,"Unused 12"" MacBook (2016, 1.2 m5/8GB/500GB/1 B..."
7,laptop,750,2015.0,12” Apple Macbook Retina (Early 2015) Space Gr...
8,laptop,550,,"15"" Retina MacBook Pro (2.6 QUAD Core i7/8GB/5..."
9,laptop,700,2015.0,"2015 MacBook Pro (13""), 2.7GHz i5/8GB/256GB, M..."


In [None]:
# To do tomorrow
# Count how many machines for sale there are
# When are they listed?