In [1]:
import re, time, collections
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

## Goal: match data from <https://webrobots.io/kickstarter-datasets/>

...then script the process, in functions or classes that accept a project's url as input. 
- <https://docs.python.org/3/tutorial/classes.html#class-objects>
- <https://docs.python-requests.org/en/master/user/quickstart/#response-content>
- <https://www.crummy.com/software/BeautifulSoup/bs4/doc/#making-the-soup>
- <https://www.crummy.com/software/BeautifulSoup/bs4/doc/#searching-the-tree>
- <https://www.crummy.com/software/BeautifulSoup/bs4/doc/#navigating-the-tree>
- <https://www.crummy.com/software/BeautifulSoup/bs4/doc/#find-all>

In [256]:
class KickSoup:
    """retrieve kickstarter data for a specified project url; requires 'requests' and 'bs4.BeautifulSoup'"""

    def __init__(self, url):

        r = requests.get(url)
        soup = BeautifulSoup(r.content, 'html.parser')

        self.page_title = soup.title.text

        # self.project_name = soup.h2.text
        # self.project_name = soup.find('h2').text
        for project in soup.find_all('h2'):
            if 'project-name' in project.attrs['class']:
                self.project_name = project.text

        # self.blurb = soup.h2.next_element.next_element.next_element.text
        self.blurb = soup.p.find_next('p').text

        # self.goal = soup.find(attrs={'class':'money'}).text
        # self.goal = soup.find('span', class_='money').text        
        for money in soup.find_all('span', class_='money'):
            if "pledged" in money.previous:
                self.goal = money.text



In [255]:
for project in soup.find_all('h2'):
    if 'project-name' in project.attrs['class']:
        print(project.text)

Sushi Boat
Sushi Boat


In [262]:
checkit = KickSoup('https://www.kickstarter.com/projects/westra/american-sikh')

In [263]:
checkit.goal

'$60,000'

In [264]:
checkit.blurb

'An animated short about seeking acceptance in an intolerant world.'

In [265]:
checkit.project_name

'American Sikh'

In [266]:
checkit.page_title

'American Sikh by Ryan Westra — Kickstarter'

In [45]:
url = 'https://www.kickstarter.com/projects/japanimegames/sushi-boat'
r = requests.get(url)
soup = BeautifulSoup(r.content, 'html.parser')

In [None]:
attrs={"data-foo": "value"}

In [216]:
soup.find(attrs={'class':'money'}).text

'$25,000'

In [212]:
soup.find(attrs={'class':'money'}).previous

'pledged of '

In [219]:
soup.find(attrs={'class':'money'}).parent.text

'pledged of $25,000'

In [223]:
for money in soup.find_all('span', class_='money'):
    if "pledged" in money.previous:
        self.goal = money

<span class="money">$25,000</span>
<span class="money">$25,000</span>
<span class="money">$25,000</span>
<span class="money">$25,000</span>


In [221]:
for money in soup.find_all('span', class_='money'):
    if "pledged" in money.next.next.text:
        print(money)

<span class="money">$25,000</span>
<span class="money">$25,000</span>


In [176]:
for span in soup.find_all(attrs={"class": "money"}):
    print(span)

<span class="money">$25,000</span>
<span class="money">$25,000</span>
<span class="money">$25,000</span>
<span class="money">$25,000</span>
<span class="money">$1</span>
<span class="money">$60</span>
<span class="money">$60</span>


In [162]:
soup.find('span', id='money')

In [163]:
soup.span.find(re.compile('^\$'))

In [170]:
soup.span.extract   #find('class'=='money')

<bound method PageElement.extract of <span class="ml1"><span>Tabletop Games</span></span>>

In [160]:
soup.findAll('span')

[<span class="ml1"><span>Tabletop Games</span></span>,
 <span>Tabletop Games</span>,
 <span class="ml1">Portland, OR</span>,
 <span class="ksr-green-700 inline-block bold type-16 type-28-md"><span class="ksr-green-500"></span></span>,
 <span class="ksr-green-500"></span>,
 <span class="block dark-grey-500 type-12 type-14-md lh3-lg"><span class="inline-block hide-sm">pledged of <span class="money">$25,000</span></span><span class="inline-block-sm hide">pledged of <span class="money">$25,000</span> goal</span></span>,
 <span class="inline-block hide-sm">pledged of <span class="money">$25,000</span></span>,
 <span class="money">$25,000</span>,
 <span class="inline-block-sm hide">pledged of <span class="money">$25,000</span> goal</span>,
 <span class="money">$25,000</span>,
 <span></span>,
 <span class="block dark-grey-500 type-12 type-14-md lh3-lg">backers</span>,
 <span class="block type-16 type-28-md bold dark-grey-500">16</span>,
 <span class="block navy-600 type-12 type-14-md lh3-lg">

In [141]:
soup.findAll('span')

[<span class="ml1"><span>Tabletop Games</span></span>,
 <span>Tabletop Games</span>,
 <span class="ml1">Portland, OR</span>,
 <span class="ksr-green-700 inline-block bold type-16 type-28-md"><span class="ksr-green-500"></span></span>,
 <span class="ksr-green-500"></span>,
 <span class="block dark-grey-500 type-12 type-14-md lh3-lg"><span class="inline-block hide-sm">pledged of <span class="money">$25,000</span></span><span class="inline-block-sm hide">pledged of <span class="money">$25,000</span> goal</span></span>,
 <span class="inline-block hide-sm">pledged of <span class="money">$25,000</span></span>,
 <span class="money">$25,000</span>,
 <span class="inline-block-sm hide">pledged of <span class="money">$25,000</span> goal</span>,
 <span class="money">$25,000</span>,
 <span></span>,
 <span class="block dark-grey-500 type-12 type-14-md lh3-lg">backers</span>,
 <span class="block type-16 type-28-md bold dark-grey-500">16</span>,
 <span class="block navy-600 type-12 type-14-md lh3-lg">

In [44]:
soup.find('h2').text

'Sushi Boat'

In [132]:
soup.p.find_next('p').text

"Sushi Boat is the fast, fun party game that's guaranteed to make you hungry! Compete against other players to swipe the most sushi."

In [53]:
soup.title

<title>Sushi Boat by Japanime Games — Kickstarter</title>

In [62]:
soup.find('h2', class_= re.compile())

<h2 class="type-28 type-24-md soft-black mb1 project-name">Sushi Boat</h2>

In [122]:
soup.find('h2').text

'Sushi Boat'

In [123]:
soup.find('h2').next_sibling

In [250]:
'project-name' in soup.h2.attrs['class']

True

In [249]:
soup.h2.attrs['class'][4]

'project-name'

In [124]:
soup.h2.text

'Sushi Boat'

In [121]:
soup.h2.next_element.next_element.next_element.text

"Sushi Boat is the fast, fun party game that's guaranteed to make you hungry! Compete against other players to swipe the most sushi."

In [120]:
for x in soup.h2.next_elements:
    print(x)

Sushi Boat
<div class="grid-col-12 grid-col-2-md flex items-center mb3"></div>
<div class="grid-col-12"><p class="type-14 type-18-md soft-black project-description mb1">Sushi Boat is the fast, fun party game that's guaranteed to make you hungry! Compete against other players to swipe the most sushi.</p></div>
<p class="type-14 type-18-md soft-black project-description mb1">Sushi Boat is the fast, fun party game that's guaranteed to make you hungry! Compete against other players to swipe the most sushi.</p>
Sushi Boat is the fast, fun party game that's guaranteed to make you hungry! Compete against other players to swipe the most sushi.
<div class="grid-col-12"><div class="block hide-md border-top border-bottom border-grey-500 nested-full-width-xs nested-full-width-sm my2 mb5"><button class="type-14 flex w100p"><div class="my2 flex justify-between w100p"><div class="flex"><img alt="Japanime Games" class="border-box radius100p bg-grey-400 w7 h7 shrink0 mr2" src="https://ksr-ugc.imgix.net

In [101]:
# for tag in soup.find(re.compile("['project.name']")):
#     print(tag.text)




  docElement = this.document.documentElement;
  docElement.className=docElement.className.replace(/\bno-js\b/,'') + 'js';




  (function(){
      window._pxAppId = 'PXUy3R669N';
      // Custom parameters
      // window._pxParam1 = "<param1>";
      var p = document.getElementsByTagName('script')[0],
          s = document.createElement('script');
      s.async = 1;
      s.src = '/Uy3R669N/init.js';
      p.parentNode.insertBefore(s,p);
  }());




  !function(){var analytics=window.analytics=window.analytics||[];if(!analytics.initialize)if(analytics.invoked)window.console&&console.error&&console.error("Segment snippet included twice.");else{analytics.invoked=!0;analytics.methods=["trackSubmit","trackClick","trackLink","trackForm","pageview","identify","reset","group","track","ready","alias","debug","page","once","off","on","addSourceMiddleware","addIntegrationMiddleware","setAnonymousId","addDestinationMiddleware"];analytics.factory=function(e){return function(){var t=Array.prot

In [92]:
for downer in soup.find('h2'):
    print(downer) 

Sushi Boat


In [54]:
soup.body.contents

['\n',
 <div class="NS_layouts__notifications">
 </div>,
 '\n',
 <div class="relative" data-start-project-path="/learn" id="global-header">
 <div><section class="section_global-nav js-site-nav-container bg-white flex flex-wrap flex-nowrap-md border-bottom border-grey-400 justify-between w100p" data-reactroot=""><div class="w100p py4 py2-md h7-md h5 flex items-center justify-center"><a class="keyboard-focusable mb0 w30" href="/?ref=nav"><svg class="valign-middle" height="100%" viewbox="0 0 598 70" width="100%" xmlns="http://www.w3.org/2000/svg"><title>Kickstarter</title><g class="fill-ksr10-green" fill-rule="nonzero"><path d="M523,15.9 L523,47.3 C523,54.8 529.046512,61 536.403101,61 C542.449612,61 547.186047,57.3 548.899225,52 L550.310078,54.5 C552.829457,58.7 557.263566,61 561.79845,61 C569.155039,61 575,55.2 575,47.8 C575,45.3 574.395349,42.9 572.984496,40.7 L567.744186,32.4 C571.573643,29.5 573.891473,24.9 573.891473,19.3 C573.891473,9 565.627907,1 555.046512,1 L537.612403,1 C528.139

In [42]:
project_name = soup.find('h2').text

In [34]:
soup.querySelector("#react-project-header > div > div.grid-container.flex.flex-column > div.grid-row.pt7-lg.mt3.mt0-lg.mb6-lg.order2-md.order1-lg > div > div.grid-row.hide-md.flex.flex-column.flex-row-md.relative > div:nth-child(1) > h2")

TypeError: 'NoneType' object is not callable