In [1]:
import sys
import time
from bs4 import BeautifulSoup  
import requests
import pandas as pd

In [2]:
page = requests.get("https://forecast.weather.gov/MapClick.php?lat=37.7772&lon=-122.4168")

soup = BeautifulSoup(page.content, 'html.parser')
seven_day = soup.find(id="seven-day-forecast")
forecast_items = seven_day.find_all(class_="tombstone-container")


In [3]:
tonight = forecast_items[0]


In [5]:
print(tonight.prettify())

<div class="tombstone-container">
 <p class="period-name">
  Today
  <br/>
  <br/>
 </p>
 <p>
  <img alt="Today: Mostly sunny, with a high near 68. Breezy, with a west southwest wind 7 to 12 mph increasing to 19 to 24 mph in the afternoon. Winds could gust as high as 31 mph. " class="forecast-icon" src="DualImage.php?i=sct&amp;j=wind_sct" title="Today: Mostly sunny, with a high near 68. Breezy, with a west southwest wind 7 to 12 mph increasing to 19 to 24 mph in the afternoon. Winds could gust as high as 31 mph. "/>
 </p>
 <p class="short-desc">
  Mostly Sunny
  <br/>
  then Mostly
  <br/>
  Sunny and
  <br/>
  Breezy
 </p>
 <p class="temp temp-high">
  High: 68 °F
 </p>
</div>


# Extracting information from the page


In [6]:
period = tonight.find(class_="period-name").get_text()
short_desc = tonight.find(class_="short-desc").get_text()
temp = tonight.find(class_="temp").get_text()


In [7]:
print(period)
print(short_desc)
print(temp)

Today
Mostly Sunnythen MostlySunny andBreezy
High: 68 °F


 extract the title attribute from the img tag. To do this, we just treat the BeautifulSoup object like a dictionary,
and pass in the attribute we want as a key

In [8]:
img = tonight.find("img")
desc = img['title']


In [9]:
print(desc)

Today: Mostly sunny, with a high near 68. Breezy, with a west southwest wind 7 to 12 mph increasing to 19 to 24 mph in the afternoon. Winds could gust as high as 31 mph. 


# Extracting all the information from the page


In [10]:
period_tags = seven_day.select(".tombstone-container .period-name")
periods = [pt.get_text() for pt in period_tags]


In [11]:
periods

['Today',
 'Tonight',
 'Friday',
 'FridayNight',
 'Saturday',
 'SaturdayNight',
 'Sunday',
 'SundayNight',
 'Monday']

As we can see above, our technique gets us each of the period names, in order.

We can apply the same technique to get the other three fields

In [12]:
short_descs = [sd.get_text() for sd in seven_day.select(".tombstone-container .short-desc")]
temps = [t.get_text() for t in seven_day.select(".tombstone-container .temp")]
descs = [d["title"] for d in seven_day.select(".tombstone-container img")]


In [13]:
print(short_descs)
print(temps)
print(descs)

['Mostly Sunnythen MostlySunny andBreezy', 'ChanceShowers', 'Partly Sunnythen Sunnyand Breezy', 'Partly Cloudy', 'Mostly Sunny', 'Partly Cloudy', 'Mostly Sunny', 'Partly Cloudy', 'Mostly Sunny']
['High: 68 °F', 'Low: 57 °F', 'High: 69 °F', 'Low: 55 °F', 'High: 68 °F', 'Low: 56 °F', 'High: 71 °F', 'Low: 56 °F', 'High: 71 °F']
['Today: Mostly sunny, with a high near 68. Breezy, with a west southwest wind 7 to 12 mph increasing to 19 to 24 mph in the afternoon. Winds could gust as high as 31 mph. ', 'Tonight: A 30 percent chance of showers and thunderstorms, mainly after 11pm.  Mostly cloudy, with a low around 57. West wind 16 to 21 mph decreasing to 9 to 14 mph in the evening. Winds could gust as high as 26 mph.  New rainfall amounts of less than a tenth of an inch, except higher amounts possible in thunderstorms. ', 'Friday: Mostly cloudy, then gradually becoming sunny, with a high near 69. Breezy, with a west wind 15 to 23 mph, with gusts as high as 31 mph. ', 'Friday Night: Partly clo

# Combining our data into a Pandas Dataframe


In [14]:
weather = pd.DataFrame({
    "period": periods,
    "short_desc": short_descs,
    "temp": temps,
    "desc":descs
})

In [15]:
weather


Unnamed: 0,period,short_desc,temp,desc
0,Today,Mostly Sunnythen MostlySunny andBreezy,High: 68 °F,"Today: Mostly sunny, with a high near 68. Bree..."
1,Tonight,ChanceShowers,Low: 57 °F,Tonight: A 30 percent chance of showers and th...
2,Friday,Partly Sunnythen Sunnyand Breezy,High: 69 °F,"Friday: Mostly cloudy, then gradually becoming..."
3,FridayNight,Partly Cloudy,Low: 55 °F,"Friday Night: Partly cloudy, with a low around..."
4,Saturday,Mostly Sunny,High: 68 °F,"Saturday: Mostly sunny, with a high near 68. W..."
5,SaturdayNight,Partly Cloudy,Low: 56 °F,"Saturday Night: Partly cloudy, with a low arou..."
6,Sunday,Mostly Sunny,High: 71 °F,"Sunday: Mostly sunny, with a high near 71."
7,SundayNight,Partly Cloudy,Low: 56 °F,"Sunday Night: Partly cloudy, with a low around..."
8,Monday,Mostly Sunny,High: 71 °F,"Monday: Mostly sunny, with a high near 71."
