# Major Events in US History

## Web Scraping 

#### Import Libraries

In [1]:
from bs4 import BeautifulSoup as soup
import requests

#### Request the Web Content and Save it as BeautifulSoup Object

In [2]:
URL = ['https://www.historycentral.com/Today/Index.html', 'https://www.historycentral.com/Today/21st.html']
html_content = []
for link in URL:
    r = requests.get(link)
    html_content.append(soup(r.content))

#### Use BeautifulSoup Methods to Extract The Year and Events

In [8]:
major_events = []

for page in html_content:
    table = page.find('table')
    major_events += [row.find('a').string for row in table.find_all('tr') if row.find('td') and row.find('a').string]



### Regular Expression

#### Turn Dash (-) to " " and Split the Year and Event

In [13]:
import re
events = [re.sub('-'," ",event) for event in major_events]
events = [event.split(' ', 1) for event in events]
events

[['1974', 'Soyuz Apollo'],
 ['1974', 'Ford Pardons Nixon'],
 ['1975', ' Helsinki Accords'],
 ['1975', 'NY Saved From Bankruptcy'],
 ['1976', 'Apple II Introduced'],
 ['1976', 'Carter Elected'],
 ['1977', ' USS Gives Up Panama Canal'],
 ['1979', 'Camp David Peace Accords'],
 ['1979', 'Trade Agreement'],
 ['1979', 'Three Mile Island'],
 ['1979', 'Salt II'],
 ['1980', 'Iran Seizes US Hostages'],
 ['1980', 'Soviets Invade Afghanistan'],
 ['1980', 'Reagan Election'],
 ['1981', 'Reagan fires Air Traffic Controllers'],
 ['1981Assassination', 'Attempt on Reaga'],
 ['1981', 'First Woman on Supreme Court'],
 ['1981', '$180 Billion Arms Build Up'],
 ['983', 'Bombing of US Compound in Beirut'],
 ['1984', 'Reagan Reelected'],
 ['1985', 'Reagan Tax Reform'],
 ['1986', 'Shuttle Explodes'],
 ['1986', 'US Responds to Libyan Terror'],
 ['1986', 'Iran Contra Deal Unearthed'],
 ['1988', 'INF Treaty Signed'],
 ['1988', 'George Bush Elected'],
 ['1989', 'US Seizes Panamanian Leader'],
 ['1990', 'Coalition F

## Pandas

In [14]:
import pandas as pd

#### Make A Data frame with 'Year' and 'Event' as columns

In [32]:
df = pd.DataFrame(events, columns=['Year', 'Event'])

#### Locate row 15 and Fix the Year and Event Event = 'Assassination Attempt on Reagan' and Year = "1981"

In [33]:
df.loc[15].Year = '1981'
df.loc[15].Event = 'Assassination Attempt on Reagan'
df.head(16)

Unnamed: 0,Year,Event
0,1974,Soyuz Apollo
1,1974,Ford Pardons Nixon
2,1975,Helsinki Accords
3,1975,NY Saved From Bankruptcy
4,1976,Apple II Introduced
5,1976,Carter Elected
6,1977,USS Gives Up Panama Canal
7,1979,Camp David Peace Accords
8,1979,Trade Agreement
9,1979,Three Mile Island


#### Use Replace method to turn '983' to '1983'

In [34]:
df.head(20)

Unnamed: 0,Year,Event
0,1974,Soyuz Apollo
1,1974,Ford Pardons Nixon
2,1975,Helsinki Accords
3,1975,NY Saved From Bankruptcy
4,1976,Apple II Introduced
5,1976,Carter Elected
6,1977,USS Gives Up Panama Canal
7,1979,Camp David Peace Accords
8,1979,Trade Agreement
9,1979,Three Mile Island


In [35]:
df= df.replace('983', '1983')

#### Strip the Event Column

In [36]:
df['Event'] = df.Event.str.strip()

#### Group all the events in same year

In [41]:
df = df.groupby('Year')['Event'].apply(' / '.join).reset_index()

#### Save as CSV

In [43]:
df.to_csv('major-events-clean.csv', index=False)

#### Read CSV

In [44]:
df = pd.read_csv('major-events-clean.csv')

In [45]:
df

Unnamed: 0,Year,Event
0,1974,Soyuz Apollo / Ford Pardons Nixon
1,1975,Helsinki Accords / NY Saved From Bankruptcy
2,1976,Apple II Introduced / Carter Elected
3,1977,USS Gives Up Panama Canal
4,1979,Camp David Peace Accords / Trade Agreement / T...
5,1980,Iran Seizes US Hostages / Soviets Invade Afgha...
6,1981,Reagan fires Air Traffic Controllers / Assassi...
7,1983,Bombing of US Compound in Beirut
8,1984,Reagan Reelected
9,1985,Reagan Tax Reform
