# target url
https://en.wikipedia.org/wiki/Nankai_megathrust_earthquakes

### Import required libraries for web scraping

In [1]:
from bs4 import BeautifulSoup
import urllib.request
import pandas as pd
import requests

### requesting url

In [2]:
url = 'https://en.wikipedia.org/wiki/Nankai_megathrust_earthquakes'

In [3]:
source = urllib.request.urlopen(url)

### creating object of BeautifulSoup

In [4]:
soup = BeautifulSoup(source,'lxml')

In [5]:
soup

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>Nankai megathrust earthquakes - Wikipedia</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"1aec427f-2cc7-4ff0-a290-89f2cc58a3e8","wgCSPNonce":false,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"Nankai_megathrust_earthquakes","wgTitle":"Nankai megathrust earthquakes","wgCurRevisionId":1024821435,"wgRevisionId":1024821435,"wgArticleId":18193229,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["CS1 Japanese-language sources (ja)","Articles with short description","Short description matches Wikidat

In [6]:
main_table=soup('table', class_="wikitable sortable")[0]

In [7]:
main_table

<table class="wikitable sortable">
<tbody><tr bgcolor="#ececec">
<th>#
</th>
<th>Date/Time‡
</th>
<th>Name
</th>
<th><a class="mw-redirect" href="/wiki/Seismic_scales#Magnitude_scales" title="Seismic scales">Magnitude</a>
</th>
<th>Ruptured segments
</th>
<th>Tsunami
<p>max. wave
height (m)
</p>
</th>
<th>Coordinates
</th>
<th>Fatalities
</th>
<th>Sources
</th></tr>
<tr>
<td>1
</td>
<td>684-11-29 22:00
</td>
<td><a class="mw-redirect" href="/wiki/684_Hakuho_earthquake" title="684 Hakuho earthquake">684 Hakuho earthquake</a>
</td>
<td align="right">8.4
</td>
<td align="right">A+B +?(C+D+E)
</td>
<td align="right">3.0
</td>
<td><style data-mw-deduplicate="TemplateStyles:r994658806">.mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}</style><span class="plainlinks nourlexpansion"><a cl

### Scraping head

In [8]:
head = []
for i in range(len(main_table.find_all('th'))):
    head.append(main_table.find_all('th')[i].get_text())

In [9]:
head

['#\n',
 'Date/Time‡\n',
 'Name\n',
 'Magnitude\n',
 'Ruptured segments\n',
 'Tsunami\nmax. wave\nheight\xa0(m)\n\n',
 'Coordinates\n',
 'Fatalities\n',
 'Sources\n']

In [10]:
df_head=pd.DataFrame(columns=head)
df_head

Unnamed: 0,#\n,Date/Time‡\n,Name\n,Magnitude\n,Ruptured segments\n,Tsunami\nmax. wave\nheight (m)\n\n,Coordinates\n,Fatalities\n,Sources\n


### Removing ' \n ' in head

In [11]:
head = df_head.columns.str.replace('\n','')
head

Index(['#', 'Date/Time‡', 'Name', 'Magnitude', 'Ruptured segments',
       'Tsunamimax. waveheight (m)', 'Coordinates', 'Fatalities', 'Sources'],
      dtype='object')

### Scraping tabular data

In [12]:
main_table.find_all('td')

[<td>1
 </td>,
 <td>684-11-29 22:00
 </td>,
 <td><a class="mw-redirect" href="/wiki/684_Hakuho_earthquake" title="684 Hakuho earthquake">684 Hakuho earthquake</a>
 </td>,
 <td align="right">8.4
 </td>,
 <td align="right">A+B +?(C+D+E)
 </td>,
 <td align="right">3.0
 </td>,
 <td><style data-mw-deduplicate="TemplateStyles:r994658806">.mw-parser-output .geo-default,.mw-parser-output .geo-dms,.mw-parser-output .geo-dec{display:inline}.mw-parser-output .geo-nondefault,.mw-parser-output .geo-multi-punct{display:none}.mw-parser-output .longitude,.mw-parser-output .latitude{white-space:nowrap}</style><span class="plainlinks nourlexpansion"><a class="external text" href="//geohack.toolforge.org/geohack.php?pagename=Nankai_megathrust_earthquakes&amp;params=32.8_N_134.3_E_"><span class="geo-nondefault"><span class="geo-dms" title="Maps, aerial photos, and other data for this location"><span class="latitude">32°48′N</span> <span class="longitude">134°18′E</span></span></span><span class="geo-multi

In [13]:
data=[]
for d in range(len(main_table.find_all('td'))):
    data.append(main_table.find_all('td')[d].get_text())

In [14]:
n=len(main_table.find_all('th'))
data = [data[i:i+n] for i in range(0,len(data),n)]

In [15]:
data

[['1\n',
  '684-11-29 22:00\n',
  '684 Hakuho earthquake\n',
  '8.4\n',
  'A+B +?(C+D+E)\n',
  '3.0\n',
  '32°48′N 134°18′E\ufeff / \ufeff32.8°N 134.3°E\ufeff / 32.8; 134.3\n',
  'many\n',
  '[12][13][14]\n'],
 ['2\n',
  '887-8-26 16:00\n',
  'Ninna Nankai earthquake\n',
  '8.6\n',
  'A+B +?(C+D+E)\n',
  '10.0\n',
  '33°00′N 135°00′E\ufeff / \ufeff33.0°N 135.0°E\ufeff / 33.0; 135.0\n',
  'many\n',
  '[12][13][14]\n'],
 ['3\n',
  '1096-12-17 08:00\n',
  '\n',
  '8.4\n',
  'C +?(D+E)\n',
  '7.0\n',
  '34°00′N 137°30′E\ufeff / \ufeff34.0°N 137.5°E\ufeff / 34.0; 137.5\n',
  '\n',
  '[12][13][14]\n'],
 ['4\n',
  '1099-2-22 06:00\n',
  'Kōwa Nankaido earthquake\n',
  '8.0\n',
  'A+B\n',
  '\n',
  '33°00′N 135°30′E\ufeff / \ufeff33.0°N 135.5°E\ufeff / 33.0; 135.5\n',
  '\n',
  '[13][14]\n'],
 ['5\n',
  '1360-11-22\n',
  'Uncertain event\n',
  '7.0\n',
  '\n',
  '6.0\n',
  '33°24′N 136°12′E\ufeff / \ufeff33.4°N 136.2°E\ufeff / 33.4; 136.2\n',
  '\n',
  '[12][13][14]\n'],
 ['6\n',
  '1361-8-03 

### Creating DataFrame using head and data

In [16]:
dataset= pd.DataFrame(data,columns=head)

In [17]:
dataset

Unnamed: 0,#,Date/Time‡,Name,Magnitude,Ruptured segments,Tsunamimax. waveheight (m),Coordinates,Fatalities,Sources
0,1\n,684-11-29 22:00\n,684 Hakuho earthquake\n,8.4\n,A+B +?(C+D+E)\n,3.0\n,32°48′N 134°18′E﻿ / ﻿32.8°N 134.3°E﻿ / 32.8; 1...,many\n,[12][13][14]\n
1,2\n,887-8-26 16:00\n,Ninna Nankai earthquake\n,8.6\n,A+B +?(C+D+E)\n,10.0\n,33°00′N 135°00′E﻿ / ﻿33.0°N 135.0°E﻿ / 33.0; 1...,many\n,[12][13][14]\n
2,3\n,1096-12-17 08:00\n,\n,8.4\n,C +?(D+E)\n,7.0\n,34°00′N 137°30′E﻿ / ﻿34.0°N 137.5°E﻿ / 34.0; 1...,\n,[12][13][14]\n
3,4\n,1099-2-22 06:00\n,Kōwa Nankaido earthquake\n,8.0\n,A+B\n,\n,33°00′N 135°30′E﻿ / ﻿33.0°N 135.5°E﻿ / 33.0; 1...,\n,[13][14]\n
4,5\n,1360-11-22\n,Uncertain event\n,7.0\n,\n,6.0\n,33°24′N 136°12′E﻿ / ﻿33.4°N 136.2°E﻿ / 33.4; 1...,\n,[12][13][14]\n
5,6\n,1361-8-03 04:00\n,Shōhei Nankaido earthquake\n,8.4\n,A+B +?(C+D)\n,\n,33°00′N 135°00′E﻿ / ﻿33.0°N 135.0°E﻿ / 33.0; 1...,\n,[13][14]\n
6,7\n,1498-9-20 08:00\n,1498 Meiō Nankaidō earthquake\n,8.6\n,?(A+B) C+D +?E\n,\n,34°00′N 138°00′E﻿ / ﻿34.0°N 138.0°E﻿ / 34.0; 1...,thousands\n,[13][14]\n
7,8\n,1605-2-3 20:00\n,1605 Keichō Nankaido earthquake\n,7.9\n,A+B+C+D\n,10.0\n,33°00′N 135°00′E﻿ / ﻿33.0°N 135.0°E﻿ / 33.0; 1...,thousands\n,[12][13][14]\n
8,9\n,1707-10-28 14:00\n,1707 Hōei earthquake\n,8.6\n,A+B+C+D+E\n,25.7\n,33°00′N 136°00′E﻿ / ﻿33.0°N 136.0°E﻿ / 33.0; 1...,"5,000\n",[12][13][14]\n
9,10\n,1854-12-23 09:00\n,1854 Tōkai earthquake\n,8.4\n,C+D+E\n,21.0\n,34°00′N 137°48′E﻿ / ﻿34.0°N 137.8°E﻿ / 34.0; 1...,"2,000\n",[12][13][14]\n


### Removing '\n' in Data

In [18]:
for col in dataset.columns:
    dataset[col]=dataset[col].str.replace('\n','')

In [19]:
dataset

Unnamed: 0,#,Date/Time‡,Name,Magnitude,Ruptured segments,Tsunamimax. waveheight (m),Coordinates,Fatalities,Sources
0,1,684-11-29 22:00,684 Hakuho earthquake,8.4,A+B +?(C+D+E),3.0,32°48′N 134°18′E﻿ / ﻿32.8°N 134.3°E﻿ / 32.8; 1...,many,[12][13][14]
1,2,887-8-26 16:00,Ninna Nankai earthquake,8.6,A+B +?(C+D+E),10.0,33°00′N 135°00′E﻿ / ﻿33.0°N 135.0°E﻿ / 33.0; 1...,many,[12][13][14]
2,3,1096-12-17 08:00,,8.4,C +?(D+E),7.0,34°00′N 137°30′E﻿ / ﻿34.0°N 137.5°E﻿ / 34.0; 1...,,[12][13][14]
3,4,1099-2-22 06:00,Kōwa Nankaido earthquake,8.0,A+B,,33°00′N 135°30′E﻿ / ﻿33.0°N 135.5°E﻿ / 33.0; 1...,,[13][14]
4,5,1360-11-22,Uncertain event,7.0,,6.0,33°24′N 136°12′E﻿ / ﻿33.4°N 136.2°E﻿ / 33.4; 1...,,[12][13][14]
5,6,1361-8-03 04:00,Shōhei Nankaido earthquake,8.4,A+B +?(C+D),,33°00′N 135°00′E﻿ / ﻿33.0°N 135.0°E﻿ / 33.0; 1...,,[13][14]
6,7,1498-9-20 08:00,1498 Meiō Nankaidō earthquake,8.6,?(A+B) C+D +?E,,34°00′N 138°00′E﻿ / ﻿34.0°N 138.0°E﻿ / 34.0; 1...,thousands,[13][14]
7,8,1605-2-3 20:00,1605 Keichō Nankaido earthquake,7.9,A+B+C+D,10.0,33°00′N 135°00′E﻿ / ﻿33.0°N 135.0°E﻿ / 33.0; 1...,thousands,[12][13][14]
8,9,1707-10-28 14:00,1707 Hōei earthquake,8.6,A+B+C+D+E,25.7,33°00′N 136°00′E﻿ / ﻿33.0°N 136.0°E﻿ / 33.0; 1...,5000,[12][13][14]
9,10,1854-12-23 09:00,1854 Tōkai earthquake,8.4,C+D+E,21.0,34°00′N 137°48′E﻿ / ﻿34.0°N 137.8°E﻿ / 34.0; 1...,2000,[12][13][14]
