### Import libraries.

In [1]:
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import (
    ElementClickInterceptedException,
    ElementNotInteractableException,
    NoSuchElementException,
)
from selenium.webdriver.common.by import By

### Use Selenium to open the source page.

In [2]:
# Start a Chrome browser session that Selenium will drive.
driver = webdriver.Chrome()

In [3]:
# Address of the routes listing page that the notebook loads and scrapes.
url = 'https://www.8a.nu/crags/sportclimbing/turkey/geyik-bayiri/routes'

In [4]:
# Load the routes page in the Selenium-controlled browser.
driver.get(url)

# Fixed 3-second pause before the page is read; presumably gives the page
# time to finish rendering — TODO confirm it is long enough on slow connections.
time.sleep(3)

### Create a DataFrame to store the parsed info.

* *grade* — grade of the route;
* *name* — name of the route;
* *sector* — route's sector;
* *ascents* — number of logged ascents ("tops") of the route;
* *fos_ratio* — share of ascents done flash or onsight (i.e. on the first try);
* *recommendations* — percentage of climbers who recommend the route;
* *stars* — route's rating (0 to 5 stars). 


In [5]:
# Column schema for one scraped route; the frame starts empty and is
# filled by the parsing cell.
column_names = [
    'grade',
    'name',
    'sector',
    'ascents',
    'fos_ratio',
    'recommendations',
    'stars',
]
data_container = pd.DataFrame(columns=column_names)

data_container

Unnamed: 0,grade,name,sector,ascents,fos_ratio,recommendations,stars


### Use BeautifulSoup for parsing.

In [6]:
# Scrape the routes table page by page.
#
# Rows are accumulated in a plain list and appended to `data_container` in a
# single concat after the loop: concatenating once per row re-copies the
# whole frame every time, which is quadratic in the number of routes.
route_rows = []

while True:

    # Parse the HTML currently rendered in the browser. The parser is named
    # explicitly so the result does not depend on which optional parsers
    # happen to be installed, and bs4 does not warn about guessing one.
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')

    # take whole table with routes info from page
    elem_full_table = soup.find('table', class_='main-table zlags-table').find('tbody').find_all('tr')

    # read <tr> information row by row, collecting one dict per route
    for item in elem_full_table:
        grade = item.find('div', class_='grade').get_text()
        name = item.find('p', class_='name-link').get_text()
        # the sector name is the text of the second <a> following the sub-link <p>
        sector = item.find('p', class_='sub-link').find_next('a').find_next('a').get_text()
        ascents = item.find(class_='col-ascents number').get_text()
        fos_ratio = item.find(class_='col-ratio').get_text()
        recommendations = item.find(class_='col-recommend').get_text()

        # counting stars of the route (5 — max value): one selected
        # <label> per star
        stars = item.find(class_='star-rating').find_all('label', class_='is-selected')

        route_rows.append(
            {'grade': grade.strip(),
             'name': name.strip(),
             'sector': sector.strip(),
             'ascents': ascents.strip(),
             'fos_ratio': fos_ratio.strip(),
             'recommendations': recommendations.strip(),
             'stars': len(stars)}
        )

    try:
        # follow the pagination link labelled 'NEXT'
        driver.find_element(By.LINK_TEXT, 'NEXT').click()
    except (NoSuchElementException,
            ElementNotInteractableException,
            ElementClickInterceptedException):
        # No usable NEXT link: treat this as the last page. Only these
        # Selenium errors end the loop, so Ctrl+C and genuine bugs are no
        # longer silently swallowed the way a bare `except:` would.
        break

    # fixed pause before reading the next page
    # NOTE(review): presumably enough for the table to re-render — confirm.
    time.sleep(2)

# append everything collected to the (initially empty) container in one go
if route_rows:
    data_container = pd.concat(
        [data_container, pd.DataFrame(route_rows)], ignore_index=True
    )

print("FREE!")

FREE!


In [7]:
# Display the collected routes table.
data_container

Unnamed: 0,grade,name,sector,ascents,fos_ratio,recommendations,stars
0,7a,Freedom is a Battle,Trebenna West,781,66 %,10 %,3
1,7a,Karınca,Magara,699,55 %,16 %,4
2,7b,Lycian Highway,Trebenna West,645,70 %,13 %,4
3,6b+,Saxafon,Sarkit,591,66 %,20 %,4
4,6b,Nirvana,Magara,589,78 %,10 %,3
...,...,...,...,...,...,...,...
2100,6a+,Session of Hapiness,Anatolia Sag,1,0 %,0 %,0
2101,6b+,Comftably,Echoes,1,100 %,0 %,0
2102,6a,En Iyisi o (Neco),Poseidon,1,100 %,0 %,0
2103,7c+,Sunatorium,Cesme,1,0 %,0 %,4


In [8]:
# Write the scraped table to CSV as UTF-8, without the index column.
data_container.to_csv('routes_info_raw.csv', encoding='utf-8', index=False)