# ДЗ #11. Сбор данных <a id="0"></a>

### Target: https://plane-sale.com

* [Парсинг](#1)
* [Граббинг](#2)
* [EDA](#3)

In [1]:
# Импорт основных библиотек
from datetime import datetime
import time
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

%matplotlib inline

---
<a id="1"></a>
## Парсинг [(Наверх)](#0) 

In [2]:
main_link = 'https://plane-sale.com'

### Процедурная

In [3]:
def get_soup(page_link):
    response = requests.get(page_link, headers={'User-Agent': UserAgent().chrome})
    return None if not response.ok else BeautifulSoup(response.content, "html.parser")

def get_total_pages(page_link):
    soup = get_soup(page_link + str(1))
    total_pages = soup.find("div", attrs={'class':'count_item'}).text.strip().split(':')[1]
    return int(total_pages.strip())

def get_planes_types(soup):
    planes_types_soup = soup.findAll("a", attrs={'class':'search_type_plane_point'})
    planes_types_links = [main_link + (pl['href']).replace('/ru/', '/en/') for pl in planes_types_soup]
    return planes_types_links

def get_planes_links(soup):
    cards_links = soup.findAll("div", attrs={'class':'point_aircraft_line'}, id=True)
    planes_links = [main_link + cl.find('a')['href'] for cl in cards_links]
    return planes_links

def _get_card_date(plane_card):
    plane_record_date = plane_card.find("div", attrs={'class':'detail_icon_info_text'}).text.strip()
    return None if not plane_record_date else datetime.date(datetime.strptime(plane_record_date, "%d %B %Y"))

def _get_plane_name(plane_card):
    plane_card_title = plane_card.find("h1", attrs={'class':'h1_page_detail'})  
    return None if not plane_card_title else plane_card_title.text.strip()

def _get_plane_price(plane_card):
    plane_price = plane_card.find("div", attrs={'class':'detail_price_value'}).text.strip()  
    return plane_price if plane_price.replace(' ', '').isalpha() else int(plane_price.replace(' ','')[:-1])

def _get_seller_name(plane_card):
    seller_name = plane_card.find("div", attrs={'class':'detail_user_name'})
    return None if not seller_name else seller_name.br.next_element.strip()

def _get_plane_location(plane_card):
    location = plane_card.find("div", attrs={'class':'detail_air_craft_location_value'})
    return None if not location else location.text.strip()

def _get_plane_basic_info(plane_card):
    basic_info = dict()
    plane_table = plane_card.find("table", attrs={'class':'table_first_description_air_craft'})
    if plane_table:
        for row in plane_table.findAll('tr'):
            td = row.findAll("td")
            basic_info[td[0].text.strip()[:-1]] = td[1].text.strip()
    return basic_info

def _get_plane_desc(plane_card):
    detail_desc = plane_card.find("div", attrs={'class':'detail_description detail_description_original'})
    return None if not detail_desc else detail_desc.text.strip()

def get_plane_info(plane_card):
    plane_basic_info = _get_plane_basic_info(plane_card)
    info = {'Date': _get_card_date(plane_card),
            'Model': _get_plane_name(plane_card),
            'Category': plane_basic_info['Category'],
            'Manufacturer': None if not 'Manufacturer' in plane_basic_info.keys() else plane_basic_info['Manufacturer'],
            'Model': None if not 'Model' in plane_basic_info.keys() else plane_basic_info['Model'],
            'Year': None if not 'Year' in plane_basic_info.keys() else plane_basic_info['Year'],
            'RAID (hours)': None if not 'RAID (hours)' in plane_basic_info.keys() else plane_basic_info['RAID (hours)'],
            'Registration number': None if not 'Registration number' in plane_basic_info.keys() else plane_basic_info['Registration number'],
            'Serial number': None if not 'Serial number' in plane_basic_info.keys() else plane_basic_info['Serial number'],
            'Seller': _get_seller_name(plane_card),
            'Location': _get_plane_location(plane_card),
            'Description': _get_plane_desc(plane_card),
            'Price': _get_plane_price(plane_card)}
    return info

---
<a id="2"></a>
## Граббинг [(Наверх)](#0) 

In [7]:
planes_types = get_planes_types(get_soup(main_link))
planes = pd.DataFrame(columns=['Date', 'Model', 'Category', 'Manufacturer', 'Model', 'Year', 
                               'RAID (hours)', 'Registration number', 'Serial number','Seller', 
                               'Location', 'Description', 'Price'])

for plane_type in tqdm(planes_types, desc='Types', leave=False):
    total_pages = get_total_pages(plane_type)
    
    for page in tqdm(range(10, total_pages), desc='Pages', leave=False):
        links = get_planes_links(get_soup(plane_type + '?page=' + str(page)))

        for plane_link in tqdm(links, desc='Cards', leave=False):
            soup = get_soup(plane_link)
            if soup is not None:
                planes = planes.append(get_plane_info(soup), ignore_index=True)
                time.sleep(0.3)
        time.sleep(1)

HBox(children=(FloatProgress(value=0.0, description='Types', max=5.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Pages', max=93.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Cards', max=1.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Cards', max=1.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Cards', max=1.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Pages', max=24.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Cards', max=1.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Pages', max=64.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Cards', max=1.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Pages', max=76.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Cards', max=1.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Cards', max=1.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Cards', max=1.0, style=ProgressStyle(de…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Pages', max=64.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Cards', max=21.0, style=ProgressStyle(description_width='…

In [8]:
# Заменим отсутствующие значения на np.NaN
planes.replace(to_replace=[None, 'on request'], value=np.nan, inplace=True)

In [9]:
# Сохраним датасет в CSV
filename = './data/planes.csv'
planes.to_csv(filename, index=False)

---
<a id="3"></a>
## EDA [(Наверх)](#0) 

In [10]:
# Проверим корректно ли создан датасет
planes

Unnamed: 0,Date,Model,Category,Manufacturer,Model.1,Year,RAID (hours),Registration number,Serial number,Seller,Location,Description,Price
0,2020-05-28,210l,Single engine,Cessna,210l,1976,,N210RX,,Casey Electric llc,,,215000.0
1,2020-05-28,Pa-28-235 cherokee 235,Single engine,Piper,Pa-28-235 cherokee 235,1970,,N8527N,,"AircraftMerchants, LLC",,,84900.0
2,2020-05-28,"M20f ""executive"" turbo",Single engine,Mooney,"M20f ""executive"" turbo",1967,,N2928L,,"Skywagons.com, LLC",,,79500.0
3,2020-05-28,V35b turbo bonanza,Single engine,Beechcraft,V35b turbo bonanza,1966,,HBEFH,,"Aeromeccanica, SA",,,141154.0
4,2020-05-28,210,Single engine,Cessna,210,1966,,HBCSS,,"Aeromeccanica, SA",,,119438.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6552,2018-10-14,R66 turbine,Helicopter,Robinson,R66 turbine,2015,,,,Fly Q Limited,United Kingdom,You choose,855000.0
6553,2018-10-14,R44 clipper ii,Helicopter,Robinson,R44 clipper ii,2008,700,CFKKA,12369,,"Canada, Whitehorse, YXY",All original parts\r\n-- 12 year overhaul at 7...,353000.0
6554,2018-10-14,Kiss k209 m,Helicopter,Fama,Kiss k209 m,2013,200,FPKIS,26,,"France, Cannes, LFMD","EFIS multifonctions (carte GPS), torque-mtre, ...",221813.0
6555,2018-10-14,R22 beta 2,Helicopter,Robinson,R22 beta 2,1999,2135,DHFSO,2882,,"Germany, Hamburg, EDDH",Engine refitted 03/2020,169410.0


In [11]:
# Узнаем размер датасета
planes.shape

(6557, 13)

In [12]:
# Узнаем свойства датасета
planes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6557 entries, 0 to 6556
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Date                 6557 non-null   object 
 1   Model                6557 non-null   object 
 2   Category             6557 non-null   object 
 3   Manufacturer         6557 non-null   object 
 4   Model                6557 non-null   object 
 5   Year                 6413 non-null   object 
 6   RAID (hours)         5001 non-null   object 
 7   Registration number  3179 non-null   object 
 8   Serial number        1636 non-null   object 
 9   Seller               5082 non-null   object 
 10  Location             3818 non-null   object 
 11  Description          4994 non-null   object 
 12  Price                2821 non-null   float64
dtypes: float64(1), object(12)
memory usage: 666.1+ KB
