In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import numpy as np
from datetime import datetime
from itertools import product
import json
import multiprocessing

# this is just a file with a worker function for multiprocessing
# (otherwise multiprocessing doesn't work in Jupyter on Windows)
#import worker 

In [3]:
# Brand names were copied manually from auto.ru and stored in brands.xlsx.
df_brands = pd.read_excel('brands.xlsx')
df_brands

Unnamed: 0,Brands,Count
0,AC,2
1,AMC,2
2,Acura,46
3,Alfa Romeo,27
4,Alpina,2
...,...,...
120,ЛуАЗ,11
121,Москвич,101
122,СМЗ,1
123,ТагАЗ,34


In [4]:
# Drop rare brands (Count <= 400) together with the Russian/Soviet ones.
df_brands = df_brands.loc[
    df_brands['Count'].gt(400)
    & ~df_brands['Brands'].isin(['LADA (ВАЗ)', 'ГАЗ', 'УАЗ'])
]

In [24]:
# Show the brands that remain after the filter.
df_brands

Unnamed: 0,Brands,Count
8,Audi,2750
9,BMW,5283
19,Chevrolet,1352
21,Citroen,692
39,Ford,2017
51,Honda,574
53,Hyundai,3470
54,Infiniti,599
59,Jeep,401
60,Kia,3850


In [8]:
def crawl_auto(brands_lst, year_from=1980, year_to=2021, fr=1, to=10000000, radius=200, timeout=30):
    """
    Crawl auto.ru listings brand by brand and year by year.

    The idea for this function was taken from
    https://github.com/DarkLabel1/YouTube/blob/master/Auto_ru.py
    and then slightly modified to fit the requirements of the kaggle competition.

    Every (year, brand) pair is queried page by page until a page comes back
    with no offers. If a request fails or the answer cannot be parsed, the
    failure is printed and the crawl moves on to the next (year, brand) pair;
    offers collected so far are kept.

    Arguments:
        brands_lst - iterable of brand names to process (upper-cased before sending)
        year_from and year_to - range of car production years; year_to itself
            is NOT included
        fr and to - slice bounds applied to the list of (year, brand) pairs,
            useful for testing on a shorter run.
            NOTE: the default fr=1 skips the very first pair; pass fr=0 to
            crawl every pair.
        radius - value sent as 'geo_radius' (radius from Moscow)
        timeout - seconds to wait for each HTTP request before giving up

    Returns:
        list of offers exactly as returned in the 'offers' field of each response
    """
    # Upper-case the brand names before using them as the 'mark' filter.
    brands = [b.upper() for b in brands_lst]

    # Pairs of (year, brand) to iterate over; year_to is exclusive.
    year_brand = list(product(range(year_from, year_to), brands))

    # these 2 won't change
    URL = 'https://auto.ru/-/ajax/desktop/listing/' #URL for the post request

    # header for the post request
    # NOTE(review): the Cookie and x-csrf-token values below look like they were
    # captured from a browser session and are hard-coded; they will expire and
    # ideally should not live in source. 'Content-Length' is hard-coded as well -
    # confirm it is still needed.
    HEADERS = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',
        'Connection': 'keep-alive',
        'Content-Length': '137',
        'content-type': 'application/json',
        'Cookie': 'autoru_gdpr=1; _csrf_token=1c0ed592ec162073ac34d79ce511f0e50d195f763abd8c24; autoru_sid=a%3Ag5e3b198b299o5jhpv6nlk0ro4daqbpf.fa3630dbc880ea80147c661111fb3270%7C1580931467355.604800.8HnYnADZ6dSuzP1gctE0Fw.cd59AHgDSjoJxSYHCHfDUoj-f2orbR5pKj6U0ddu1G4; autoruuid=g5e3b198b299o5jhpv6nlk0ro4daqbpf.fa3630dbc880ea80147c661111fb3270; suid=48a075680eac323f3f9ad5304157467a.bc50c5bde34519f174ccdba0bd791787; from_lifetime=1580933172327; from=yandex; X-Vertis-DC=myt; crookie=bp+bI7U7P7sm6q0mpUwAgWZrbzx3jePMKp8OPHqMwu9FdPseXCTs3bUqyAjp1fRRTDJ9Z5RZEdQLKToDLIpc7dWxb90=; cmtchd=MTU4MDkzMTQ3MjU0NQ==; yandexuid=1758388111580931457; bltsr=1; navigation_promo_seen-recalls=true',
        'Host': 'auto.ru',
        'origin': 'https://auto.ru',
        'Referer': 'https://auto.ru/ryazan/cars/mercedes/all/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
        'x-client-app-version': '202002.03.092255',
        'x-client-date': '1580933207763',
        'x-csrf-token': '1c0ed592ec162073ac34d79ce511f0e50d195f763abd8c24',
        'x-page-request-id': '60142cd4f0c0edf51f96fd0134c6f02a',
        'x-requested-with': 'fetch'
    }

    result = []

    for year, brand in year_brand[fr:to]:
        page = 1  # pagination starts at 1 for every (year, brand) pair
        while True:
            # Post request parameters are changed within the loop
            params = {
                'catalog_filter': [{"mark": brand}],
                'section': "all",
                'category': "cars",
                'sort': "fresh_relevance_1-desc",
                'page': page,
                'geo_radius': str(radius),
                'year_from': str(year),
                'year_to': str(year),
                'geo_id': [213]
            }

            try:
                response = requests.post(URL, json=params, headers=HEADERS, timeout=timeout)
                data = response.json()['offers']
            except (requests.RequestException, ValueError, KeyError, TypeError) as exc:
                # Never reuse the previous page's data: that would duplicate
                # offers and could loop forever. Give up on this pair instead.
                print(f'Failed for {brand}, year {year} page {page} ({exc!r}) - let us go on!')
                break

            if not data:
                # An empty page means there is nothing more for this pair.
                break

            print(f'{brand}, year {year} : {len(data)} entries')
            result.extend(data)
            page += 1

    print('Crawling done!!!')
    return result

In [7]:
def save_to_file(j, path="data.json"):
    """
    Serialize a JSON-compatible object to a file.

    Arguments:
        j - the object to serialize (e.g. the list returned by crawl_auto)
        path - destination file; defaults to "data.json" in the working directory

    The output is indented by 4 spaces. It is streamed straight into the file
    instead of being built as one big string first, which matters for large dumps.
    """
    with open(path, "w", encoding="utf-8") as f:
        json.dump(j, f, indent=4)

In [9]:
# Crawl every remaining brand. crawl_auto treats year_to as exclusive, so this covers 1900-2021.
j = crawl_auto(brands_lst=list(df_brands['Brands']), year_from=1900, year_to=2022)

# I didn't have time to complete the parser in one go, so the raw results are saved to JSON and
# processed from that file later. The file for Moscow (+200 km radius) and all the main brands
# starting from 1970 takes about 1.9 GB; please take that into account if you use the same approach.
# NOTE(review): the call above starts from 1900, not 1970 - confirm which run the size refers to.
save_to_file(j)

FORD, year 1923 : 1 entries
FORD, year 1927 : 1 entries
FORD, year 1928 : 1 entries
FORD, year 1932 : 2 entries
BMW, year 1937 : 2 entries
AUDI, year 1938 : 1 entries
OPEL, year 1938 : 1 entries
FORD, year 1939 : 1 entries
OPEL, year 1939 : 1 entries
BMW, year 1947 : 1 entries
BMW, year 1948 : 1 entries
BMW, year 1949 : 2 entries
FORD, year 1949 : 1 entries
BMW, year 1950 : 2 entries
FORD, year 1950 : 1 entries
BMW, year 1951 : 1 entries
CHEVROLET, year 1951 : 1 entries
OPEL, year 1954 : 1 entries
CHEVROLET, year 1955 : 1 entries
PORSCHE, year 1956 : 1 entries
FORD, year 1957 : 1 entries
CHEVROLET, year 1959 : 1 entries
PORSCHE, year 1959 : 1 entries
SKODA, year 1960 : 1 entries
CHEVROLET, year 1964 : 1 entries
PORSCHE, year 1964 : 1 entries
VOLKSWAGEN, year 1964 : 1 entries
VOLKSWAGEN, year 1965 : 1 entries
CITROEN, year 1967 : 1 entries
BMW, year 1969 : 1 entries
OPEL, year 1969 : 1 entries
CHEVROLET, year 1970 : 2 entries
FORD, year 1970 : 1 entries
CHEVROLET, year 1971 : 1 entries


TOYOTA, year 1996 : 3 entries
VOLKSWAGEN, year 1996 : 22 entries
VOLVO, year 1996 : 8 entries
AUDI, year 1997 : 27 entries
BMW, year 1997 : 37 entries
CHEVROLET, year 1997 : 9 entries
FORD, year 1997 : 24 entries
HONDA, year 1997 : 22 entries
HYUNDAI, year 1997 : 11 entries
JEEP, year 1997 : 3 entries
KIA, year 1997 : 7 entries
LEXUS, year 1997 : 3 entries
MAZDA, year 1997 : 12 entries
MITSUBISHI, year 1997 : 26 entries
NISSAN, year 1997 : 37 entries
OPEL, year 1997 : 34 entries
PEUGEOT, year 1997 : 3 entries
RENAULT, year 1997 : 8 entries
SKODA, year 1997 : 20 entries
SUBARU, year 1997 : 13 entries
SUZUKI, year 1997 : 5 entries
TOYOTA, year 1997 : 38 entries
TOYOTA, year 1997 : 32 entries
VOLKSWAGEN, year 1997 : 35 entries
VOLVO, year 1997 : 13 entries
AUDI, year 1998 : 37 entries
AUDI, year 1998 : 1 entries
BMW, year 1998 : 37 entries
BMW, year 1998 : 3 entries
CHEVROLET, year 1998 : 5 entries
CITROEN, year 1998 : 3 entries
FORD, year 1998 : 15 entries
HONDA, year 1998 : 29 entries
H

HYUNDAI, year 2005 : 13 entries
INFINITI, year 2005 : 14 entries
JEEP, year 2005 : 7 entries
KIA, year 2005 : 37 entries
KIA, year 2005 : 14 entries
LEXUS, year 2005 : 19 entries
MINI, year 2005 : 3 entries
MAZDA, year 2005 : 38 entries
MAZDA, year 2005 : 33 entries
MITSUBISHI, year 2005 : 38 entries
MITSUBISHI, year 2005 : 37 entries
MITSUBISHI, year 2005 : 37 entries
MITSUBISHI, year 2005 : 15 entries
NISSAN, year 2005 : 38 entries
NISSAN, year 2005 : 37 entries
NISSAN, year 2005 : 19 entries
OPEL, year 2005 : 14 entries
PEUGEOT, year 2005 : 20 entries
PORSCHE, year 2005 : 16 entries
RENAULT, year 2005 : 38 entries
SKODA, year 2005 : 14 entries
SUBARU, year 2005 : 23 entries
SUZUKI, year 2005 : 23 entries
TOYOTA, year 2005 : 37 entries
TOYOTA, year 2005 : 14 entries
VOLKSWAGEN, year 2005 : 37 entries
VOLKSWAGEN, year 2005 : 37 entries
VOLKSWAGEN, year 2005 : 7 entries
VOLVO, year 2005 : 23 entries
AUDI, year 2006 : 37 entries
AUDI, year 2006 : 37 entries
AUDI, year 2006 : 3 entries
B

SKODA, year 2008 : 1 entries
SUBARU, year 2008 : 35 entries
SUZUKI, year 2008 : 38 entries
SUZUKI, year 2008 : 37 entries
SUZUKI, year 2008 : 6 entries
TOYOTA, year 2008 : 38 entries
TOYOTA, year 2008 : 37 entries
TOYOTA, year 2008 : 37 entries
TOYOTA, year 2008 : 37 entries
TOYOTA, year 2008 : 37 entries
TOYOTA, year 2008 : 37 entries
TOYOTA, year 2008 : 8 entries
VOLKSWAGEN, year 2008 : 38 entries
VOLKSWAGEN, year 2008 : 37 entries
VOLKSWAGEN, year 2008 : 37 entries
VOLKSWAGEN, year 2008 : 37 entries
VOLKSWAGEN, year 2008 : 37 entries
VOLKSWAGEN, year 2008 : 37 entries
VOLKSWAGEN, year 2008 : 13 entries
VOLVO, year 2008 : 38 entries
VOLVO, year 2008 : 37 entries
VOLVO, year 2008 : 13 entries
AUDI, year 2009 : 38 entries
AUDI, year 2009 : 37 entries
AUDI, year 2009 : 33 entries
BMW, year 2009 : 37 entries
BMW, year 2009 : 37 entries
BMW, year 2009 : 37 entries
BMW, year 2009 : 37 entries
BMW, year 2009 : 5 entries
CHEVROLET, year 2009 : 38 entries
CHEVROLET, year 2009 : 37 entries
CHE

BMW, year 2012 : 4 entries
CHEVROLET, year 2012 : 38 entries
CHEVROLET, year 2012 : 37 entries
CHEVROLET, year 2012 : 37 entries
CHEVROLET, year 2012 : 37 entries
CHEVROLET, year 2012 : 37 entries
CHEVROLET, year 2012 : 37 entries
CHEVROLET, year 2012 : 37 entries
CHEVROLET, year 2012 : 19 entries
CITROEN, year 2012 : 37 entries
CITROEN, year 2012 : 37 entries
CITROEN, year 2012 : 13 entries
FORD, year 2012 : 38 entries
FORD, year 2012 : 37 entries
FORD, year 2012 : 37 entries
FORD, year 2012 : 37 entries
FORD, year 2012 : 37 entries
FORD, year 2012 : 20 entries
HONDA, year 2012 : 37 entries
HONDA, year 2012 : 16 entries
HYUNDAI, year 2012 : 38 entries
HYUNDAI, year 2012 : 37 entries
HYUNDAI, year 2012 : 37 entries
HYUNDAI, year 2012 : 37 entries
HYUNDAI, year 2012 : 37 entries
HYUNDAI, year 2012 : 37 entries
HYUNDAI, year 2012 : 15 entries
INFINITI, year 2012 : 37 entries
INFINITI, year 2012 : 24 entries
JEEP, year 2012 : 37 entries
JEEP, year 2012 : 30 entries
KIA, year 2012 : 38 ent

NISSAN, year 2014 : 37 entries
NISSAN, year 2014 : 37 entries
NISSAN, year 2014 : 37 entries
NISSAN, year 2014 : 37 entries
NISSAN, year 2014 : 37 entries
NISSAN, year 2014 : 2 entries
OPEL, year 2014 : 37 entries
OPEL, year 2014 : 37 entries
OPEL, year 2014 : 37 entries
OPEL, year 2014 : 37 entries
OPEL, year 2014 : 23 entries
PEUGEOT, year 2014 : 38 entries
PEUGEOT, year 2014 : 4 entries
PORSCHE, year 2014 : 37 entries
PORSCHE, year 2014 : 17 entries
RENAULT, year 2014 : 37 entries
RENAULT, year 2014 : 37 entries
RENAULT, year 2014 : 37 entries
RENAULT, year 2014 : 37 entries
RENAULT, year 2014 : 34 entries
SKODA, year 2014 : 38 entries
SKODA, year 2014 : 37 entries
SKODA, year 2014 : 37 entries
SKODA, year 2014 : 37 entries
SKODA, year 2014 : 37 entries
SKODA, year 2014 : 24 entries
SUBARU, year 2014 : 29 entries
SUZUKI, year 2014 : 31 entries
TOYOTA, year 2014 : 38 entries
TOYOTA, year 2014 : 37 entries
TOYOTA, year 2014 : 37 entries
TOYOTA, year 2014 : 37 entries
TOYOTA, year 2014

SKODA, year 2017 : 37 entries
SKODA, year 2017 : 37 entries
SKODA, year 2017 : 37 entries
SKODA, year 2017 : 9 entries
SUBARU, year 2017 : 9 entries
SUZUKI, year 2017 : 8 entries
TOYOTA, year 2017 : 37 entries
TOYOTA, year 2017 : 37 entries
TOYOTA, year 2017 : 37 entries
TOYOTA, year 2017 : 37 entries
TOYOTA, year 2017 : 15 entries
VOLKSWAGEN, year 2017 : 38 entries
VOLKSWAGEN, year 2017 : 37 entries
VOLKSWAGEN, year 2017 : 37 entries
VOLKSWAGEN, year 2017 : 37 entries
VOLKSWAGEN, year 2017 : 37 entries
VOLKSWAGEN, year 2017 : 37 entries
VOLKSWAGEN, year 2017 : 37 entries
VOLKSWAGEN, year 2017 : 2 entries
VOLVO, year 2017 : 38 entries
VOLVO, year 2017 : 11 entries
AUDI, year 2018 : 37 entries
AUDI, year 2018 : 37 entries
AUDI, year 2018 : 4 entries
BMW, year 2018 : 38 entries
BMW, year 2018 : 37 entries
BMW, year 2018 : 37 entries
BMW, year 2018 : 37 entries
BMW, year 2018 : 37 entries
BMW, year 2018 : 37 entries
BMW, year 2018 : 37 entries
BMW, year 2018 : 22 entries
CHEVROLET, year 2

KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 37 entries
KIA, year 2020 : 5 entries
LEXUS, year 2020 : 37 entries
LEXUS, year 2020 : 37 entries
LEXUS, year 2020 : 37 entries
LEXUS, year 2020 : 10 entries
MINI, year 2020 : 38 entries
MINI, year 2020 : 37 entries
MINI, year 2020 : 37 entries
MINI, year 2020 : 37 entries
MINI, year 2020 : 37 entries
MINI, year 2020 : 37 entries
MINI, year 2020 : 8 entries
MAZDA, year 2020 : 37 entries
MAZDA, year 2020 : 37 entries
MAZDA, year 2020 : 37 entries
MAZDA, year 2020 : 37 entries
MAZDA, year 2020 : 37 entries
MAZDA, year 2020 : 27 entries
MITSUBISHI, year 2020 : 37 entries
MITSUBISHI, year

SUZUKI, year 2021 : 37 entries
SUZUKI, year 2021 : 37 entries
SUZUKI, year 2021 : 37 entries
SUZUKI, year 2021 : 15 entries
TOYOTA, year 2021 : 37 entries
TOYOTA, year 2021 : 15 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 37 entries
VOLKSWAGEN, year 2021 : 30 entries
VOLVO, year 2021 : 37 entries
VOLVO, year 20

In [10]:
# Number of offers returned by the crawl.
len(j)

60488

In [11]:
import json

def process_json_array(f):
    """
    Load a JSON file and return its parsed contents.

    Arguments:
        f - path of the JSON file to read (for example a dump written by save_to_file)

    Returns:
        the deserialized Python object; no filtering or field extraction is done here
    """
    # A separate name for the handle keeps the path argument from being shadowed.
    with open(f, "r", encoding="utf-8") as json_file:
        return json.load(json_file)

In [13]:
# Load a previously saved crawl dump.
# NOTE(review): save_to_file writes 'data.json' but this reads 'data 3.json' - confirm this is the intended file.
from_json = process_json_array('data 3.json')

In [15]:
# Look at the first raw offer to see which fields are available.
from_json[0]

{'availability': 'IN_STOCK',
 'category': 'cars',
 'color_hex': '007F00',
 'delivery_info': {},
 'description': '1 октября 1908 Форд воплотил в жизнь свою мечту создать "автомобиль для народа" выпустив модель Т - надежный и недорогой автомобиль , способный пройти там, где другие машины застревали в дорожной грязи.\nв первый год производства было продано 10 660 машин.\nАвтомобиль в идеальном состоянии.\nОтносится к категории культурных ценностей.\nДанный автомобиль Вы можете обменять на свой.\n\nВозможен торг.',
 'discount_options': {},
 'documents': {'owners_number': 1,
  'pts_original': True,
  'custom_cleared': True,
  'year': 1923,
  'pts': 'ORIGINAL'},
 'is_favorite': False,
 'old_category_id': 15,
 'owner_expenses': {'transport_tax': {'tax_by_year': 240,
   'year': 2020,
   'rid': 213,
   'rate': 12,
   'horse_power': 20,
   'holding_period_month': 12,
   'boost': 1},
  'osago_insurance': [{'insurance_company': 'TINKOFF', 'price': 2366}]},
 'section': 'used',
 'seller_type': 'COMM

In [16]:
# Look at a record much further down the list for comparison.
from_json[60000]

{'availability': 'ON_ORDER',
 'category': 'cars',
 'color_hex': 'FAFBFB',
 'delivery_info': {},
 'description': 'Пакет "Зимние технологии"\nМногофункциональное рулевое колесо с кожаной отделкой и подогревом\nПодогрев передних сидений\nПодогрев боковых задних сидений\nИндикатор низкого уровня жидкости омывателя\nПодогрев форсунок омывателя\nЭлектроскладываемые наружные зеркала заднего вида с электрорегулировками и подогревом\nТеплоизолирующее лобовое стекло с электроподогревом\n\nВнешнее оборудование\n----------------------------\nЗадние стекла с тонировкой (65% светопоглощение)\nСветодиодные фары ближнего и дальнего света рефлекторного типа с отдельными дневными ходовыми огнями\nСветодиодные задние фонари\nРейлинги на крыше серебристого цвета\nХромированная окантовка боковых окон\nЛегкосплавные колесные диски "Montana" 7J x 17", шины 215/65 R17\n\nИнтерьер\n----------------------------\nТканевая отделка "Shooting Star"\nКарманы в спинках передних сидений\nДекоративные вставки Status\nС

In [17]:
# Body-type label of the first offer (the path later mapped to 'bodyType').
from_json[0]['vehicle_info']['configuration']['human_name']

'Кабриолет'

In [18]:
# Same field for the record at index 60000.
from_json[60000]['vehicle_info']['configuration']['human_name']

'Внедорожник 5 дв.'

In [19]:
# Idea borrowed from https://www.kaggle.com/sokolovaleks/sf-dst-10-p1-parsing-almira-andrey-sokolov
# and adapted to my requirements.

# Template row: every column we need (roughly what test.csv has), each preset to 0.
dict_columns = dict.fromkeys(
    [
        'bodyType', 'brand', 'color_hex', 'complectation_dict', 'equipment_dict',
        'fuelType', 'modelDate', 'model_info', 'model_name', 'name',
        'numberOfDoors', 'productionDate', 'vehicleConfiguration',
        'vehicleTransmission', 'vendor', 'engineDisplacement', 'enginePower',
        'description', 'mileage', 'Привод', 'Руль', 'Состояние', 'Владельцы',
        'ПТС', 'Таможня', 'Владение', 'price',
    ],
    0,
)

def _dig(record, path):
    """Follow `path` key by key through nested containers; None if any step is missing."""
    current = record
    for key in path:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            return None
    return current


def _split_pick(text, sep, index):
    """Return text.split(sep)[index], or None if text is not a string or has too few parts."""
    try:
        return text.split(sep)[index]
    except (AttributeError, IndexError, TypeError):
        return None


# One entry per output column: (column, path inside the raw offer, optional (separator, index)).
# When the third item is given, the value found at `path` is split on the separator
# (None = any whitespace) and the part at `index` is kept.
_FIELD_SPECS = (
    ('bodyType', ('vehicle_info', 'configuration', 'human_name'), None),
    ('brand', ('vehicle_info', 'mark_info', 'code'), None),
    ('color_hex', ('color_hex',), None),
    ('complectation_dict', ('vehicle_info', 'complectation'), None),
    ('equipment_dict', ('vehicle_info', 'equipment'), None),
    ('fuelType', ('lk_summary',), (None, -1)),  # last whitespace-separated token
    ('modelDate', ('vehicle_info', 'super_gen', 'year_from'), None),
    ('model_info', ('vehicle_info', 'model_info'), None),
    ('model_name', ('vehicle_info', 'model_info', 'ru_name'), None),
    ('name', ('vehicle_info', 'tech_param', 'human_name'), None),
    ('numberOfDoors', ('vehicle_info', 'configuration', 'doors_count'), None),
    ('productionDate', ('documents', 'year'), None),
    ('vehicleConfiguration', ('vehicle_info', 'configuration'), None),
    ('vehicleTransmission', ('vehicle_info', 'tech_param', 'transmission'), None),
    ('vendor', ('vehicle_info', 'vendor'), None),
    ('engineDisplacement', ('vehicle_info', 'tech_param', 'human_name'), (None, 0)),  # first token
    ('enginePower', ('vehicle_info', 'tech_param', 'power'), None),
    ('description', ('description',), None),
    ('mileage', ('state', 'mileage'), None),
    ('Привод', ('lk_summary',), (', ', -2)),  # second-to-last comma-separated part
    ('Руль', ('vehicle_info', 'steering_wheel'), None),
    ('Состояние', ('section',), None),
    ('Владельцы', ('documents', 'owners_number'), None),
    ('ПТС', ('documents', 'pts'), None),
    ('Таможня', ('documents', 'custom_cleared'), None),
    ('Владение', ('documents', 'purchase_date'), None),
    ('price', ('price_info', 'RUR'), None),
)


def from_elem_to_dict(d_elem, dict_cols, result):
    """
    Flatten one raw offer into a row dict and append that row to `result`.

    Arguments:
        d_elem - one offer as loaded from the crawl JSON (nested dicts)
        dict_cols - template dict of columns; it is copied, never modified.
            Keys it already holds keep their position (and their value if the
            key is not one of the extracted columns).
        result - list that receives the new row

    Every extracted column is set to the value found in the offer, or to None
    when the offer does not contain it. Nothing is returned.
    """
    # Start from a private copy of the template passed in by the caller.
    row = dict(dict_cols or {})

    for column, path, pick in _FIELD_SPECS:
        value = _dig(d_elem, path)
        if pick is not None:
            value = _split_pick(value, *pick)
        row[column] = value

    result.append(row)

In [20]:
# Flatten every raw offer into one row; the helper appends each row to `result`.
result = []

for offer in from_json:
    from_elem_to_dict(d_elem=offer, dict_cols=dict_columns, result=result)

In [21]:
# Build the table in one go from the list of row dicts.
df_result = pd.DataFrame(result)

In [22]:
# Check column dtypes and non-null counts.
df_result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60488 entries, 0 to 60487
Data columns (total 27 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   bodyType              60486 non-null  object 
 1   brand                 60488 non-null  object 
 2   color_hex             60488 non-null  object 
 3   complectation_dict    60488 non-null  object 
 4   equipment_dict        60488 non-null  object 
 5   fuelType              60488 non-null  object 
 6   modelDate             60486 non-null  float64
 7   model_info            60488 non-null  object 
 8   model_name            60488 non-null  object 
 9   name                  60486 non-null  object 
 10  numberOfDoors         60486 non-null  float64
 11  productionDate        60488 non-null  int64  
 12  vehicleConfiguration  60486 non-null  object 
 13  vehicleTransmission   60486 non-null  object 
 14  vendor                60488 non-null  object 
 15  engineDisplacement 

In [23]:
# Save the table as CSV.
# NOTE(review): the DataFrame index is written as an extra first column by default; pass index=False if it is not wanted.
df_result.to_csv('own_train_set.csv')

In [60]:
# Save the same table as an Excel workbook.
# NOTE(review): the index is written as an extra first column here too; pass index=False if it is not wanted.
df_result.to_excel('own_train_set.xlsx')