This code queries the HeadHunter (hh.ru) API for vacancies and saves the results to an Excel file using the openpyxl library.

The code first sets the request URL and creates an Excel workbook whose first worksheet row holds the column headers. It then initializes the pagination variables: the page number, the number of items per page, and the worksheet row at which the data starts.

The code then makes a request to the HeadHunter API to get a list of all available areas and loops through each area. For each area, it makes requests to the HeadHunter API to retrieve data for that specific area and paginates through the results. It then adds the data to the worksheet, starting at the second row and moving down for each item.

Finally, the code saves the Excel file.

In [None]:
import math
import openpyxl
import requests

# Endpoint that the vacancy requests are sent to
base_url = 'https://api.hh.ru/vacancies'

# Fresh workbook; its active sheet receives the output
wb = openpyxl.Workbook()
ws = wb.active

# Header row: titles listed left to right, written to columns 1-12 of row 1
column_titles = (
    'Title',
    'Description',
    'Requirements',
    'Responsibilities',
    'Salary',
    'Currency',
    'Employer',
    'Location',
    'Industry',
    'Specializations',
    'Type',
    'URL',
)
for column_number, title in enumerate(column_titles, start=1):
    ws.cell(row=1, column=column_number).value = title

# Pagination state
page, per_page = 0, 100
found = 10000  # Large starting value for the result count
row = 2  # Row 1 holds the headers, so data begins on row 2

# Get a list of all available areas
areas_url = 'https://api.hh.ru/areas'
# NOTE(review): verify=False turns off TLS certificate verification for this
# request -- confirm this is intentional (e.g. an intercepting proxy).
response = requests.get(areas_url, verify=False)
# Stop here with a clear HTTP error instead of parsing an error payload as
# if it were the list of areas.
response.raise_for_status()
areas = response.json()


def _format_salary(salary):
    """Return ``(salary_text, currency)`` for a vacancy's ``salary`` entry.

    ``salary`` is the value stored under ``item['salary']``: either ``None``
    or a mapping read via its ``'from'``, ``'to'`` and ``'currency'`` keys.
    The text is ``'<from>-<to>'``, ``'<from>+'`` or ``'up to <to>'`` depending
    on which bounds are truthy; ``(None, None)`` is returned when ``salary``
    is ``None`` or neither bound is set.
    """
    if salary is None:
        return None, None

    salary_from = salary.get('from')
    salary_to = salary.get('to')

    if salary_from and salary_to:
        text = f'{salary_from}-{salary_to}'
    elif salary_from:
        text = f'{salary_from}+'
    elif salary_to:
        text = f'up to {salary_to}'
    else:
        return None, None

    return text, salary['currency']


# Loop through the areas, downloading every result page for each one and
# appending one worksheet row per vacancy.
for area in areas:
    # Each area is paginated independently, starting from its first page.
    # The number of pages is taken from this area's own 'found' value, so a
    # count reported for a previous area can never shorten or skip this one.
    page = 0

    while True:
        params = {
            'text': 'tableau',
            'area': area['id'],  # Set the area to the current area
            'per_page': str(per_page),
            'page': str(page)
        }

        # Make the API request
        # NOTE(review): verify=False turns off TLS certificate verification --
        # confirm this is intentional.
        response = requests.get(base_url, params=params, verify=False)

        # Give up on this area if the request was not successful
        if response.status_code != 200:
            print(f'Request failed with status code {response.status_code}')
            break

        # Convert the response to JSON
        data = response.json()

        # 'found' is the total result count used to work out the page count
        if 'found' in data:
            found = data['found']
        else:
            print("'found' key not found in data dictionary.")
            break

        total_pages = math.ceil(found / per_page)

        # A missing or empty 'items' list means there is nothing to write
        items = data.get('items') or []

        # Add the data to the worksheet. Columns 2-4, 9 and 10 have headers
        # but are left empty: nothing is written to them here.
        for item in items:
            salary, currency = _format_salary(item['salary'])

            ws.cell(row=row, column=1).value = item['name']
            ws.cell(row=row, column=5).value = salary
            ws.cell(row=row, column=6).value = currency
            ws.cell(row=row, column=7).value = item['employer']['name']
            ws.cell(row=row, column=8).value = item['area']['name']
            ws.cell(row=row, column=11).value = item['type']['name']
            ws.cell(row=row, column=12).value = item['url']
            row += 1

        # Advance once per page; stop when the page was empty or the last
        # page reported for this area has been processed.
        page += 1
        if not items or page >= total_pages:
            break

# Write the finished workbook to its destination path
output_path = 'C:\\Users\\enizamov\\OneDrive - Luxoft\\Documents\\Private\\Python\\jobs.xlsx'
wb.save(output_path)
