In [31]:
import requests
import urllib3
from urllib3.util.ssl_ import create_urllib3_context
from requests.adapters import HTTPAdapter
import pandas as pd
import os
from tqdm import tqdm
import zipfile
from urllib.parse import urlparse

# Disable SSL verification warnings
urllib3.disable_warnings()

class CustomAdapter(HTTPAdapter):
    def init_poolmanager(self, *args, **kwargs):
        # Create custom SSL context with lower security requirements
        context = create_urllib3_context()
        # The following line allows weaker DH keys
        context.set_ciphers('DEFAULT@SECLEVEL=1')
        kwargs['ssl_context'] = context
        return super().init_poolmanager(*args, **kwargs)
    
def fetch_api_data(url):
    session = requests.Session()
    session.mount('https://', CustomAdapter())
    try:
        response = session.get(url)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
        return None
    
def download_file(url, output_dir='downloads'):
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Get filename from URL
    filename = os.path.basename(urlparse(url).path)
    filepath = os.path.join(output_dir, filename)
    
    # Download with progress bar
    try:
        response = requests.get(url, stream=True)
        response.raise_for_status()
        
        total_size = int(response.headers.get('content-length', 0))
        
        with open(filepath, 'wb') as f, tqdm(
            desc=filename,
            total=total_size,
            unit='iB',
            unit_scale=True,
            unit_divisor=1024,
        ) as pbar:
            for data in response.iter_content(chunk_size=1024):
                size = f.write(data)
                pbar.update(size)
                
        # Extract if zip file
        if filename.endswith('.zip'):
            with zipfile.ZipFile(filepath, 'r') as zip_ref:
                zip_ref.extractall(output_dir)
                
        return filepath
        
    except requests.RequestException as e:
        print(f"Error downloading {url}: {e}")
        return None

# Create session with custom adapter
session = requests.Session()
session.mount('https://', CustomAdapter())



In [5]:
# Pobieranie kodow kazdego wojewodztwa

url_wojewodztwa = 'https://api.cepik.gov.pl/slowniki/wojewodztwa'
wojewodztwa_data = fetch_api_data(url_wojewodztwa)
wojewodztwa_data = wojewodztwa_data['data']['attributes']['dostepne-rekordy-slownika']

for item in wojewodztwa_data:
    df_wojewodztwa = pd.DataFrame(wojewodztwa_data)
    df_wojewodztwa.to_csv('data/wojewodztwa.csv', index=False, encoding='utf-8')


In [3]:
url = (
    'https://api.cepik.gov.pl/pojazdy'
    '?wojewodztwo=30'
    '&data-od=20240101'
    '&data-do=20241231'
    '&filter[rodzaj-paliwa]=ENERGIA%20ELEKTRYCZNA'
)


vehicle_data = fetch_api_data(url)
# vehicle_data


In [19]:
# Pliki ze wszystkimi pojazdami

url = 'https://api.cepik.gov.pl/pliki'
pliki_data = fetch_api_data(url)


In [None]:
wojewodztwo = pliki_data['data']
links = []
for item in wojewodztwo:
    print(item['attributes']['url-do-pliku'])


In [41]:
file = 'data/downloads/pojazdy_14_2022-04-17/pojazdy_14_2022-04-17.csv'
# Read the CSV file
df = pd.read_csv(file)
print(df.head())
print("\nDataset Info:")
print(df.info())

  df = pd.read_csv(file)


        pojazd_id       marka  kategoria  typ                 model   wariant  \
0  30637564105351       SKODA       13.0   1Z               OCTAVIA  AABSEX01   
1  77618647702718         NaN        NaN  NaN  POLSKI FIAT 126P 650       NaN   
2  97632081219436  VOLKSWAGEN        NaN  NaN                  1200       NaN   
3  13631562009226         NaN        NaN  NaN                   102       NaN   
4  75774339262414      DAEWOO        NaN  NaN               TICO DX       NaN   

         wersja            rodzaj       podrodzaj przeznaczenie  ...  \
0  NFM5FM5AF006  SAMOCHÓD OSOBOWY  KARETA (SEDAN)           ---  ...   
1           NaN  SAMOCHÓD OSOBOWY            INNY           ---  ...   
2           NaN  SAMOCHÓD OSOBOWY  KARETA (SEDAN)           ---  ...   
3           NaN  SAMOCHÓD OSOBOWY  KARETA (SEDAN)           ---  ...   
4           NaN  SAMOCHÓD OSOBOWY            INNY           ---  ...   

  siedziba_wlasciciela_woj siedziba_wlasciciela_pow  \
0              MAZOWIECKI

In [43]:

new_df = df[df['rodzaj_paliwa'] == 'ENERGIA ELEKTRYCZNA']
# new_df = new_df[new_df['rodzaj'] == 'SAMOCHÓD OSOBOWY']
new_df.to_csv('data/elektryczne14.csv', index=False, encoding='utf-8')