First, we convert the raw .json file to .csv.

In [1]:
import pandas as pd
import json
from pathlib import Path

# --- Notebook-relative paths ---
# Everything is anchored at the working directory, which is assumed to be
# the 'post_offices' folder the notebook is started from.
CURRENT_DIR = Path.cwd()

# Raw JSON input and CSV output both sit in the 'sources' subfolder.
json_file_name = CURRENT_DIR.joinpath('sources', 'raw_post.json')
csv_file_name = CURRENT_DIR.joinpath('sources', 'deutschepost_raw.csv')
# -----------------------------------------------------------

try:
    # Parse the complete JSON document in one go.
    with json_file_name.open(encoding='utf-8') as json_handle:
        data = json.load(json_handle)

    # The records are stored as a list under the top-level 'pfLocations' key.
    list_of_records = data['pfLocations']

    # One DataFrame row per record, written out without the index column.
    # The 'utf-8-sig' codec puts a byte-order mark at the start of the file.
    df = pd.DataFrame(list_of_records)
    df.to_csv(csv_file_name, index=False, encoding='utf-8-sig')

    print(f"✅ File '{json_file_name.name}' successfully converted to '{csv_file_name.name}'")

# Handlers go from most specific to most general; each one only reports the
# problem, so the cell itself never raises.
except FileNotFoundError:
    print(f"❌ ERROR: File '{json_file_name.name}' not found. Make sure it is in the 'sources' subfolder.")
except KeyError:
    print(f"❌ KEY ERROR: Key 'pfLocations' not found! Please check the JSON file again.")
except Exception as exc:
    print(f"❌ An unexpected error occurred: {exc}")

✅ File 'raw_post.json' successfully converted to 'deutschepost_raw.csv'


Then we extract the opening hours and rename the days of the week from numbers to names. We also extract the geo coordinates.

In [6]:
import ast # This library safely converts a string into a Python object (a list)

# --- SETTINGS (Adapted for Jupyter Notebooks) ---
# CURRENT_DIR is not recomputed here: it must already be defined by the
# JSON-to-CSV conversion cell earlier in the notebook (Path.cwd()).

# Bare file names; the directory part is attached below
input_file_name = 'deutschepost_raw.csv'
output_file_name = 'deutschepost_raw_opening_hours_geo.csv'

# Both the input and the output file live in the 'sources' subfolder
input_csv_file = CURRENT_DIR.joinpath('sources', input_file_name)
output_csv_file = CURRENT_DIR.joinpath('sources', output_file_name)

# Column whose values are parsed for opening hours
column_to_process = 'pfTimeinfos'
# --------------------


def parse_opening_hours(data_list):
    """
    Extract the opening hours (entries of type 'OPENINGHOUR') per weekday.

    Parameters
    ----------
    data_list : list
        List of dictionaries. Only dictionaries whose 'type' is
        'OPENINGHOUR' are used; their 'weekday', 'timefrom' and 'timeto'
        entries are read. Items that are not dictionaries are skipped.
        Any value that is not a list yields an empty result.

    Returns
    -------
    dict
        Maps each 'weekday' value to a string "<timefrom>-<timeto>".
        When a weekday has several OPENINGHOUR entries (for example a
        morning and an afternoon interval), all of them are kept, in input
        order, joined with ", " instead of only the last one surviving.
        A missing 'timefrom' or 'timeto' is rendered as 'N/A'.
        Keys appear in the order in which each weekday is first seen.
    """
    if not isinstance(data_list, list):
        return {}

    intervals_by_weekday = {}
    for item in data_list:
        # Skip malformed entries rather than failing on item.get(...)
        if not isinstance(item, dict) or item.get('type') != 'OPENINGHOUR':
            continue
        time_from = item.get('timefrom', 'N/A')
        time_to = item.get('timeto', 'N/A')
        # Append instead of overwrite so split opening hours are not lost
        intervals_by_weekday.setdefault(item.get('weekday'), []).append(f"{time_from}-{time_to}")

    return {
        weekday: ", ".join(intervals)
        for weekday, intervals in intervals_by_weekday.items()
    }

def _parse_list_literal(raw):
    """Convert the text stored in a CSV cell back into a Python object.

    Strings are parsed with ast.literal_eval. Anything that is not a string
    (pandas reads an empty cell as NaN) is returned unchanged, because
    literal_eval raises ValueError on such values.
    """
    if isinstance(raw, str):
        return ast.literal_eval(raw)
    return raw


# 1. Read the source CSV file
df = pd.read_csv(input_csv_file)

# 2. Process the column
#    Cells without a parsable string fall through unchanged and are turned
#    into an empty dict by parse_opening_hours (it returns {} for non-lists).
df['opening_hours_dict'] = df[column_to_process].apply(_parse_list_literal).apply(parse_opening_hours)

# 3. Convert the dictionary with hours into separate columns
#    Building one DataFrame from the list of dicts avoids creating a Series
#    per row and still yields a DataFrame when there are no rows at all.
hours_df = pd.DataFrame(df['opening_hours_dict'].tolist(), index=df.index)

# 4. Rename the columns for clarity
weekday_map = {
    1: 'Monday',
    2: 'Tuesday',
    3: 'Wednesday',
    4: 'Thursday',
    5: 'Friday',
    6: 'Saturday',
    7: 'Sunday'
}
hours_df = hours_df.rename(columns=weekday_map)

# 5. Concatenate the new columns with the original DataFrame
df_final = pd.concat([df, hours_df], axis=1)

# The temporary dictionary column can be dropped if it is not needed
df_final = df_final.drop(columns=['opening_hours_dict'])

# 6. Save the result to a new file
df_final.to_csv(output_csv_file, index=False, encoding='utf-8-sig')

print(f"✅ Done! Data has been processed and saved to '{output_file_name}' in the 'sources' folder.")

✅ Done! Data has been processed and saved to 'deutschepost_raw_opening_hours_geo.csv' in the 'sources' folder.
