### Complete

In [1]:
import pandas as pd
import numpy as np
import zipfile
import xml.etree.ElementTree as ET
import os
import re

### Extract KMZ file into GeoJSON


In [None]:


# --- Step 1: Unzip the KMZ ---
# The archive is opened as a zip; this assumes its KML payload is stored as 'doc.kml'.
with zipfile.ZipFile('KSE_2019.kmz', 'r') as kmz:
    kmz.extract('doc.kml', 'output_folder')

# --- Step 2: Parse KML ---
kml_path = os.path.join('output_folder', 'doc.kml')
tree = ET.parse(kml_path)
root = tree.getroot()

# --- Step 3: Namespace ---
# Prefix map so the XPath expressions below can address elements as `kml:*`.
ns = {'kml': 'http://www.opengis.net/kml/2.2'}

# --- Step 4: Extract Data from Each Placemark ---
# One dict per Placemark; ExtendedData names become extra keys (DataFrame columns).
data = []

for placemark in root.findall('.//kml:Placemark', ns):
    entry = {}

    # --- Get <name> and try to split nickname ---
    # ElementTree reports the text of an empty element as None, so an empty
    # <name/> is handled like a missing one instead of crashing re.match().
    name_elem = placemark.find('kml:name', ns)
    if name_elem is not None and name_elem.text is not None:
        name_text = name_elem.text
    else:
        name_text = "Unnamed"

    # Split: Main Name (Nickname)
    match = re.match(r"(.*?)\s*\((.*?)\)", name_text)
    if match:
        entry['main_name'] = match.group(1).strip()
        entry['nickname'] = match.group(2).strip()
    else:
        entry['main_name'] = name_text
        entry['nickname'] = None  # May still be in ExtendedData

    # --- Description ---
    # An empty <description/> has text None; treat it as "no description".
    description = placemark.find('kml:description', ns)
    if description is not None and description.text is not None:
        entry['description'] = description.text.strip()
    else:
        entry['description'] = "No description"

    # --- Geometry ---
    # The three checks are independent, so when a Placemark holds several
    # geometry kinds the last match wins (Polygon over LineString over Point).
    coords = None
    geom_type = None

    point = placemark.find('.//kml:Point', ns)
    if point is not None:
        geom_type = 'Point'
        coords = point.find('kml:coordinates', ns)

    line = placemark.find('.//kml:LineString', ns)
    if line is not None:
        geom_type = 'LineString'
        coords = line.find('kml:coordinates', ns)

    polygon = placemark.find('.//kml:Polygon', ns)
    if polygon is not None:
        geom_type = 'Polygon'
        # First <coordinates> anywhere under the Polygon.
        coords = polygon.find('.//kml:coordinates', ns)

    # Guard against an empty <coordinates/> element (text is None).
    if coords is not None and coords.text is not None:
        coord_text = coords.text.strip()
    else:
        coord_text = "No coordinates"

    entry['geometry_type'] = geom_type if geom_type else "Unknown"
    entry['coordinates'] = coord_text

    # --- ExtendedData ---
    # NOTE(review): a Data name equal to another core key (e.g. 'description')
    # still overwrites that field; only 'nickname' is protected below.
    extended_data = placemark.find('kml:ExtendedData', ns)
    if extended_data is not None:
        for data_field in extended_data.findall('kml:Data', ns):
            key = data_field.get('name')
            if key is None:
                # A <Data> without a name attribute cannot be mapped to a column.
                continue

            value_elem = data_field.find('kml:value', ns)
            # An empty <value/> has text None; store it as an empty string.
            if value_elem is not None and value_elem.text is not None:
                value = value_elem.text.strip()
            else:
                value = ""

            if key == 'nickname':
                # Same key as the field parsed from <name>: use it only as a
                # fallback so it never overwrites an already parsed nickname.
                if not entry['nickname']:
                    entry['nickname'] = value
                continue

            entry[key] = value

            # Fallback: If Nickname appears in ExtendedData
            if key.lower() == 'nickname' and not entry['nickname']:
                entry['nickname'] = value

    # --- Print output ---
    print(f"--- {entry['main_name']} ---")
    print(f"Nickname: {entry['nickname']}")
    print(f"Type: {entry['geometry_type']}")
    print(f"Description: {entry['description']}")
    print(f"Coordinates:\n{entry['coordinates']}")
    print()

    data.append(entry)

# --- Step 5: Convert to DataFrame ---
df = pd.DataFrame(data)

# --- Optional: Show DataFrame head ---
print("DataFrame preview:")
print(df[['main_name', 'nickname', 'geometry_type', 'coordinates']].head())

--- Brusy ---
Nickname: None
Type: Point
Description: No description
Coordinates:
17.72635650711469,53.89990168007757,0

--- Bydgoszcz Błonie ---
Nickname: None
Type: Point
Description: No description
Coordinates:
17.95379761504704,53.11710975409892,0

--- Bydgoszcz Jachcice ---
Nickname: None
Type: Point
Description: No description
Coordinates:
17.98285348618702,53.13689597033368,0

--- Bydgoszcz Południe ---
Nickname: None
Type: Point
Description: No description
Coordinates:
18.06390347461197,53.1065035389906,0

--- Bydgoszcz Północ ---
Nickname: None
Type: Point
Description: No description
Coordinates:
18.02010366887875,53.13587277533668,0

--- Bydgoszcz Rupienica ---
Nickname: None
Type: Point
Description: No description
Coordinates:
18.02624583508158,53.11329687691467,0

--- Bydgoszcz Wschód ---
Nickname: None
Type: Point
Description: No description
Coordinates:
18.05971384939372,53.13230625468246,0

--- Bydgoszcz Śródmieście ---
Nickname: None
Type: Point
Description: No descript

In [12]:
# Step 1: Open the KMZ as a zip archive and write its doc.kml to disk.
with zipfile.ZipFile('KSE_2019.kmz', mode='r') as kmz:
    kmz.extract(member='doc.kml', path='output_folder')

# Step 2: Load the extracted KML and keep a handle on its root element.
kml_path = os.path.join('output_folder', 'doc.kml')
tree = ET.parse(kml_path)
root = tree.getroot()

# Step 3: Prefix -> URI map so element lookups can use the `kml:` prefix.
ns = dict(kml='http://www.opengis.net/kml/2.2')

In [13]:
# Step 4: Find all unique field names in ExtendedData
fields = set()

for placemark in root.findall('.//kml:Placemark', ns):
    extended_data = placemark.find('kml:ExtendedData', ns)
    if extended_data is None:
        continue
    # Loop names are chosen so they do not rebind the notebook-level `data`
    # (the list of extracted records) or `name`.
    for data_field in extended_data.findall('kml:Data', ns):
        field_name = data_field.get('name')
        if field_name:
            fields.add(field_name)

# These four core fields are added unconditionally; nothing here checks
# whether <name> or <description> actually occur in the document.
fields.update(['name', 'description', 'geometry_type', 'coordinates'])

print("Detected fields in KMZ:")
for field in sorted(fields):
    print(f"- {field}")

Detected fields in KMZ:
- coordinates
- description
- geometry_type
- name


In [14]:
# --- Step 5: Extract Data from Each Placemark ---
# One dict per Placemark: name, description, geometry type, raw coordinate
# text, plus one key per ExtendedData <Data name=...> entry.
data = []

for placemark in root.findall('.//kml:Placemark', ns):
    entry = {}

    name = placemark.find('kml:name', ns)
    description = placemark.find('kml:description', ns)

    # ElementTree reports the text of an empty element as None, so empty
    # <name/> / <description/> elements fall back to the same defaults as
    # missing ones instead of storing None or crashing on .strip().
    if name is not None and name.text is not None:
        entry['name'] = name.text
    else:
        entry['name'] = "Unnamed"

    if description is not None and description.text is not None:
        entry['description'] = description.text.strip()
    else:
        entry['description'] = "No description"

    # Geometry types
    # The checks are independent, so the last matching kind wins
    # (Polygon over LineString over Point).
    coords = None
    geom_type = None

    point = placemark.find('.//kml:Point', ns)
    if point is not None:
        geom_type = 'Point'
        coords = point.find('kml:coordinates', ns)

    line = placemark.find('.//kml:LineString', ns)
    if line is not None:
        geom_type = 'LineString'
        coords = line.find('kml:coordinates', ns)

    polygon = placemark.find('.//kml:Polygon', ns)
    if polygon is not None:
        geom_type = 'Polygon'
        # First <coordinates> anywhere under the Polygon.
        coords = polygon.find('.//kml:coordinates', ns)

    # Guard against an empty <coordinates/> element (text is None).
    if coords is not None and coords.text is not None:
        coord_text = coords.text.strip()
    else:
        coord_text = "No coordinates"

    # Add geometry info to dict
    entry['geometry_type'] = geom_type if geom_type else "Unknown"
    entry['coordinates'] = coord_text

    # Print each placemark
    print(f"--- {entry['name']} ---")
    print(f"Type: {entry['geometry_type']}")
    print(f"Description: {entry['description']}")
    print(f"Coordinates:\n{entry['coordinates']}")

    # ExtendedData
    extended_data = placemark.find('kml:ExtendedData', ns)
    if extended_data is not None:
        for data_field in extended_data.findall('kml:Data', ns):
            key = data_field.get('name')
            value_elem = data_field.find('kml:value', ns)
            # An empty <value/> has text None; store it as an empty string.
            if value_elem is not None and value_elem.text is not None:
                value = value_elem.text.strip()
            else:
                value = ""
            entry[key] = value
            print(f"{key}: {value}")

    print()  # blank line after each placemark
    data.append(entry)

# --- Step 6: Convert to DataFrame ---
df = pd.DataFrame(data)

# --- Optional: Show DataFrame head ---
print("DataFrame preview:")
print(df.head())

--- Brusy ---
Type: Point
Description: No description
Coordinates:
17.72635650711469,53.89990168007757,0

--- Bydgoszcz Błonie ---
Type: Point
Description: No description
Coordinates:
17.95379761504704,53.11710975409892,0

--- Bydgoszcz Jachcice ---
Type: Point
Description: No description
Coordinates:
17.98285348618702,53.13689597033368,0

--- Bydgoszcz Południe ---
Type: Point
Description: No description
Coordinates:
18.06390347461197,53.1065035389906,0

--- Bydgoszcz Północ ---
Type: Point
Description: No description
Coordinates:
18.02010366887875,53.13587277533668,0

--- Bydgoszcz Rupienica ---
Type: Point
Description: No description
Coordinates:
18.02624583508158,53.11329687691467,0

--- Bydgoszcz Wschód ---
Type: Point
Description: No description
Coordinates:
18.05971384939372,53.13230625468246,0

--- Bydgoszcz Śródmieście ---
Type: Point
Description: No description
Coordinates:
18.01548442733492,53.12185826110145,0

--- Cementownia Bielawy ---
Type: Point
Description: No descript

In [17]:
# Show the first rows of the extracted placemark table.
df.head()

Unnamed: 0,name,description,geometry_type,coordinates
0,Brusy,No description,Point,"17.72635650711469,53.89990168007757,0"
1,Bydgoszcz Błonie,No description,Point,"17.95379761504704,53.11710975409892,0"
2,Bydgoszcz Jachcice,No description,Point,"17.98285348618702,53.13689597033368,0"
3,Bydgoszcz Południe,No description,Point,"18.06390347461197,53.1065035389906,0"
4,Bydgoszcz Północ,No description,Point,"18.02010366887875,53.13587277533668,0"


In [21]:
# Function to safely extract lon/lat from coordinate string
def extract_lon_lat(coord_str):
    """Return ``(lon, lat)`` of the first coordinate tuple in a KML coordinate string.

    The string is split on whitespace into tuples and the first tuple is split
    on commas; its first two fields are converted to floats.

    Args:
        coord_str: Coordinate text such as ``"17.7,53.8,0"``; may hold several
            whitespace-separated tuples.

    Returns:
        A ``(lon, lat)`` pair of floats, or ``(None, None)`` when the input is
        empty, contains no comma, has fewer than two fields in its first
        tuple, or cannot be processed at all.
    """
    missing = (None, None)
    try:
        if not coord_str or ',' not in coord_str:
            return missing
        # Only the first tuple matters, even for multi-point geometries.
        first_tuple = coord_str.strip().split()[0]
        fields = first_tuple.split(',')
        if len(fields) < 2:
            return missing
        lon = float(fields[0])
        lat = float(fields[1])
        return lon, lat
    except Exception:
        # Best effort: any failure (non-string input, non-numeric fields) maps to "missing".
        return missing

# Apply function row-wise
# Collect the (lon, lat) pairs into a two-column frame aligned on df's index.
# Unlike unpacking zip(*pairs), this also works when df has no rows.
lon_lat = pd.DataFrame(
    df['coordinates'].map(extract_lon_lat).tolist(),
    columns=['lon', 'lat'],
    index=df.index,
)
df['lon'] = lon_lat['lon']
df['lat'] = lon_lat['lat']

# Optional: filter out invalid rows
# Rows whose coordinates could not be parsed have missing lon/lat and are dropped here.
df_clean = df.dropna(subset=['lon', 'lat'])

# Preview
print(df_clean[['name', 'lon', 'lat']].head())

                 name        lon        lat
0               Brusy  17.726357  53.899902
1    Bydgoszcz Błonie  17.953798  53.117110
2  Bydgoszcz Jachcice  17.982853  53.136896
3  Bydgoszcz Południe  18.063903  53.106504
4    Bydgoszcz Północ  18.020104  53.135873


In [22]:
# Show the rows that kept a parsed lon/lat pair.
df_clean

Unnamed: 0,name,description,geometry_type,coordinates,lon,lat
0,Brusy,No description,Point,"17.72635650711469,53.89990168007757,0",17.726357,53.899902
1,Bydgoszcz Błonie,No description,Point,"17.95379761504704,53.11710975409892,0",17.953798,53.117110
2,Bydgoszcz Jachcice,No description,Point,"17.98285348618702,53.13689597033368,0",17.982853,53.136896
3,Bydgoszcz Południe,No description,Point,"18.06390347461197,53.1065035389906,0",18.063903,53.106504
4,Bydgoszcz Północ,No description,Point,"18.02010366887875,53.13587277533668,0",18.020104,53.135873
...,...,...,...,...,...,...
5171,EC Zabrze,No description,Point,"18.81166540001864,50.2994758625995,0",18.811665,50.299476
5172,EC Miechowice,No description,Point,"18.84257612457504,50.34877087677186,0",18.842576,50.348771
5173,EC Katowice,No description,Point,"19.05371355340065,50.28549361257107,0",19.053714,50.285494
5174,EC Chorzów,No description,Point,"18.96941159329202,50.30768336930052,0",18.969412,50.307683


In [23]:
import json

def parse_geometry(geometry_type, coordinates):
    """Convert a KML coordinate string into a GeoJSON geometry dict.

    The string is split on whitespace into tuples; each tuple must hold at
    least ``lon,lat`` and only those first two ordinates are kept.

    Args:
        geometry_type: ``'Point'``, ``'LineString'`` or ``'Polygon'``; any
            other value yields ``None``.
        coordinates: Whitespace-separated ``lon,lat[,alt]`` tuples.

    Returns:
        A dict with ``"type"`` and ``"coordinates"`` keys, or ``None`` when the
        geometry type is unsupported or the coordinates cannot be parsed. On a
        parse failure a "Failed parsing geometry" message is printed.
    """
    try:
        positions = []
        for token in coordinates.strip().split():
            ordinates = token.split(',')
            if len(ordinates) < 2:
                # A position needs both lon and lat; refuse to emit a
                # one-element position.
                raise ValueError(f"coordinate tuple needs lon,lat: {token!r}")
            positions.append([float(ordinates[0]), float(ordinates[1])])

        if geometry_type == 'Point':
            # Only the first position is used for a point.
            return {
                "type": "Point",
                "coordinates": positions[0]
            }
        elif geometry_type == 'LineString':
            return {
                "type": "LineString",
                "coordinates": positions
            }
        elif geometry_type == 'Polygon':
            return {
                "type": "Polygon",
                "coordinates": [positions]  # One ring
            }
    except (AttributeError, IndexError, TypeError, ValueError) as e:
        # Non-string input, empty coordinate list, or non-numeric ordinates.
        print(f"Failed parsing geometry: {e}")
        return None
    return None

# Build GeoJSON FeatureCollection
features = []

for _, row in df.iterrows():
    geometry = parse_geometry(row.get('geometry_type'), row.get('coordinates', ''))
    if geometry is None:
        # Rows without a usable geometry are left out of the export.
        continue

    # Every remaining column becomes a feature property. Missing cells are
    # float NaN, which json.dump would write as the bare token NaN (not valid
    # JSON), so they are mapped to None and serialised as null.
    # `value != value` is true only for NaN.
    properties = {
        key: (None if isinstance(value, float) and value != value else value)
        for key, value in row.drop(['coordinates', 'geometry_type']).to_dict().items()
    }

    feature = {
        "type": "Feature",
        "geometry": geometry,
        "properties": properties
    }
    features.append(feature)

geojson = {
    "type": "FeatureCollection",
    "features": features
}

# Save to file
with open("output.geojson", "w", encoding="utf-8") as f:
    json.dump(geojson, f, indent=2)

print("✅ GeoJSON exported to output.geojson")

✅ GeoJSON exported to output.geojson


In [25]:
# Show the placemark table.
# NOTE(review): the saved output below has main_name/nickname columns and no
# lon/lat, so it appears to come from the nickname-splitting extraction cell
# rather than the cells directly above — confirm the execution order.
df

Unnamed: 0,main_name,nickname,description,geometry_type,coordinates
0,Brusy,,No description,Point,"17.72635650711469,53.89990168007757,0"
1,Bydgoszcz Błonie,,No description,Point,"17.95379761504704,53.11710975409892,0"
2,Bydgoszcz Jachcice,,No description,Point,"17.98285348618702,53.13689597033368,0"
3,Bydgoszcz Południe,,No description,Point,"18.06390347461197,53.1065035389906,0"
4,Bydgoszcz Północ,,No description,Point,"18.02010366887875,53.13587277533668,0"
...,...,...,...,...,...
5171,EC Zabrze,,No description,Point,"18.81166540001864,50.2994758625995,0"
5172,EC Miechowice,,No description,Point,"18.84257612457504,50.34877087677186,0"
5173,EC Katowice,,No description,Point,"19.05371355340065,50.28549361257107,0"
5174,EC Chorzów,,No description,Point,"18.96941159329202,50.30768336930052,0"


### Convert Excel file into GeoJSON

In [5]:
import geopandas as gpd
from shapely.geometry import Point

In [6]:
# Load the 'power station' sheet of the workbook into a DataFrame.
df_1 = pd.read_excel('dolnoslaskie.xlsx', sheet_name='power station')

In [7]:
# Show the loaded sheet.
df_1

Unnamed: 0,wkt_geom,fid,name,lon,lat,group,region
0,Point (15.35727678046260003 51.00728383804123212),3,Bartoszówka,15.357277,51.007284,Grupa Mikułowa,Oddział Jelenia Góra
1,Point (16.95677126142697944 51.04026798157452305),100,Bielany,16.956771,51.040268,Grupa Klecina,Oddział Wrocław
2,Point (16.61140380250553861 50.68939565192407315),71,Bielawa,16.611404,50.689396,Grupa Ząbkowice,Oddział Wałbrzych
3,Point (16.90302283231214986 51.02019054702402201),134,Biskupice [BSP],16.903023,51.020191,Grupa Klecina,Oddział Wrocław
4,Point (16.19054027301188015 50.75280485196915237),135,Boguszów [BOG],16.190540,50.752805,Grupa Boguszów,Oddział Wałbrzych
...,...,...,...,...,...,...,...
142,Point (17.05438432126165083 50.61039872557247321),97,Ziębice,17.054384,50.610399,Grupa Ząbkowice,Oddział Wałbrzych
143,Point (15.9188749936847902 51.11385095277329782),67,Złotoryja,15.918875,51.113851,Grupa Pawłowice,Oddział Legnica
144,Point (16.92322364393972123 51.46862150124397317),132,Żmigród,16.923224,51.468622,Grupa Pasikurowice,Oddział Wrocław
145,Point (17.03163381898551876 50.98237167108148071),133,Żórawina,17.031634,50.982372,,


In [9]:

# Build one shapely Point per row (x = lon column, y = lat column).
geometry = [Point(lon, lat) for lon, lat in zip(df_1['lon'], df_1['lat'])]

# Wrap the table as a GeoDataFrame, declaring WGS84 (EPSG:4326) up front.
gdf = gpd.GeoDataFrame(df_1, geometry=geometry, crs="EPSG:4326")

# Write the stations out as GeoJSON.
gdf.to_file("dolnoslaskie_station.geojson", driver="GeoJSON")