In [2]:
%pip install numbers-parser


Collecting numbers-parser
  Downloading numbers_parser-4.16.2-py3-none-any.whl.metadata (23 kB)
Collecting compact-json<2.0.0,>=1.1.3 (from numbers-parser)
  Downloading compact_json-1.8.1-py3-none-any.whl.metadata (11 kB)
Collecting protobuf<6.0,>=4.0 (from numbers-parser)
  Downloading protobuf-5.29.5-cp38-abi3-macosx_10_9_universal2.whl.metadata (592 bytes)
Collecting python-snappy<1.0,>=0.7 (from numbers-parser)
  Downloading python_snappy-0.7.3-py3-none-any.whl.metadata (1.3 kB)
Collecting sigfig<2.0.0,>=1.3.3 (from numbers-parser)
  Downloading sigfig-1.3.19-py3-none-any.whl.metadata (4.7 kB)
Collecting enum-tools>=0.11 (from numbers-parser)
  Downloading enum_tools-0.13.0-py3-none-any.whl.metadata (8.0 kB)
Collecting sortedcontainers<3.0.0,>=2.4.0 (from sigfig<2.0.0,>=1.3.3->numbers-parser)
  Using cached sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Downloading numbers_parser-4.16.2-py3-none-any.whl (328 kB)
Downloading compact_json-1.8.1-py3-none-any.whl (16 kB)

In [4]:
from numbers_parser import Document
import pandas as pd
from pathlib import Path

# Resolve the workbook location; the CSV is written alongside it with the same stem.
numbers_path = Path("../map_data_demo.numbers").resolve()
output_path = numbers_path.with_suffix(".csv")

print(f"Reading Numbers file: {numbers_path}")
doc = Document(numbers_path)

# Only the first table on the first sheet is exported.
sheet = doc.sheets[0]
table = sheet.tables[0]


def _has_content(row):
    """Return True when at least one cell in the row is not None."""
    return not all(cell is None for cell in row)


# Skip rows whose cells are all None.
rows = [row for row in table.rows(values_only=True) if _has_content(row)]
if len(rows) == 0:
    raise ValueError("The first table in the Numbers file appears to be empty.")

# The first surviving row supplies the column names; everything after it is data.
header = rows[0]
body = rows[1:]

df = pd.DataFrame(data=body, columns=header)
df.to_csv(output_path, index=False)

print(f"Exported CSV: {output_path}")
df.head()


Reading Numbers file: /Users/ujwalneethipudi/Downloads/European Political Tech Landscape/map_data_demo.numbers
Exported CSV: /Users/ujwalneethipudi/Downloads/European Political Tech Landscape/map_data_demo.csv


Unnamed: 0,S. No.,Entity,Logo,HQ,Domain,Description,Map Bucket 1,Map Bucket 2,Relevance Flag (Y/N),Sub Bucket,Tools/Products,Map Bucket 1 (Normalized)
0,1.0,Panterra Global,,United Kingdom,https://www.panterra.global/,Strategic advisory to governments/corporations...,Media & Messaging,,Y,,,Messaging & Media
1,3.0,influencethe.com,,Bulgaria,https://influencethe.com/,Disinformation detection & “community intellig...,Media & Messaging,,Y,Information Integrity / Disinformation,,Messaging & Media
2,4.0,Electify,,,,"Secure online election software; customizable,...",Voting Tech,,Y,,,Voting Tech
3,5.0,Genaios GmbH,,Germany,https://genaios.ai/,"AI-powered fact-checking, content originality ...",Media & Messaging,,Y,,,Messaging & Media
4,14.0,Pakflatt,,United Kingdom,https://www.pakflatt.com/,Manufacturer of polling booths and election-eq...,GovTech / Civic Infrastructure,Voting Tech,Y,,,GovTech / Civic Infrastructure


In [6]:
# Export grouped JSON for the app
import json
from pathlib import Path

# Build the category -> companies JSON consumed by the app from the exported CSV.
#
# Columns expected in CSV:
# 'Entity', 'Domain', 'Description', 'Map Bucket 1 (Normalized)'

csv_path = Path("../map_data_demo.csv").resolve()
src_json_out = Path("../src/data/companies.json").resolve()
public_json_out = Path("../public/companies.json").resolve()
src_json_out.parent.mkdir(parents=True, exist_ok=True)
public_json_out.parent.mkdir(parents=True, exist_ok=True)

print(f"Reading CSV: {csv_path}")
df = pd.read_csv(csv_path)

required_cols = [
    'Entity',
    'Domain',
    'Description',
    'Map Bucket 1 (Normalized)'
]
missing = [c for c in required_cols if c not in df.columns]
if missing:
    raise ValueError(f"Missing required columns in CSV: {missing}")

# Keep only the columns the app needs, under the key names used in the JSON.
clean = df[required_cols].copy()
clean = clean.rename(columns={
    'Entity': 'name',
    'Domain': 'domain',
    'Description': 'description',
    'Map Bucket 1 (Normalized)': 'category'
})


def _clean_text(value):
    """Return the value as stripped text, or '' when it is missing."""
    return '' if pd.isna(value) else str(value).strip()


# Normalise every text column BEFORE filtering and grouping. Stripping first
# means whitespace-only names/categories count as empty, and categories that
# differ only by surrounding whitespace end up in the same group.
item_fields = ['name', 'domain', 'description']
for column in item_fields + ['category']:
    clean[column] = clean[column].map(_clean_text)

# Drop rows whose name or category is empty after normalisation.
clean = clean[(clean['name'] != '') & (clean['category'] != '')]

# Group by category; each company becomes a {name, domain, description} record.
grouped = {
    cat: group[item_fields].to_dict(orient='records')
    for cat, group in clean.groupby('category')
}

# The same payload is written to both locations.
for destination in [src_json_out, public_json_out]:
    with open(destination, 'w', encoding='utf-8') as f:
        json.dump(grouped, f, ensure_ascii=False, indent=2)
    print(f"Wrote JSON: {destination}")

list(grouped.keys())[:8]


Reading CSV: /Users/ujwalneethipudi/Downloads/European Political Tech Landscape/map_data_demo.csv
Wrote JSON: /Users/ujwalneethipudi/Downloads/European Political Tech Landscape/src/data/companies.json
Wrote JSON: /Users/ujwalneethipudi/Downloads/European Political Tech Landscape/public/companies.json


['Data Analytics & Modeling',
 'Engagement & Mobilisation',
 'Fundraising',
 'GovTech / Civic Infrastructure',
 'Messaging & Media',
 'Organisational Infrastructure',
 'Research & Insights',
 'Voting Tech']