In [1]:
import sys
from csv import DictReader, DictWriter
from pathlib import Path
from pprint import pprint

from nominally import parse_name

In [2]:
# Folder that holds the example CSV files; created on first run if missing.
DATA_FOLDER = Path("../data/")
DATA_FOLDER.mkdir(exist_ok=True)
# The csv module performs its own newline handling, so files passed to it
# should be opened with newline="" on every platform, not only on Windows
# (see the csv module documentation).
CSV_NEWLINE = ""

In [3]:
# Input and output files both live in DATA_FOLDER.
CSV_IN = DATA_FOLDER / "names.csv"
CSV_OUT = DATA_FOLDER / "names_parsed.csv"  # plain literal: nothing to interpolate
# Header of the single input column that holds each raw, unparsed name.
NAME_FIELD = "full"

# Sample input: a header line followed by one name per line. Names that
# contain a comma are double-quoted so the CSV reader keeps them in one field.
DATA = """full
Mr. Samuel 'Sam' Vimes
Samuel "Young Sam" Vimes II.
Sybil Deirdre Olgivanna Ramkin-Vimes
Claude Maximillian Overton Transpire (CMOT) Dibbler
Jane Mary Betty Ann Pamela von Jones
John "Not-A-Vampire-At-All" Smith
"Dr Lawn, John (Mossy)"
"Vetinari, Havelock"
William de Worde
"von Lipwig, Moist"
"""

CSV_IN.write_text(DATA)
# The first line of DATA is the header row, so it is not counted as a name.
print(f"Wrote {len(DATA.splitlines()) - 1} names to {CSV_IN.name}.")

Wrote 10 names to names.csv.


In [4]:
# Read the raw names back from the NAME_FIELD column of the input CSV.
# newline="" leaves line-ending handling to the csv module, as its
# documentation recommends for any file object passed to a reader.
with CSV_IN.open("r", newline="") as infile:
    reader = DictReader(infile)
    raw_names = [row[NAME_FIELD] for row in reader]
print(f"Read {len(raw_names)} raw names from {CSV_IN.name}")

Read 10 raw names from names.csv


In [5]:
# Display the raw name strings exactly as they were read from the CSV.
raw_names

["Mr. Samuel 'Sam' Vimes",
 'Samuel "Young Sam" Vimes II.',
 'Sybil Deirdre Olgivanna Ramkin-Vimes',
 'Claude Maximillian Overton Transpire (CMOT) Dibbler',
 'Jane Mary Betty Ann Pamela von Jones',
 'John "Not-A-Vampire-At-All" Smith',
 'Dr Lawn, John (Mossy)',
 'Vetinari, Havelock',
 'William de Worde',
 'von Lipwig, Moist']

In [6]:
# Build one record per raw name: the original string is stored under
# NAME_FIELD first, followed by every key/value pair that parse_name yields.
parsed_basic = list(
    {NAME_FIELD: raw_name, **parse_name(raw_name)}
    for raw_name in raw_names
)
parsed_basic

[{'full': "Mr. Samuel 'Sam' Vimes",
  'title': 'mr',
  'first': 'samuel',
  'middle': '',
  'last': 'vimes',
  'suffix': '',
  'nickname': 'sam'},
 {'full': 'Samuel "Young Sam" Vimes II.',
  'title': '',
  'first': 'samuel',
  'middle': '',
  'last': 'vimes',
  'suffix': 'ii',
  'nickname': 'young sam'},
 {'full': 'Sybil Deirdre Olgivanna Ramkin-Vimes',
  'title': '',
  'first': 'sybil',
  'middle': 'deirdre olgivanna',
  'last': 'ramkin-vimes',
  'suffix': '',
  'nickname': ''},
 {'full': 'Claude Maximillian Overton Transpire (CMOT) Dibbler',
  'title': '',
  'first': 'claude',
  'middle': 'maximillian overton transpire',
  'last': 'dibbler',
  'suffix': '',
  'nickname': 'cmot'},
 {'full': 'Jane Mary Betty Ann Pamela von Jones',
  'title': '',
  'first': 'jane',
  'middle': 'mary betty ann pamela',
  'last': 'von jones',
  'suffix': '',
  'nickname': ''},
 {'full': 'John "Not-A-Vampire-At-All" Smith',
  'title': '',
  'first': 'john',
  'middle': '',
  'last': 'smith',
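  'suffix': '',
  'nickname': 'not-a-vampire-at-all'},
 {'full': 'Dr Lawn, John (Mossy)',
  'title': 'dr',
  'first': 'john',
  'middle': '',
  'last': 'lawn',
  'suffix': '',
  'nickname': 'mossy'},
 {'full': 'Vetinari, Havelock',
  'title': '',
  'first': 'havelock',
  'middle': '',
  'last': 'vetinari',
  'suffix': '',
  'nickname': ''},
 {'full': 'William de Worde',
  'title': '',
  'first': 'william',
  'middle': '',
  'last': 'de worde',
  'suffix': '',
  'nickname': ''},
 {'full': 'von Lipwig, Moist',
  'title': '',
  'first': 'moist',
  'middle': '',
  'last': 'von lipwig',
  'suffix': '',
  'nickname': ''}]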

In [7]:
# Write the parsed records to CSV_OUT, one row per name.
with CSV_OUT.open("w", newline=CSV_NEWLINE) as csv_file:
    # The first record's keys define the column order of the output file.
    columns = list(parsed_basic[0])
    csv_writer = DictWriter(csv_file, fieldnames=columns)
    csv_writer.writeheader()
    csv_writer.writerows(parsed_basic)
print(f"Output {len(parsed_basic)} names to {CSV_OUT.name}")

Output 10 names to names_parsed.csv


In [8]:
# Show the written file verbatim to check the header and the rows.
parsed_csv_text = CSV_OUT.read_text()
print(parsed_csv_text)

full,title,first,middle,last,suffix,nickname
Mr. Samuel 'Sam' Vimes,mr,samuel,,vimes,,sam
"Samuel ""Young Sam"" Vimes II.",,samuel,,vimes,ii,young sam
Sybil Deirdre Olgivanna Ramkin-Vimes,,sybil,deirdre olgivanna,ramkin-vimes,,
Claude Maximillian Overton Transpire (CMOT) Dibbler,,claude,maximillian overton transpire,dibbler,,cmot
Jane Mary Betty Ann Pamela von Jones,,jane,mary betty ann pamela,von jones,,
"John ""Not-A-Vampire-At-All"" Smith",,john,,smith,,not-a-vampire-at-all
"Dr Lawn, John (Mossy)",dr,john,,lawn,,mossy
"Vetinari, Havelock",,havelock,,vetinari,,
William de Worde,,william,,de worde,,
"von Lipwig, Moist",,moist,,von lipwig,,

