In [11]:
# import std libs
import urllib
import urllib.request
from pathlib import Path

# import ext libs
import matplotlib.pyplot as plot
import polars as pl

In [12]:
# Download the dataset CSV published at https://geoshape.ex.nii.ac.jp/nrct/
# unless a local copy already exists.
url = 'https://geoshape.ex.nii.ac.jp/nrct/dataset/nrct-20231025.csv'

localpath = Path("./nrct.csv")
if not localpath.is_file():
    # Fetch into a sibling temporary file and rename it only after the
    # transfer has finished, so an interrupted download never leaves a
    # truncated nrct.csv that the is_file() guard would accept on re-run.
    partial = localpath.with_name(localpath.name + '.part')
    try:
        urllib.request.urlretrieve(url, partial)
        partial.replace(localpath)
    finally:
        # No-op after a successful rename; removes leftovers after a failure.
        partial.unlink(missing_ok=True)

In [13]:
# Read the dataset from the same path the download cell wrote to (localpath)
# instead of repeating the file name, so the two cannot drift apart.
csv = pl.read_csv(localpath)
# Summary statistics for every column.
csv.describe()

describe,id,都道府県コード,名称,読み,上位地名,出典住所,緯度,経度,推定手法,歴史地名ID,歴史地名,geolod_id
str,f64,f64,str,str,str,str,f64,f64,str,f64,str,str
"""count""",80277.0,80277.0,"""80277""","""80277""","""80277""","""80277""",80277.0,80277.0,"""80277""",80277.0,"""80277""","""80277"""
"""null_count""",0.0,0.0,"""0""","""0""","""0""","""0""",3651.0,3651.0,"""0""",39823.0,"""39823""","""39823"""
"""mean""",226180000000.0,22.61816,,,,,35.61884,136.595692,,40188000.0,,
"""std""",126170000000.0,12.616932,,,,,2.145059,3.313941,,105151.603961,,
"""min""",10000000000.0,1.0,"""【すす】屋村""","""ああらごむら""","""あきる野市""","""あきる野市三内""",24.058586,122.987679,"""jageocoder""",40000073.0,"""●幸地""","""0000C9"""
"""25%""",130000000000.0,13.0,,,,,34.652501,134.539688,,40076217.0,,
"""50%""",220000000000.0,22.0,,,,,35.425953,136.867706,,40234967.0,,
"""75%""",320000000000.0,32.0,,,,,36.557764,139.638672,,40280512.0,,
"""max""",480000000000.0,48.0,"""𤘩宮城村""","""んみーむら""","""龍野市""","""龍野市龍野町立町・龍野町大手…",45.444754,148.461914,"""rekishi_db""",40318444.0,"""＃田""","""zyk2uP"""


In [14]:
# Peek at the first five rows of the loaded frame.
csv.head(n=5)

id,都道府県コード,名称,読み,上位地名,出典住所,緯度,経度,推定手法,歴史地名ID,歴史地名,geolod_id
i64,i64,str,str,str,str,f64,f64,str,i64,str,str
10000036900,1,"""尻沢辺村""","""しりさわべむら""","""函館市""","""函館市住吉町・谷地頭町・青柳…",41.753087,140.715357,"""jageocoder""",,,
10000037300,1,"""大町""","""おおまち""","""函館市""","""函館市大町""",41.769485,140.709671,"""jageocoder""",,,
10000037700,1,"""喜楽町""","""きらくちよう""","""函館市""","""函館市大町""",41.769485,140.709671,"""jageocoder""",,,
10000037800,1,"""七軒町""","""しちけんちよう""","""函館市""","""函館市大町""",41.769485,140.709671,"""jageocoder""",,,
10000037900,1,"""寺町""","""てらまち""","""函館市""","""函館市弥生町・大町""",41.769094,140.707901,"""jageocoder""",,,


In [15]:
# Preview the derived 'lastchar' column: drop one trailing 市/町/村 from the
# name (名称), then capture the single character that is now last.
csv.with_columns(
    (
        pl.col('名称')
        .str.replace(r'[市町村]$', '')       # strip the suffix, if present
        .str.extract(r'(.)$', group_index=1)  # final remaining character
        .alias('lastchar')
    )
)

id,都道府県コード,名称,読み,上位地名,出典住所,緯度,経度,推定手法,歴史地名ID,歴史地名,geolod_id,lastchar
i64,i64,str,str,str,str,f64,f64,str,i64,str,str,str
10000036900,1,"""尻沢辺村""","""しりさわべむら""","""函館市""","""函館市住吉町・谷地頭町・青柳…",41.753087,140.715357,"""jageocoder""",,,,"""辺"""
10000037300,1,"""大町""","""おおまち""","""函館市""","""函館市大町""",41.769485,140.709671,"""jageocoder""",,,,"""大"""
10000037700,1,"""喜楽町""","""きらくちよう""","""函館市""","""函館市大町""",41.769485,140.709671,"""jageocoder""",,,,"""楽"""
10000037800,1,"""七軒町""","""しちけんちよう""","""函館市""","""函館市大町""",41.769485,140.709671,"""jageocoder""",,,,"""軒"""
10000037900,1,"""寺町""","""てらまち""","""函館市""","""函館市弥生町・大町""",41.769094,140.707901,"""jageocoder""",,,,"""寺"""


In [25]:
# Tally how many rows share each 'lastchar' value, most frequent first.
# 'lastchar' is the name (名称) with one trailing 市/町/村 removed, reduced to
# its final character.
# NOTE(review): newer polars releases reportedly deprecate pl.count() in
# favour of pl.len() — confirm against the installed version before changing.
count = (
    csv
    .with_columns(
        pl.col('名称')
        .str.replace(r'[市町村]$', '')
        .str.extract(r'(.)$', group_index=1)
        .alias('lastchar')
    )
    .group_by('lastchar')
    .agg(pl.count())
    .sort('count', descending=True)
)
# Show the five most common final characters.
count.head(5)

lastchar,count
str,u32
"""田""",7960
"""原""",2713
"""野""",2683
"""谷""",1943
"""川""",1858


In [21]:
# Export the rows whose 'lastchar' (final character of the name after dropping
# one trailing 市/町/村) occurs in more than MIN_COUNT names.
# Requires `count`, the per-character tally built in the cell that assigns it.
MIN_COUNT = 100
outpath = Path('viz/list.csv')
# Create the output directory up front so the export does not fail when
# viz/ is missing.
outpath.parent.mkdir(parents=True, exist_ok=True)

csv.with_columns(
    # Computed once here; it serves as both the join key and an output column.
    pl.col('名称').str.replace(r'[市町村]$', '').str.extract(r'(.)$', 1).alias('lastchar')
).join(
    count, on='lastchar', how='left'
).filter(
    # Rows without a matching tally have a null count and are dropped as well.
    pl.col('count') > MIN_COUNT
).select(
    [
        # Rename the Japanese column headers to ASCII names for the output file.
        pl.col('名称').alias('name'),
        pl.col('読み').alias('yomi'),
        pl.col('緯度').alias('lat'),
        pl.col('経度').alias('lon'),
        pl.col('lastchar'),
    ]
).write_csv(outpath)