In [154]:
import json

class TrieNode:
    """A single vertex of the character trie.

    Attributes are documented inline because each node carries only two
    pieces of state.
    """

    def __init__(self):
        # True once some inserted word ends exactly at this node.
        self.is_end_of_word = False
        # Outgoing edges: next character -> child TrieNode.
        self.children = dict()

class Trie:
    """Character trie that stores whole words and answers prefix queries."""

    def __init__(self):
        # The root represents the empty string and never corresponds to a character.
        self.root = TrieNode()

    def insert(self, word):
        """Add ``word`` to the trie, creating any missing nodes along its path.

        Args:
            word: The string to store. Inserting the same word twice is harmless.
        """
        node = self.root
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        node.is_end_of_word = True

    def search(self, word):
        """Return the longest stored word that is a prefix of ``word``.

        The walk follows ``word`` character by character and remembers the
        deepest position at which a stored word ended. It keeps going after
        the first hit, so when both ``"ab"`` and ``"abcd"`` are stored a query
        of ``"abcdef"`` yields ``"abcd"`` rather than ``"ab"``.

        Args:
            word: The query string.

        Returns:
            The matching prefix of ``word`` (a slice of the query itself), or
            ``''`` when no stored word is a prefix of it.
        """
        node = self.root
        longest = 0  # length of the longest stored word seen so far along the path
        for depth, char in enumerate(word, start=1):
            node = node.children.get(char)
            if node is None:
                # The query leaves the trie here; nothing deeper can match.
                break
            if node.is_end_of_word:
                longest = depth
        return word[:longest]

class Solution:
    """Split a free-text address into province, district and ward.

    On construction, every "ward, district, province" triple found in
    ``dvhcvn.json`` is lower-cased and inserted into a trie. ``process`` first
    asks the trie whether the input starts with one of those known addresses.
    If it does, the names recorded for that address are returned. Otherwise
    the input is split on commas and its last three fields are taken as ward,
    district and province, in that order.
    """

    def __init__(self):
        self.trie = Trie()
        # Lower-cased "ward, district, province" key -> (province, district, ward)
        # using the names exactly as they appear in the data file.
        self._canonical = {}
        self.build_trie()

    def build_trie(self):
        """Load ``dvhcvn.json`` and index every ward-level address.

        The file is expected to hold ``{'data': [...]}`` where each entry has a
        ``name`` and a ``level2s`` list, and each of those has a ``name`` and a
        ``level3s`` list of entries with a ``name``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If an expected key is missing.
        """
        # Read as UTF-8 explicitly: the platform default encoding (for example
        # cp1252 on Windows) cannot be relied on for Vietnamese text.
        with open('dvhcvn.json', encoding='utf-8') as f:
            data_viet_nam = json.load(f)

        for city in data_viet_nam['data']:
            for district in city['level2s']:
                for ward in district['level3s']:
                    address = f"{ward['name'].lower()}, {district['name'].lower()}, {city['name'].lower()}"
                    self.trie.insert(address)
                    self._canonical[address] = (city['name'], district['name'], ward['name'])

    def longest_prefix_match(self, s):
        """Return what the trie reports as the known-address prefix of ``s``.

        The input is lower-cased first because the trie only holds lower-cased
        keys. The result is ``''`` when the trie finds no match.
        """
        return self.trie.search(s.lower())

    def dp_remaining(self, s, prefix):
        """Parse the part of ``s`` that follows ``prefix`` positionally.

        The remainder is split on commas and each field is trimmed, so both
        ``"a, b, c"`` and ``"a,b,c"`` are understood. The last field is taken
        as the province, the one before it as the district and the one before
        that as the ward.

        Args:
            s: The raw input address.
            prefix: A leading portion of ``s`` to skip. Only its length is
                used; pass ``''`` to parse all of ``s``.

        Returns:
            A ``(province, district, ward)`` tuple. Missing fields are ``''``.
        """
        # NOTE(review): prefix comes from the lower-cased input; this slice
        # assumes lower-casing did not change the string's length.
        remaining = s[len(prefix):].strip(', ')
        parts = [part.strip() for part in remaining.split(',')]
        province = parts[-1]
        district = parts[-2] if len(parts) >= 2 else ''
        ward = parts[-3] if len(parts) >= 3 else ''
        return province, district, ward

    def process(self, s):
        """Resolve ``s`` into its administrative components.

        Args:
            s: The address text to parse.

        Returns:
            A dict with the keys ``"province"``, ``"district"`` and ``"ward"``.
        """
        prefix = self.longest_prefix_match(s)

        # A recognised address is answered from the indexed data. Parsing only
        # the text after the match would return empty fields for an input that
        # is exactly a known address.
        known = self._canonical.get(prefix) if prefix else None
        if known is not None:
            province, district, ward = known
        else:
            province, district, ward = self.dp_remaining(s, prefix)

        return {
            "province": province,
            "district": district,
            "ward": ward,
        }

In [157]:
# Smoke-check the parser on a few hand-written addresses, printing one result per line.
test = Solution()
for sample_address in (
    "Tuyên Quang, Yên Sơn, Tân Bình",
    "phường 1, quận 1, tp hồ chí minh",
    "phường 1, quận 1, hồ chí minh",
    "phường 1, quận 1, hcm",
):
    print(test.process(sample_address))

{'province': 'Tân Bình', 'district': 'Yên Sơn', 'ward': 'Tuyên Quang'}
{'province': 'tp hồ chí minh', 'district': 'quận 1', 'ward': 'phường 1'}
{'province': 'hồ chí minh', 'district': 'quận 1', 'ward': 'phường 1'}
{'province': 'hcm', 'district': 'quận 1', 'ward': 'phường 1'}


In [158]:
# NOTE: DO NOT change this cell
# This cell is for scoring
#
# It runs Solution.process on every entry of test.json, compares the three
# returned fields with the expected ones, times each call, and writes a
# summary sheet plus a per-test details sheet to an Excel workbook.

TEAM_NAME = 'Ths_AA_Group20'  # This should be your team name
EXCEL_FILE = f'{TEAM_NAME}.xlsx'

import json
import time
# NOTE(review): opened with the platform default encoding; presumably the file
# is UTF-8 -- confirm it loads on every grading platform.
with open('test.json') as f:
    data = json.load(f)

summary_only = True  # set to False to print per-test progress
df = []              # one row per test case; converted to a DataFrame below
solution = Solution()
timer = []           # elapsed nanoseconds of each test that did not raise
correct = 0          # matching fields summed over all tests (at most 3 per test)
for test_idx, data_point in enumerate(data):
    address = data_point["text"]

    ok = 0
    try:
        start = time.perf_counter_ns()
        result = solution.process(address)
        answer = data_point["result"]
        finish = time.perf_counter_ns()
        timer.append(finish - start)
        # One point per field that matches the expected value exactly.
        ok += int(answer["province"] == result["province"])
        ok += int(answer["district"] == result["district"])
        ok += int(answer["ward"] == result["ward"])
        df.append([
            test_idx,
            address,
            answer["province"],
            result["province"],
            int(answer["province"] == result["province"]),
            answer["district"],
            result["district"],
            int(answer["district"] == result["district"]),
            answer["ward"],
            result["ward"],
            int(answer["ward"] == result["ward"]),
            ok,
            timer[-1] / 1_000_000_000,
        ])
    except Exception as e:
        # NOTE(review): `answer` is assigned after solution.process() in the
        # try block. If process() raises, `answer` here is still the previous
        # iteration's value, or is undefined on the first iteration, in which
        # case this handler itself raises NameError.
        df.append([
            test_idx,
            address,
            answer["province"],
            "EXCEPTION",
            0,
            answer["district"],
            "EXCEPTION",
            0,
            answer["ward"],
            "EXCEPTION",
            0,
            0,
            0,
        ])
        # Any failure counts as zero correct fields for this test.
        pass
    correct += ok


    if not summary_only:
        # Per-test progress output.
        # NOTE(review): timer[-1] is the most recent test that did not raise,
        # so it is stale (or missing) when the current test raised.
        print(f"Test {test_idx:5d}/{len(data):5d}")
        print(f"Correct: {ok}/3")
        print(f"Time Executed: {timer[-1] / 1_000_000_000:.4f}")


print(f"-"*30)
total = len(data) * 3  # three scored fields per test case
score_scale_10 = round(correct / total * 10, 2)
# Avoid max()/division on an empty list when every test raised.
if len(timer) == 0:
    timer = [0]
# Convert nanoseconds to seconds for reporting.
max_time_sec = round(max(timer) / 1_000_000_000, 4)
avg_time_sec = round((sum(timer) / len(timer)) / 1_000_000_000, 4)

import pandas as pd

# Single-row summary sheet.
df2 = pd.DataFrame(
    [[correct, total, score_scale_10, max_time_sec, avg_time_sec]],
    columns=['correct', 'total', 'score / 10', 'max_time_sec', 'avg_time_sec',],
)

# Column order must match the order of values appended to `df` above.
columns = [
    'ID',
    'text',
    'province',
    'province_student',
    'province_correct',
    'district',
    'district_student',
    'district_correct',
    'ward',
    'ward_student',
    'ward_correct',
    'total_correct',
    'time_sec',
]

df = pd.DataFrame(df)
df.columns = columns

# Write both sheets into one workbook named after the team.
writer = pd.ExcelWriter(EXCEL_FILE, engine='xlsxwriter')
df2.to_excel(writer, index=False, sheet_name='summary')
df.to_excel(writer, index=False, sheet_name='details')
writer.close()


------------------------------
