<a href="https://colab.research.google.com/github/zahir2498/colab-tools/blob/main/2002_Voter_search_Champdani_181.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# @title #🗳️ 2002 Voter Roll Search Tool for Champdani 181 — use Bangla for best results
# @markdown #⬅️ Click the ▶ button to start the app.
# @markdown This tool uses a copied text version of the official voter list PDF. For any mismatches or verification, please check the original PDF available on the official Election Commission website.

import re, pandas as pd, os, requests, unicodedata
from IPython.display import display, clear_output, HTML, Javascript
import ipywidgets as widgets

# --- Hide code cell ---
# Script handed to the notebook front-end: it sets `display: none` on every
# element matching the '.code-cell' selector.
_hide_code_cells_js = '''
var code_cells = document.querySelectorAll('.code-cell');
for (var i = 0; i < code_cells.length; i++) {
  code_cells[i].style.display = 'none';
}
'''
display(Javascript(_hide_code_cells_js))

# --- Install fuzzy library ---
# rapidfuzz is imported if already available; otherwise it is installed once
# and the import is retried.
try:
    from rapidfuzz import fuzz
except ImportError:
    # Only a missing package should trigger an install. A bare `except:` would
    # also swallow KeyboardInterrupt and unrelated errors raised during import.
    import subprocess
    import sys

    print("üì¶ Installing rapidfuzz for fuzzy search...")
    # Invoke pip through the running interpreter so the package lands in the
    # environment this kernel imports from. A failed install is not raised
    # here; it surfaces as ImportError on the retry below, as before.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "rapidfuzz"],
        check=False,
    )
    from rapidfuzz import fuzz

# --- Normalize Bangla Unicode ---
def normalize_bangla(text):
    """Return *text* in a canonical form suitable for comparison.

    Falsy input (``None``, ``""``) yields ``""``. Otherwise the text is
    NFC-normalized, the zero-width characters U+200B, U+200C and U+200D are
    removed, every run of whitespace becomes a single space, and leading and
    trailing whitespace is dropped.
    """
    if not text:
        return ""
    composed = unicodedata.normalize('NFC', text)
    # Mapping a code point to None makes translate() delete it.
    visible = composed.translate(dict.fromkeys((0x200B, 0x200C, 0x200D)))
    # split() without arguments breaks on whitespace runs and ignores both ends.
    return " ".join(visible.split())

# --- Download the voter roll file automatically from GitHub ---
GITHUB_FILE_URL = "https://github.com/zahir2498/jscss/raw/refs/heads/main/V1.txt"
text_file_path = "/content/V1.txt"

def ensure_file_downloaded(timeout=30):
    """Download the voter roll to ``text_file_path`` unless it already exists.

    Progress and failures are reported with ``print``; nothing is raised for
    network or file-system errors, so callers must check for the file
    themselves afterwards.

    Args:
        timeout: Seconds passed to ``requests.get``. Without a timeout the
            request could block indefinitely on a stalled connection.
    """
    if os.path.exists(text_file_path):
        return

    print("‚¨áÔ∏è Downloading voter roll file from GitHub...")
    # Write to a side file first and rename it into place, so an interrupted
    # write never leaves a truncated file that later runs would mistake for a
    # complete download.
    partial_path = text_file_path + ".part"
    try:
        response = requests.get(GITHUB_FILE_URL, timeout=timeout)
        if response.status_code != 200:
            print(f"‚ö†Ô∏è Failed to download file. HTTP {response.status_code}")
            return
        with open(partial_path, 'wb') as f:
            f.write(response.content)
        os.replace(partial_path, text_file_path)
        print("‚úÖ File downloaded successfully.")
    except (requests.RequestException, OSError) as e:
        print("‚ùå Error downloading file:", e)

# --- Custom Phonetic Dictionary ---
# Latin-script name spellings mapped to their replacement strings.
# eng_to_bangla_phonetic consults this table (by capitalized key) before it
# falls back to its letter-by-letter rules.
phonetic_dict = {
    # Honorific abbreviations
    "Md": "‡¶Æ‡¶π‡¶É",
    "Mohd": "‡¶Æ‡¶π‡¶É",
    # Given names and surnames
    "Najamulhasan": "‡¶®‡¶ú‡¶Æ‡ßÅ‡¶≤‡¶π‡¶æ‡¶∏‡¶æ‡¶®",
    "Ismail": "‡¶á‡¶∏‡¶Æ‡¶æ‡¶á‡¶≤",
    "Manjur": "‡¶Æ‡¶û‡ßç‡¶ú‡ßÅ‡¶∞",
    "Ali": "‡¶Ö‡¶≤‡ßÄ",
    "Abdul": "‡¶Ö‡¶¨‡¶¶‡ßÅ‡¶≤",
    "Sattar": "‡¶∏‡¶§‡ßç‡¶§‡¶æ‡¶∞",
    "Jaimuddin": "‡¶ú‡ßà‡¶Æ‡ßÅ‡¶¶‡ßç‡¶¶‡¶ø‡¶®",
    "Rajab": "‡¶∞‡¶ú‡¶¨",
    "Alim": "‡¶Ü‡¶≤‡¶ø‡¶Æ",
    "Bibi": "‡¶¨‡¶ø‡¶¨‡¶ø",
    "Murtuza": "‡¶Æ‡ßÅ‡¶∞‡¶§‡ßÅ‡¶ú‡¶æ",
    # Alternate spellings that share one replacement
    "Ahamed": "‡¶Ü‡¶π‡¶Æ‡ßá‡¶¶",
    "Ahmed": "‡¶Ü‡¶π‡¶Æ‡ßá‡¶¶",
    "Hossain": "‡¶π‡ßã‡¶∏‡ßá‡¶®",
    "Husen": "‡¶π‡ßÅ‡¶∏‡ßá‡¶®",
    "Hasan": "‡¶π‡¶æ‡¶∏‡¶æ‡¶®",
    "Rahman": "‡¶∞‡¶π‡¶Æ‡¶æ‡¶®",
    "Begum": "‡¶¨‡ßá‡¶ó‡¶Æ",
}

# --- Phonetic Transliteration Function ---
# Letter-level fallback rules, applied to words that have no dictionary entry.
_PHONETIC_RULES = {
    # Digraphs
    "sh": "‡¶∂", "kh": "‡¶ñ", "ch": "‡¶ö", "th": "‡¶•", "bh": "‡¶≠", "gh": "‡¶ò",
    # Single consonants
    "s": "‡¶∏", "k": "‡¶ï", "c": "‡¶ö", "t": "‡¶§",
    "d": "‡¶¶", "n": "‡¶®", "r": "‡¶∞", "l": "‡¶≤",
    "b": "‡¶¨", "g": "‡¶ó",
    "m": "‡¶Æ", "y": "‡¶Ø", "j": "‡¶ú", "h": "‡¶π",
    # Vowels
    "a": "‡¶æ", "i": "‡¶ø", "u": "‡ßÅ", "e": "‡ßá", "o": "‡ßã",
}
# Longest alternatives come first so that "th"/"bh"/"gh" are matched as units.
# Chained str.replace calls that handle "t"/"b"/"g" before the digraphs make
# the digraph rules unreachable.
_PHONETIC_RULE_RE = re.compile(
    "|".join(sorted(_PHONETIC_RULES, key=len, reverse=True))
)


def eng_to_bangla_phonetic(text, dictionary=None):
    """Transliterate Latin-script *text* word by word.

    Each whitespace-separated word is reduced to its ASCII letters and looked
    up in *dictionary*, first capitalized and then exactly as typed. Words
    without an entry are lower-cased and converted with the letter rules in
    ``_PHONETIC_RULES``; characters without a rule are kept unchanged.

    Args:
        text: The query to convert. ``None``, empty and whitespace-only input
            all yield ``""``.
        dictionary: Optional mapping of name spellings to replacements.
            Defaults to the module-level ``phonetic_dict``.

    Returns:
        The converted words concatenated without a separator.
    """
    if not text or not text.strip():
        return ""
    if dictionary is None:
        dictionary = phonetic_dict

    converted = []
    for word in text.split():
        key = re.sub(r'[^A-Za-z]', '', word)
        # capitalize() already lower-cases the tail, so one capitalized lookup
        # covers every casing of the typed word.
        match = dictionary.get(key.capitalize()) or dictionary.get(key)
        if match:
            converted.append(match)
        else:
            converted.append(
                _PHONETIC_RULE_RE.sub(
                    lambda m: _PHONETIC_RULES[m.group()], word.lower()
                )
            )
    # NOTE(review): words are joined without a space, as before. Presumably
    # this matches how names appear in the roll text; confirm.
    return "".join(converted)

# --- Load Text File ---
def load_text():
    """Return the voter roll as a list of normalized lines.

    The download step runs first. If the file is still missing afterwards, a
    red error banner is displayed and an empty list is returned.
    """
    ensure_file_downloaded()
    try:
        with open(text_file_path, 'r', encoding='utf-8') as roll_file:
            # Iterating the file object yields the same lines as readlines().
            return list(map(normalize_bangla, roll_file))
    except FileNotFoundError:
        banner = HTML("<b style='color:red;'>‚ö†Ô∏è Could not load voter roll file.</b>")
        display(banner)
        return []

# Module-level state shared by the button callbacks:
#   lines           - roll lines, filled by search_voter on its first run
#   search_results  - close matches from the most recent search
#   partial_results - possible matches from the most recent search
lines, search_results, partial_results = [], [], []

# --- Widgets ---
# Three free-text query fields; each gets its own Layout instance.
name_input = widgets.Text(
    description='‡¶®‡¶æ‡¶Æ / Name:',
    layout=widgets.Layout(width='400px'),
)
father_input = widgets.Text(
    description='‡¶™‡¶ø‡¶§‡¶æ/‡¶Æ‡¶æ‡¶§‡¶æ:',
    layout=widgets.Layout(width='400px'),
)
id_input = widgets.Text(
    description='ID No:',
    layout=widgets.Layout(width='400px'),
)

# Action buttons; their click handlers are attached further down the file.
search_button = widgets.Button(
    description="üîç Search",
    button_style='primary',
    layout=widgets.Layout(width='150px'),
)
download_button = widgets.Button(
    description="‚¨áÔ∏è Download CSV",
    button_style='success',
    layout=widgets.Layout(width='200px'),
)

# Output area that the callbacks render into.
output_box = widgets.Output()

# --- Search Function ---
# Patterns used while scanning the roll, compiled once instead of per line.
_PART_HEADER_EN_RE = re.compile(r'Part\s*\d+', re.IGNORECASE)
_PART_HEADER_BN_RE = re.compile(r'‡¶Ö‡¶Ç‡¶∂ ‡¶®‡¶Ç\s*\d+')
_SERIAL_RE = re.compile(r"(\d+)")
_ID_TOKEN_RE = re.compile(r"\b[\w/]{8,}\b")
_LATIN_LETTER_RE = re.compile(r'[A-Za-z]')
# Lines starting with any of these prefixes are never reported as records.
_SKIPPED_PREFIXES = ('‡¶ï‡ßç‡¶∞‡¶Æ‡¶ø‡¶ï', 'Page', 'START', '‡¶®‡¶ø‡¶∞‡ßç‡¶¨‡¶æ‡¶ö‡¶ï')


def _to_roll_script(query):
    """Transliterate *query* when it contains Latin letters, else return it as is."""
    if _LATIN_LETTER_RE.search(query):
        return normalize_bangla(eng_to_bangla_phonetic(query))
    return query


def _scan_roll(roll_lines, bangla_name, bangla_father, id_q):
    """Classify every roll line as a close match, a possible match, or neither.

    A blank query field places no constraint on the close-match rule. The
    thresholds are 90 (name) and 80 (father) for a close match, and 75 on
    either filled field for a possible match.

    Returns:
        ``(close, possible)``: two lists of record dicts with the keys
        "Part No", "Serial No", "Line" and "ID No".
    """
    close, possible = [], []
    current_part = None

    for raw_line in roll_lines:
        # Idempotent on lines that load_text already normalized.
        line = normalize_bangla(raw_line)
        if _PART_HEADER_EN_RE.search(line) or _PART_HEADER_BN_RE.search(line):
            current_part = line
        if not line or line.startswith(_SKIPPED_PREFIXES):
            continue

        serial_match = _SERIAL_RE.match(line)
        record = {
            "Part No": current_part,
            "Serial No": serial_match.group(1) if serial_match else "?",
            "Line": line,
            "ID No": ", ".join(_ID_TOKEN_RE.findall(line)),
        }

        # Score only the fields the user filled in. A blank field scores 0, so
        # it can never create a "possible" match on its own.
        name_ratio = fuzz.partial_ratio(bangla_name, line) if bangla_name else 0
        father_ratio = fuzz.partial_ratio(bangla_father, line) if bangla_father else 0

        # Without these `not ...` guards, a blank father field would veto
        # every close match and an ID-only search would return nothing.
        name_ok = not bangla_name or name_ratio >= 90
        father_ok = not bangla_father or father_ratio >= 80
        id_ok = not id_q or id_q in line

        if name_ok and father_ok and id_ok:
            close.append(record)
        elif name_ratio >= 75 or father_ratio >= 75:
            possible.append(record)

    return close, possible


def search_voter(b):
    """Button callback: search the roll with the current widget values.

    Loads the roll on first use, stores the outcome in the module-level
    ``search_results`` and ``partial_results`` lists, and renders result
    tables (or a status banner) into ``output_box``.

    Args:
        b: The clicked button, as passed by ipywidgets; unused.
    """
    global search_results, partial_results, lines
    with output_box:
        clear_output()
        print("üß† Using phonetic transliteration + fuzzy search...")

        if not lines:
            lines = load_text()
            if not lines:
                # load_text has already displayed its own error banner.
                return

        name_q = normalize_bangla(name_input.value.strip())
        father_q = normalize_bangla(father_input.value.strip())
        id_q = normalize_bangla(id_input.value.strip())

        if not any([name_q, father_q, id_q]):
            display(HTML("<b style='color:orange;'>‚ö†Ô∏è Please enter at least one field to search.</b>"))
            return

        search_results, partial_results = _scan_roll(
            lines,
            _to_roll_script(name_q),
            _to_roll_script(father_q),
            id_q,
        )

        clear_output()

        if not (search_results or partial_results):
            display(HTML("<b style='color:red;'>‚ùå No matches found.</b>"))
            return

        if search_results:
            display(HTML("<h4>üîπ Exact/Fuzzy Matches</h4>"))
            display(pd.DataFrame(search_results))
        if partial_results:
            display(HTML("<h4>üî∏ Possible Matches</h4>"))
            display(pd.DataFrame(partial_results))

        total = len(search_results) + len(partial_results)
        display(HTML(f"<b style='color:green;'>‚úÖ Found {total} records ({len(search_results)} close, {len(partial_results)} possible).</b>"))
        display(download_button)

# --- CSV Download ---
def download_csv(b):
    """Button callback: write the latest search results to a CSV file.

    Close and possible matches are concatenated (close matches first) and
    saved to /content/voter_search_results.csv with a UTF-8 BOM. A banner is
    then shown in ``output_box``: a link on success, or a warning when there
    is nothing to export.

    Args:
        b: The clicked button, as passed by ipywidgets; unused.
    """
    if search_results or partial_results:
        frames = [pd.DataFrame(search_results), pd.DataFrame(partial_results)]
        combined = pd.concat(frames, ignore_index=True)
        combined.to_csv("/content/voter_search_results.csv", index=False, encoding='utf-8-sig')
        # NOTE(review): the href is relative, so it presumably only resolves
        # when the front-end serves the working directory; confirm in Colab.
        banner = "<b>‚úÖ CSV ready:</b> <a href='voter_search_results.csv' target='_blank'>Click here to download</a>"
    else:
        banner = "<b style='color:orange;'>‚ö†Ô∏è No search results to download.</b>"

    with output_box:
        display(HTML(banner))

# --- Bind Buttons ---
# Each button invokes its callback with the button instance as sole argument.
search_button.on_click(search_voter)
download_button.on_click(download_csv)

# --- Display UI ---
# Vertical stack: heading, the three query fields, the search button, then the
# output area that the callbacks render into.
_app_heading = widgets.HTML("<h3>üó≥Ô∏è Bangla Voter Roll Search Tool</h3>")
_app_rows = [
    _app_heading,
    name_input,
    father_input,
    id_input,
    search_button,
    output_box,
]
display(widgets.VBox(_app_rows))


<IPython.core.display.Javascript object>

VBox(children=(HTML(value='<h3>üó≥Ô∏è Bangla Voter Roll Search Tool</h3>'), Text(value='', description='‡¶®‡¶æ‡¶Æ / Name‚Ä¶