In [1]:
import requests
import json
from tqdm.notebook import tqdm
import time
import os
import random

URLs

- http://13.215.146.238/api/api.php/select_province_and_zone

This URL will return a list of all the provinces and election zones in the country.

- http://13.215.146.238/api/api.php/select_election_zone_candidates?province=71&zoneNumber=2

This URL will return the details of all the election candidates in the province with the ID `71` and zone number `2`.

- http://13.215.146.238/api/api.php/select_election_candidate?candidateId=e8ce23553399625dcf33b029d29c2084

This URL will return the details of the election candidate with the ID `e8ce23553399625dcf33b029d29c2084`.


- http://13.215.146.238/images/candidate/thumb/71/2/7f2ebf81374ec54fd727f65d882c88d3.jpg

This URL will return a thumbnail image of the election candidate with the ID `7f2ebf81374ec54fd727f65d882c88d3` in province `71`, zone `2` (the file name is the candidate ID).

In [2]:
def getter(slug, params=None, timeout=30):
    """
    Get data from the specified API endpoint.

    Args:
        slug (str): The API endpoint slug, appended to the base URL.
        params (dict): Optional query parameters to pass to the API endpoint.
        timeout (float): Seconds to wait for the server before giving up, so
            that a stalled connection cannot block the notebook forever.

    Returns:
        The value stored under ``Result`` (when ``MessageCode`` is ``'00000'``)
        or under ``data_list`` (when ``error_code`` is ``0``) in the JSON
        response. An empty list is returned when neither marker is present.

    Raises:
        requests.exceptions.HTTPError: If the HTTP status code is not 200.
    """

    # Construct the API endpoint URL.
    ep = 'http://13.215.146.238/api/api.php/' + slug

    # Make the API request; `params=None` means "no query string".
    resp = requests.get(ep, params=params, timeout=timeout)

    # Anything other than 200 is treated as a failure.
    if resp.status_code != 200:
        raise requests.exceptions.HTTPError(
            f'{resp.status_code} response from {ep}', response=resp)

    data = resp.json()

    # The endpoints use two different envelopes; accept either one.
    if data.get('MessageCode') == '00000':
        return data['Result']
    if data.get('error_code') == 0:
        return data['data_list']

    # The request succeeded but the API did not report a usable payload.
    return []


In [25]:
# Single source of truth for the cache file used by both helpers below.
SMART_VOTE_JSON_FILE = 'smartvote-candidates.json'

def load_candidate_file():
    """Load the cached candidate list from ``SMART_VOTE_JSON_FILE``.

    Returns:
        list: The parsed JSON content, or an empty list when the file does
        not exist yet.
    """
    if not os.path.exists(SMART_VOTE_JSON_FILE):
        return []
    # Read with the same explicit encoding the file is written with, so the
    # result does not depend on the platform's default locale encoding.
    with open(SMART_VOTE_JSON_FILE, encoding='utf-8') as fp:
        return json.load(fp)

def save_candidate_file(candidate_list):
    """Write ``candidate_list`` to ``SMART_VOTE_JSON_FILE`` as JSON.

    Args:
        candidate_list (list): JSON-serialisable candidate records.
    """
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # must be opened as UTF-8 explicitly.
    with open(SMART_VOTE_JSON_FILE, 'w', encoding='utf-8') as fp:
        json.dump(candidate_list, fp, ensure_ascii=False, indent=1)

## Candidate details

To get the details of a candidate, send a GET request to the following URL:

`http://13.215.146.238/api/api.php/select_election_candidate?candidateId=<candidate_id>`

In [4]:
def get_candidate_info(candidate_id):
    """Fetch the detail record of a single candidate.

    Args:
        candidate_id (str): Value sent as the ``candidateId`` query parameter.

    Returns:
        Whatever ``getter`` returns for the ``select_election_candidate``
        endpoint.
    """
    query = {'candidateId': candidate_id}
    return getter('select_election_candidate', query)

In [5]:
# Example call with a known candidate ID; the returned record is shown as the cell output.
get_candidate_info('e8ce23553399625dcf33b029d29c2084')

{'Id': 'e8ce23553399625dcf33b029d29c2084',
 'Title': 'นางสาว',
 'FirstName': 'อรอนงค์',
 'LastName': '  กาญจนชูศักดิ์',
 'PartyName': 'ประชาธิปัตย์',
 'PartyNumber': '1',
 'CandidateNo': '1',
 'Age': '61',
 'Occupation': 'นักการเมือง',
 'HighestEducation': 'ปริญญาโท'}

## List of candidates in the district

To get the candidates in the district, send a GET request to

`http://13.215.146.238/api/api.php/select_election_zone_candidates?province=<province_code>&zoneNumber=<zone_number>`

In [6]:
def get_candidates_in_district(prov_code, elect_dist_number):
    """Fetch the candidates standing in one election district.

    Args:
        prov_code: Value sent as the ``province`` query parameter.
        elect_dist_number: Value sent as the ``zoneNumber`` query parameter.

    Returns:
        Whatever ``getter`` returns for the
        ``select_election_zone_candidates`` endpoint.
    """
    query = {
        'province': prov_code,
        'zoneNumber': elect_dist_number,
    }
    return getter('select_election_zone_candidates', query)

## List all provinces

In [7]:
# Fetch the province list once; later cells read `province_code`,
# `province_name` and `zone_list` from each entry.
province_list = getter('select_province_and_zone')

## List all candidates

GET all candidates in all provinces

In [50]:
def clean_data(candidate):
    """Tidy a candidate record in place and return the same object.

    Every string value has its surrounding whitespace stripped. Afterwards a
    ``PartyName`` equal to ``'0'`` is replaced with ``'มิติใหม่'``
    (presumably the API reports that party's name as '0' -- TODO confirm).

    Args:
        candidate (dict): Record that must contain a ``PartyName`` key.

    Returns:
        dict: The mutated ``candidate``.
    """
    for key in list(candidate):
        value = candidate[key]
        if not isinstance(value, str):
            continue
        candidate[key] = value.strip()

    party_is_zero = candidate['PartyName'] == '0'
    if party_is_zero:
        candidate['PartyName'] = 'มิติใหม่'

    return candidate

In [27]:
def candidate_list_from_api(candidate_list):
    """Append the district candidates of every province to ``candidate_list``.

    For each entry of the global ``province_list`` the districts ``1..n`` are
    queried, where ``n`` is the length of the province's ``zone_list``. Each
    returned candidate is tagged with ``ProvinceName``, ``ProvinceCode`` and
    ``ZoneNumber`` and passed through ``clean_data`` before being appended.

    Candidates whose ``CandidateId`` is already present in ``candidate_list``
    are skipped, so calling this on a list loaded from the cache file does
    not create duplicate records.

    Args:
        candidate_list (list): Existing candidate dicts; extended in place.

    Returns:
        list: The same ``candidate_list`` object.
    """
    # IDs already collected (e.g. restored from the cache file).
    known_ids = {c.get('CandidateId') for c in candidate_list}
    known_ids.discard(None)

    for province in tqdm(province_list):
        prov_code = province['province_code']
        dist_total = len(province['zone_list'])

        # `dist_num` runs from 1 to n
        for dist_num in range(1, dist_total + 1):
            # GET candidates from API
            for candidate in get_candidates_in_district(prov_code, dist_num):
                candidate_id = candidate.get('CandidateId')
                if candidate_id is not None:
                    if candidate_id in known_ids:
                        # Already collected on an earlier run.
                        continue
                    known_ids.add(candidate_id)

                candidate['ProvinceName'] = province['province_name']
                candidate['ProvinceCode'] = prov_code
                candidate['ZoneNumber'] = dist_num

                candidate_list.append(clean_data(candidate))

    return candidate_list

In [28]:
# Start from whatever an earlier run saved (an empty list if no file exists),
# then append the district candidates returned by the API.
candidate_list = load_candidate_file()
candidate_list = candidate_list_from_api(candidate_list)

  0%|          | 0/77 [00:00<?, ?it/s]

In [29]:
# Persist the collected candidates to the JSON cache file.
save_candidate_file(candidate_list)

## Get candidate info

# Get party

In [30]:
# Fetch all parties and index them by label. The labels are stripped of
# surrounding whitespace because they are later looked up by a candidate's
# `PartyName`, which clean_data() has stripped as well.
party_list = getter('select_party')
party_dict = {p['label'].strip(): p for p in party_list}

`http://13.215.146.238/api/api.php/select_election_partylist_candidates?partyNumber=244`

In [31]:
def get_candidate_party_listed(party_id):
    """Fetch the party-list candidates of one party.

    Args:
        party_id: Value sent as the ``partyNumber`` query parameter.

    Returns:
        Whatever ``getter`` returns for the
        ``select_election_partylist_candidates`` endpoint.
    """
    query = {'partyNumber': party_id}
    return getter('select_election_partylist_candidates', query)

In [33]:
# Append the party-list candidates of every party. Candidates whose
# `CandidateId` is already in `candidate_list` are skipped so that re-running
# this cell (or resuming from the cache file) does not duplicate records.
known_ids = {c.get('CandidateId') for c in candidate_list}
known_ids.discard(None)

for party in tqdm(party_list):
    # `id_dga` is the value the party-list endpoint expects as `partyNumber`.
    for candidate in get_candidate_party_listed(party['id_dga']):
        candidate_id = candidate.get('CandidateId')
        if candidate_id is not None:
            if candidate_id in known_ids:
                continue
            known_ids.add(candidate_id)

        candidate_list.append(clean_data(candidate))

  0%|          | 0/83 [00:00<?, ?it/s]

In [44]:
# Enrich every candidate with its party id and detail record. Records already
# flagged with `_data_added` are skipped, which makes the cell resumable.
for i, candidate in enumerate(tqdm(candidate_list)):
    if candidate.get('_data_added', False):
        continue

    candidate['PartyId'] = party_dict[candidate['PartyName']]['id']

    candidate_id = candidate['CandidateId']
    new_info = get_candidate_info(candidate_id)

    if new_info:
        candidate.update(clean_data(new_info))
        # flag: only set once details were actually merged
        candidate['_data_added'] = True
    else:
        # getter() returns [] when the API reports no usable payload; leave
        # the record unflagged so a later run retries it.
        print('no details returned for candidate', candidate_id)

    # sleep a random 10-299 ms between requests to go easy on the server
    time.sleep(random.randrange(10, 300) / 1000)

    # occasionally pause for a full second
    if random.random() < 0.01:
        time.sleep(1)

    # checkpoint every 100 records
    if i % 100 == 0:
        save_candidate_file(candidate_list)

# Final save outside the loop: the in-loop checkpoint is skipped by the
# `continue` above whenever the last record was already flagged.
save_candidate_file(candidate_list)

  0%|          | 0/6669 [00:00<?, ?it/s]

# Download images

In [65]:
def get_image_candidate(candidate):
    """Download a candidate's thumbnail unless it is already on disk.

    The image is stored as ``images/<PartyName>/<FirstName>-<LastName>.jpg``
    and that path is recorded in ``candidate['ImagePath']``. When the server
    answers with a status other than 200 the status code is printed and
    ``ImagePath`` is left unset.

    NOTE: the file name is built from the candidate's name only, so two
    candidates of the same party with identical names share one file.

    Args:
        candidate (dict): Needs ``CandidateId``, ``FirstName``, ``LastName``
            and ``PartyName``, plus either ``ProvinceCode`` and
            ``ZoneNumber`` or ``PartyId``. Mutated in place.

    Returns:
        None
    """
    candidate_id = candidate['CandidateId']
    if 'ProvinceCode' in candidate:
        province_code = candidate['ProvinceCode']
        zone_number = candidate['ZoneNumber']
    else:
        # Records without a province use '00' and the party id in the URL.
        province_code = '00'
        zone_number = candidate['PartyId']

    image_url = ('http://13.215.146.238/images/candidate/thumb/'
                 f'{province_code}/{zone_number}/{candidate_id}.jpg')

    file_name = candidate['FirstName'] + '-' + candidate['LastName'] + '.jpg'
    image_dir = os.path.join('images', candidate['PartyName'])
    image_path = os.path.join(image_dir, file_name)
    os.makedirs(image_dir, exist_ok=True)

    # Already downloaded on an earlier run: just record the path.
    if os.path.exists(image_path):
        candidate['ImagePath'] = image_path
        return

    # A timeout keeps one stalled download from blocking the whole loop.
    resp = requests.get(image_url, timeout=30)

    # Sleep a random 10-99 ms between requests.
    time.sleep(random.randrange(10, 100) / 1000)

    if resp.status_code == 200:
        # `with` closes the file even if the write fails.
        with open(image_path, 'wb') as fp:
            fp.write(resp.content)

        candidate['ImagePath'] = image_path
    else:
        print('status code is', resp.status_code)

In [66]:
# Download the thumbnail of every candidate, checkpointing the list (with the
# newly recorded `ImagePath` values) every 1000 records and after the last one.
for i, candidate in enumerate(tqdm(candidate_list)):
    get_image_candidate(candidate)

    # `i` tops out at len - 1, so that is the value marking the final record.
    if i % 1000 == 0 or i == len(candidate_list) - 1:
        save_candidate_file(candidate_list)

  0%|          | 0/6669 [00:00<?, ?it/s]

## convert images

Convert each image to the .webp format and resize it to a width of 156 px, keeping the original aspect ratio.

In [67]:
from PIL import Image
from pathlib import Path

In [70]:
def calculate_image_size(width, height, target_width=156):
    """Scale an image size to ``target_width`` while keeping the aspect ratio.

    Integer arithmetic is used on purpose: multiplying by a float ratio and
    truncating with ``int()`` can land just below the intended value and
    yield a result that is one pixel too small.

    Args:
        width (int): Original width in pixels; must be non-zero.
        height (int): Original height in pixels.
        target_width (int): Width of the resized image in pixels.

    Returns:
        tuple: ``(target_width, scaled_height)`` with the height rounded down.

    Raises:
        ZeroDivisionError: If ``width`` is 0.
    """
    scaled_height = height * target_width // width
    return (target_width, scaled_height)

def convert_and_resize(candidate):
    """Create a resized .webp copy of a candidate's image and record its path.

    The copy is written to ``../../static/candidates/<PartyName>/<name>.webp``
    (relative to the working directory) unless that file already exists, and
    ``candidate['Image']`` is set to the matching path under
    ``/yourcandidates``.

    Candidates without an ``ImagePath`` (for example because the download
    did not succeed) are returned unchanged instead of raising ``KeyError``.

    Args:
        candidate (dict): Needs ``PartyName``; ``ImagePath`` is optional.
            Mutated in place.

    Returns:
        dict: The same ``candidate`` object.
    """
    original_image_path = candidate.get('ImagePath')
    if not original_image_path:
        # Nothing was downloaded for this candidate, so nothing to convert.
        return candidate

    destination_image_name = Path(original_image_path).with_suffix('.webp').name

    image_dir = os.path.join('candidates', candidate['PartyName'])
    save_dir = os.path.join('../..', 'static', image_dir)
    os.makedirs(save_dir, exist_ok=True)

    save_path = os.path.join(save_dir, destination_image_name)

    if not os.path.exists(save_path):
        # The context manager closes the source file once the copy is saved.
        with Image.open(original_image_path) as image:
            new_size = calculate_image_size(image.width, image.height)
            image.resize(new_size).save(save_path, format="webp")

    candidate['Image'] = os.path.join('/', 'yourcandidates', image_dir, destination_image_name)

    return candidate

In [72]:
# Convert the image of every candidate; convert_and_resize() stores the
# resulting path under `Image` in each record.
for candidate in tqdm(candidate_list):
    convert_and_resize(candidate)

# Persist the records together with the newly added `Image` paths.
save_candidate_file(candidate_list)

  0%|          | 0/6669 [00:00<?, ?it/s]

# Save

In [None]:
import pandas as pd

In [None]:
# Build a table from the candidate records and drop the `Id` column
# (presumably redundant next to `CandidateId` -- TODO confirm).
candidate_df = pd.DataFrame(candidate_list).drop(columns='Id')

In [None]:
# Display the table for a visual check.
candidate_df

In [None]:
# Export the table as CSV; with pandas' defaults the DataFrame index is
# written as the first column.
candidate_df.to_csv('smartvote-candidates.csv')