In [124]:
import requests
import json
from tqdm.notebook import tqdm
import time
import os
import random

URLs

- http://13.215.146.238/api/api.php/select_province_and_zone

This URL will return a list of all the provinces and election zones in the country.

- http://13.215.146.238/api/api.php/select_election_zone_candidates?province=71&zoneNumber=2

This URL will return the details of all the election candidates in the province with the ID `71` and zone number `2`.

- http://13.215.146.238/api/api.php/select_election_candidate?candidateId=e8ce23553399625dcf33b029d29c2084

This URL will return the details of the election candidate with the ID `e8ce23553399625dcf33b029d29c2084`.


- http://13.215.146.238/images/candidate/thumb/71/2/7f2ebf81374ec54fd727f65d882c88d3.jpg

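This URL will return the thumbnail image of an election candidate. The path has the form `thumb/<province code>/<zone number>/<candidate ID>.jpg`, so this example is the candidate with the ID `7f2ebf81374ec54fd727f65d882c88d3` in province `71`, zone `2`.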

In [None]:
def getter(slug, params=None, timeout=30):
    """Call one endpoint of the election API and return its payload.

    Args:
        slug: Endpoint name appended to the API base URL, e.g.
            ``'select_province_and_zone'``.
        params: Optional mapping of query-string parameters.
        timeout: Seconds to wait for the server; without it a stalled
            connection would block the notebook indefinitely.

    Returns:
        ``data['Result']`` when the response has ``MessageCode == '00000'``,
        ``data['data_list']`` when it has ``error_code == 0``, and an empty
        list for every other response (including JSON that is not an object).

    Raises:
        requests.RequestException: On connection problems or timeouts.
        ValueError: If the response body is not valid JSON.
    """
    ep = 'http://13.215.146.238/api/api.php/' + slug
    resp = requests.get(ep, params=params, timeout=timeout)
    data = resp.json()

    # Both success envelopes are JSON objects; a list/string/null body can
    # only be a failure and must not crash the key lookups below.
    if not isinstance(data, dict):
        return []
    if data.get('MessageCode') == '00000':
        return data['Result']
    if data.get('error_code') == 0:
        return data['data_list']
    return []

In [None]:
def get_candidate_info(candidate_id):
    """Fetch the detail record of a single candidate.

    Sends ``candidate_id`` as the ``candidateId`` query parameter to the
    ``select_election_candidate`` endpoint and returns whatever ``getter``
    returns for that request.
    """
    query = dict(candidateId=candidate_id)
    return getter('select_election_candidate', query)

In [None]:
# Example call: fetch the detail record for the candidate ID used in the
# URL notes above; the returned value is shown as the cell output.
get_candidate_info('e8ce23553399625dcf33b029d29c2084')

In [None]:
def get_candidates_in_district(prov_code, elect_dist_number):
    """List the candidates running in one election zone of one province.

    Sends ``prov_code`` as ``province`` and ``elect_dist_number`` as
    ``zoneNumber`` to the ``select_election_zone_candidates`` endpoint and
    returns whatever ``getter`` returns for that request.
    """
    query = {}
    query['province'] = prov_code
    query['zoneNumber'] = elect_dist_number
    return getter('select_election_zone_candidates', query)

In [None]:
# Look up the ID that appears as the file name in the sample thumbnail URL.
get_candidate_info('7f2ebf81374ec54fd727f65d882c88d3')

In [None]:
# Request the sample thumbnail directly; the response object is kept in
# `resp` so it can be inspected in later cells.
resp = requests.get('http://13.215.146.238/images/candidate/thumb/71/2/7f2ebf81374ec54fd727f65d882c88d3.jpg')

In [None]:
# Province records from the API. Later cells read 'province_code',
# 'province_name' and 'zone_list' from each entry.
province_list = getter('select_province_and_zone')

In [None]:
# Resume from the file written by a previous run when it exists; on a first
# run start from an empty list so the notebook works from a fresh kernel.
# UTF-8 is explicit because the file is written with ensure_ascii=False and
# the platform default encoding is not UTF-8 everywhere.
if os.path.exists('smartvote-candidates.json'):
    with open('smartvote-candidates.json', encoding='utf-8') as fp:
        candidate_list = json.load(fp)
else:
    candidate_list = []

In [87]:
def candidate_list_from_api(candidate_list=None, provinces=None):
    """Merge the candidates currently published by the API into a list.

    Walks every province and every election zone, fetches that zone's
    candidates and tags each one with ``ProvinceName``, ``ProvinceCode`` and
    ``ZoneNumber``. A candidate whose ``CandidateId`` is not yet known is
    appended; for a known candidate the stored dict is updated in place with
    the fresh API fields, so keys added by later notebook steps are kept.

    Args:
        candidate_list: Previously collected candidate dicts. The list and
            its dicts are modified in place. A new list is created when
            omitted.
        provinces: Province records to walk, each providing
            ``province_code``, ``province_name`` and ``zone_list``. Defaults
            to the notebook-level ``province_list``.

    Returns:
        The list that was passed in (or the newly created one).
    """
    if candidate_list is None:
        candidate_list = []
    if provinces is None:
        provinces = province_list

    # Index by ID once so each lookup is a dict hit instead of a list scan.
    # setdefault keeps the first record when an ID occurs more than once.
    known = {}
    for stored in candidate_list:
        known.setdefault(stored['CandidateId'], stored)

    for province in tqdm(provinces):
        prov_code = province['province_code']
        zone_count = len(province['zone_list'])
        # assumes zones are numbered 1..len(zone_list) — TODO confirm
        for dist_num in range(1, zone_count + 1):
            for candidate in get_candidates_in_district(prov_code, dist_num):
                candidate['ProvinceName'] = province['province_name']
                candidate['ProvinceCode'] = prov_code
                candidate['ZoneNumber'] = dist_num

                stored = known.get(candidate['CandidateId'])
                if stored is None:
                    candidate_list.append(candidate)
                    known[candidate['CandidateId']] = candidate
                else:
                    # Update the stored dict itself; rebinding a local name
                    # would leave the list untouched.
                    stored.update(candidate)
    return candidate_list

In [88]:
# Merge the API's current candidates into the list loaded from disk.
candidate_list = candidate_list_from_api(candidate_list)

100%|███████████████████████████████████████████| 77/77 [00:32<00:00,  2.39it/s]


In [None]:
def save_candidate_file(candidate_list, path='smartvote-candidates.json'):
    """Write the candidate list to disk as JSON.

    Args:
        candidate_list: JSON-serialisable list of candidate dicts.
        path: Destination file; defaults to the notebook's checkpoint file.

    Non-ASCII characters are written as-is (``ensure_ascii=False``), so the
    file is opened as UTF-8 explicitly instead of relying on the platform
    default encoding, which may be unable to encode them.
    """
    with open(path, 'w', encoding='utf-8') as fp:
        json.dump(candidate_list, fp, ensure_ascii=False)

In [90]:
# Persist the merged list before the slower per-candidate requests below.
save_candidate_file(candidate_list)

In [91]:
# Add the detail record to every candidate that does not have one yet.
# A candidate that already has an 'Age' key is skipped, which makes the cell
# safe to re-run after an interruption.
# (presumably 'Age' only comes from the detail endpoint — verify)
for i, candidate in enumerate(tqdm(candidate_list), start=1):
    if 'Age' in candidate:
        continue
    candidate.update(get_candidate_info(candidate['CandidateId']))

    # Short random pause (10-299 ms) between requests, plus an occasional
    # one-second pause.
    time.sleep(random.randrange(10, 300) / 1000)
    if random.random() < 0.01:
        time.sleep(1)

    # Checkpoint periodically so an interrupted run loses little work.
    if i % 100 == 0:
        save_candidate_file(candidate_list)

# Final save after the loop: the in-loop checkpoint never covers the tail of
# the list unless its length happens to be a multiple of 100.
save_candidate_file(candidate_list)

100%|█████████████████████████████████████| 4776/4776 [00:02<00:00, 2156.99it/s]


In [None]:
# Reload the checkpoint file. UTF-8 is explicit because the file is written
# with ensure_ascii=False and the platform default encoding may differ.
with open('smartvote-candidates.json', encoding='utf-8') as fp:
    candidate_list = json.load(fp)

# Download images

In [141]:
def get_image_candidate(candidate, timeout=30):
    """Download a candidate's thumbnail and record where it was saved.

    The URL is built from the candidate's ``ProvinceCode``, ``ZoneNumber``
    and ``CandidateId``. The image is written to
    ``images/<PartyName>/<FirstName>-<LastName>.jpg`` (``LastName`` stripped
    of surrounding whitespace).

    Args:
        candidate: Candidate dict; gains an ``'Image'`` key holding the
            local file path when the download succeeds.
        timeout: Seconds to wait for the image server before giving up.

    Returns:
        The same ``candidate`` dict on HTTP 200; otherwise ``None`` after
        printing the status code.
    """
    candidate_id = candidate['CandidateId']
    province_code = candidate['ProvinceCode']
    zone_number = candidate['ZoneNumber']

    image_url = ('http://13.215.146.238/images/candidate/thumb/'
                 f'{province_code}/{zone_number}/{candidate_id}.jpg')

    # NOTE(review): two candidates of the same party with identical names
    # would share this path and overwrite each other — confirm this cannot
    # happen in the data.
    file_name = candidate['FirstName'] + '-' + candidate['LastName'].strip() + '.jpg'
    image_dir = os.path.join('images', candidate['PartyName'])
    image_path = os.path.join(image_dir, file_name)
    os.makedirs(image_dir, exist_ok=True)

    resp = requests.get(image_url, timeout=timeout)
    if resp.status_code != 200:
        print('status code is', resp.status_code)
        return None

    # The context manager closes the file even if the write fails.
    with open(image_path, 'wb') as fp:
        fp.write(resp.content)

    candidate['Image'] = image_path
    return candidate

In [142]:
# Download the thumbnail of every candidate that does not have one yet.
# Candidates that already carry an 'Image' key are skipped, so the cell can
# be re-run after an interruption.
for i, candidate in enumerate(tqdm(candidate_list)):
    if 'Image' in candidate:
        continue
    get_image_candidate(candidate)

    # Short random pause (10-99 ms) between requests, plus an occasional
    # one-second pause.
    time.sleep(random.randrange(10, 100) / 1000)
    if random.random() < 0.01:
        time.sleep(1)

    # Checkpoint periodically so an interrupted run loses little work.
    if i % 100 == 0:
        save_candidate_file(candidate_list)

# Final save after the loop: the in-loop checkpoint is skipped whenever the
# last candidates already had images, which would leave the newest paths
# unsaved.
save_candidate_file(candidate_list)

  0%|          | 0/4776 [00:00<?, ?it/s]

## convert images

Convert each downloaded image to .webp format and resize it to 156 px wide; the height is scaled proportionally, so a square thumbnail becomes 156x156.

In [143]:
from PIL import Image
from pathlib import Path

In [147]:
def convert_and_resize(candidate, width=156):
    """Create a WebP copy of a candidate's image, scaled to ``width`` pixels.

    The copy is written to ``../../static/candidates/<PartyName>/`` under the
    original file name with a ``.webp`` suffix. An existing copy is not
    regenerated. The site-relative URL of the copy is stored in
    ``candidate['ImageSrc']``.

    Args:
        candidate: Candidate dict with ``'PartyName'`` and, when a thumbnail
            was downloaded, ``'Image'`` (local path of the source file).
        width: Target width in pixels; the height keeps the aspect ratio.

    Returns:
        None. A candidate without an ``'Image'`` key is printed and skipped.
    """
    if 'Image' not in candidate:
        # Nothing to convert; report it and move on instead of failing with
        # a KeyError that would abort the whole conversion loop.
        print(candidate)
        return

    original_image_path = candidate['Image']
    destination_image_name = Path(original_image_path).with_suffix('.webp').name
    party = candidate['PartyName']

    save_dir = os.path.join('../..', 'static', 'candidates', party)
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, destination_image_name)

    if not os.path.exists(save_path):
        # Open the source only when a conversion is needed, and close it
        # deterministically.
        with Image.open(original_image_path) as image:
            # Integer arithmetic: scaling through a float ratio can truncate
            # the width to one pixel less than requested.
            height = max(1, image.height * width // image.width)
            image.resize((width, height)).save(save_path, format="webp")

    # This is a URL, not a filesystem path, so it always uses '/'.
    candidate['ImageSrc'] = '/'.join(
        ('', 'yourcandidates', 'candidates', party, destination_image_name))

In [148]:
# Convert every candidate's image, then persist the 'ImageSrc' values that
# convert_and_resize stored on the candidate dicts.
for candidate in tqdm(candidate_list):
    convert_and_resize(candidate)

save_candidate_file(candidate_list)

  0%|          | 0/4776 [00:00<?, ?it/s]

In [146]:
# Write the candidate list to disk again (the conversion cell above also
# saves it after its loop).
save_candidate_file(candidate_list)

# Save

In [None]:
import pandas as pd

In [None]:
# One row per candidate dict; the 'Id' column is dropped from the table.
# NOTE(review): drop() raises KeyError if no record has an 'Id' key.
candidate_df = pd.DataFrame(candidate_list).drop(columns='Id')

In [None]:
# Display the table as the cell output for a visual check.
candidate_df

In [None]:
# Export the table as CSV. With pandas' defaults the DataFrame index is
# written as the first (unnamed) column.
candidate_df.to_csv('smartvote-candidates.csv')