In [None]:
import json
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import matplotlib
import numpy as np

%matplotlib inline

# Global Matplotlib font settings applied to every figure in this notebook.
# 'sans-serif' is one of Matplotlib's generic font families. The previous
# value, 'normal', is not a font family name, so Matplotlib reported
# "findfont: Font family ['normal'] not found" and used its default font.
font = {'family': 'sans-serif',
        'size': 18}

matplotlib.rc('font', **font)


In [None]:
# Load the ballot configuration and derive two lookup tables from it.
with open("data/config.json") as f:
    config = json.load(f)

# Flatten every (option id, option name, district id) triple, keeping the
# order in which the ballots and their options appear in the config.
option_rows = [
    (int(option_id), option_name, ballot["district_id"])
    for ballot in config["ballots_config"]
    for option_id, option_name in ballot["options"].items()
]

# Integer option id -> candidate name.
candidates = {option_id: option_name for option_id, option_name, _ in option_rows}
# Candidate name -> id of the district whose ballot lists that name.
name_to_district = {option_name: district_id for _, option_name, district_id in option_rows}

# Timestamp layout used when parsing vote records, and the number of leading
# characters of the raw "datetime" string that this layout covers.
FORMAT = "%Y-%m-%dT%H:%M:%S"
LEN = len("2021-09-17T08:18:18")

def read(fname):
    """Parse a JSON-lines vote log into per-candidate timestamps and counts.

    Each line of ``fname`` is decoded as a JSON object with a ``"vote"`` key
    (looked up in the module-level ``candidates`` mapping) and a
    ``"datetime"`` string. Records whose vote is not a known candidate id are
    ignored.

    Returns:
        A ``(times, votes_by_district)`` tuple where ``times`` maps each
        candidate name to a list of POSIX timestamps (shifted back by three
        hours) and ``votes_by_district`` maps district id ->
        ``{candidate name: number of votes}``.
    """
    times = {}
    for candidate_name in candidates.values():
        times[candidate_name] = []

    with open(fname) as f:
        for line in f:
            record = json.loads(line)
            vote = record["vote"]
            if vote not in candidates:
                continue
            # Only the first LEN characters are parsed; anything after the
            # seconds field is dropped.
            cast_at = datetime.strptime(record["datetime"][:LEN], FORMAT)
            # NOTE(review): the fixed 3-hour shift looks like a UTC+3 offset
            # correction - confirm against the data source.
            times[candidates[vote]].append(cast_at.timestamp() - 3 * 60 * 60)

    votes_by_district = {}
    for district_id in name_to_district.values():
        votes_by_district[district_id] = {}
    for candidate_name, stamps in times.items():
        votes_by_district[name_to_district[candidate_name]][candidate_name] = len(stamps)
    return times, votes_by_district

# Parse the vote log once: per-candidate timestamps and per-district vote counts.
times_all, votes_all = read("data/vote_times.jsonl")
# Second input file that would be parsed by the same `read`; currently disabled.
# times_decrypted, votes_decrypted = read("data/vote_times_decrypted.jsonl")

In [None]:
def make_ticks():
    """Build x-axis tick positions and labels for the time plots.

    Ticks are placed every two hours, from 08:00 on 17 September 2021 up to
    (but not including) 70 hours after that day's midnight. Positions are
    POSIX timestamps shifted back by three hours, the same shift `read`
    applies to vote timestamps. Midnight ticks are labelled with the day of
    the month, every other tick with its zero-padded hour.

    Returns:
        A ``(ticks, labels)`` tuple of equally long lists.
    """
    day_start = datetime(2021, 9, 17)
    moments = [day_start + timedelta(hours=offset) for offset in range(8, 24 * 2 + 22, 2)]

    ticks = [(moment - timedelta(hours=3)).timestamp() for moment in moments]
    labels = [
        "{} сен".format(moment.day) if moment.hour == 0 else "{:02}".format(moment.hour)
        for moment in moments
    ]
    return ticks, labels

def draw_comparison(times, name1, name2, district, colors=None):
    """Plot vote-time histograms of two candidates and their per-bin ratio.

    The upper panel shows side-by-side 70-bin histograms of ``times[name1]``
    and ``times[name2]``. The lower panel shows, for each bin, the number of
    ``name2`` votes divided by the number of ``name1`` votes. The figure is
    saved under ``images/``.

    Args:
        times: mapping candidate name -> sequence of vote timestamps.
        name1: candidate used as the denominator of the ratio.
        name2: candidate used as the numerator of the ratio.
        district: district identifier; only used in the output file name.
        colors: optional pair of colours for ``name1`` and ``name2``.
    """
    fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(20,15))

    ns, bins, patches = ax1.hist(
        [times[name1], times[name2]],
        bins=70,
        stacked=False,
        color=colors,
        label=[name1, name2],
    )
    ax1.legend()
    ticks, labels = make_ticks()
    ax1.set_xticks(ticks)
    ax1.set_xticklabels(labels)
    ax1.tick_params(axis='x', rotation=45)
    ax1.set_ylabel('Количество голосов в интервале')

    # The ratio is undefined where name1 has no votes; leave those bins as NaN
    # (no bar) instead of dividing by zero and producing inf/NaN with warnings.
    denominator = np.asarray(ns[0], dtype=float)
    numerator = np.asarray(ns[1], dtype=float)
    ratio = np.divide(
        numerator,
        denominator,
        out=np.full_like(denominator, np.nan),
        where=denominator > 0,
    )
    # Centre each ratio bar on its bin so it lines up with the histogram above
    # (bars placed at the left edges would sit half a bin too early).
    bin_centers = (bins[:-1] + bins[1:]) / 2
    # Each histogram bar is one of two per bin, so twice its width is the
    # combined width the two bars occupy.
    ax2.bar(bin_centers, ratio, width=2*patches[0].get_children()[0].get_width())
    ax2.set_ylabel('Отношение {} / {}'.format(name2.split()[0], name1.split()[0]))

    ax2.set_xticks(ticks)
    ax2.set_xticklabels(labels)
    ax2.tick_params(axis='x', rotation=45)

    plt.savefig("images/Comparison_{}_{}_{}.png".format(district, name1.split()[0], name2.split()[0]))

In [None]:
def find(substr):
    """Return the single candidate whose first word equals ``substr``.

    The comparison is case-insensitive and uses only the first
    whitespace-separated word of each name in the module-level ``candidates``
    mapping. Fails with ``AssertionError`` (carrying the list of matches)
    unless exactly one candidate matches.
    """
    matching = []
    for full_name in candidates.values():
        if substr.upper() == full_name.split()[0].upper():
            matching.append(full_name)
    assert len(matching) == 1, matching
    return matching[0]

In [None]:
def analyze_top_3_in_district(district, votes, times):
    """Draw pairwise comparisons for the three most voted candidates.

    Candidates are ranked by descending vote count; the 2nd is compared with
    the 1st, then the 3rd with the 2nd.

    Args:
        district: district identifier, printed and passed to `draw_comparison`.
        votes: mapping candidate name -> vote count for this district.
        times: mapping candidate name -> vote timestamps.
    """
    print("Processing", district)
    ranking = [name for name, _count in sorted(votes.items(), key=lambda item: -item[1])]
    draw_comparison(times, ranking[1], ranking[0], district, colors=["red", "blue"])
    draw_comparison(times, ranking[2], ranking[1], district, colors=["green", "red"])

In [None]:
# `votes_decrypted` / `times_decrypted` are not defined anywhere earlier in the
# notebook (their load is commented out), so this cell used to fail with a
# NameError on a fresh kernel. Load the file here and skip the analysis when it
# is not available, so that Restart & Run All keeps working.
try:
    times_decrypted, votes_decrypted = read("data/vote_times_decrypted.jsonl")
except FileNotFoundError:
    print("data/vote_times_decrypted.jsonl not found; skipping the decrypted-votes analysis")
else:
    for district, votes in votes_decrypted.items():
        analyze_top_3_in_district(district, votes, times_decrypted)

In [None]:
# Top-3 comparison plots for district 205, based on the full vote log.
analyze_top_3_in_district(district=205, votes=votes_all[205], times=times_all)

In [None]:
import scipy.ndimage

def smoothen(data, sigma):
    """Return ``data`` passed through SciPy's Gaussian filter.

    Args:
        data: array-like to be smoothed.
        sigma: standard deviation of the Gaussian kernel.
    """
    blurred = scipy.ndimage.gaussian_filter(data, sigma=sigma)
    return blurred

def draw_comparison_smooth(times, name1, name2, district, colors):
    """Plot smoothed vote-time densities of two candidates on the current axes.

    Each candidate gets its own 100-bin density histogram, smoothed with
    ``sigma=2``. The curves are plotted against the bin index, ``name2``
    first. ``district`` is accepted but not used.
    """
    raw_densities = [
        np.histogram(times[name], bins=100, density=True)[0]
        for name in (name1, name2)
    ]
    first_smooth, second_smooth = [smoothen(density, 2) for density in raw_densities]

    # name2 is drawn first, so name1's curve ends up on top.
    plt.plot(second_smooth, color=colors[1])
    plt.plot(first_smooth, color=colors[0])

In [None]:
def make_baseline(district, votes, times, bin_count=100, bins=None):
    """Build the normalised vote-time histogram of all but the top two candidates.

    Candidates are ranked by descending vote count; the histograms of every
    candidate from the 3rd place down are summed and divided by their total.

    Args:
        district: district identifier (not used by the computation).
        votes: mapping candidate name -> vote count.
        times: mapping candidate name -> sequence of vote timestamps.
        bin_count: number of equal-width bins to create when ``bins`` is None.
        bins: optional bin specification passed to ``np.histogram``. When
            None, ``bin_count`` bins spanning the top candidate's first to
            last vote are used.

    Returns:
        ``(baseline_h, bins)``: the histogram as a float array summing to 1
        (all zeros when the baseline candidates have no votes inside the
        bins), and the bin specification actually used.
    """
    ranking = sorted(votes.items(), key=lambda t: -t[1])
    if bins is None:
        leader_times = times[ranking[0][0]]
        bins = np.linspace(np.min(leader_times), np.max(leader_times), num=bin_count+1)

    # Size the accumulator from the bins actually in use. Sizing it from
    # `bin_count` broke with a broadcast error whenever explicit `bins` had a
    # different number of intervals.
    if np.ndim(bins) == 0:
        n_bins = int(bins)
    else:
        n_bins = len(bins) - 1
    baseline_h = np.zeros(n_bins)

    for name, _ in ranking[2:]:
        h, _ = np.histogram(times[name], bins=bins)
        baseline_h += h

    # Avoid 0/0 -> NaN when there is nothing to normalise.
    total = np.sum(baseline_h)
    if total > 0:
        baseline_h /= total
    return baseline_h, bins

def analyze_top_3_in_district_smooth(district, votes, times):
    """Plot smoothed vote-time shares of a district's leading candidates.

    Draws, on a new figure, the smoothed baseline from `make_baseline` as a
    dotted black line, followed by one smoothed, normalised histogram for
    each of the (up to) four candidates with the most votes. Every histogram
    uses the bins returned by `make_baseline`. The candidate name and the
    number of its votes that fall inside the bins are printed.

    Args:
        district: district identifier, printed and passed to `make_baseline`.
        votes: mapping candidate name -> vote count.
        times: mapping candidate name -> vote timestamps.
    """
    print("Processing", district)
    ranking = sorted(votes.items(), key=lambda item: -item[1])

    plt.figure(figsize=(20,10))

    baseline_h, bins = make_baseline(district, votes, times)
    left_edges = bins[:-1]
    plt.plot(left_edges, smoothen(baseline_h, 1.5), color="black", lw=4, ls=':', label="Сумма по всем кандидатам, кроме 1-го и 2-го")

    ticks, labels = make_ticks()
    ax = plt.gca()
    ax.set_xticks(ticks)
    ax.set_xticklabels(labels)
    ax.tick_params(axis='x', rotation=45)

    # One colour per rank; zip stops after four candidates (or fewer).
    palette = ("blue", "red", "green", "purple")
    for color, (name, _count) in zip(palette, ranking):
        counts = np.histogram(times[name], bins=bins)[0].astype(float)
        in_range_total = np.sum(counts)
        print(name, in_range_total)
        share = counts / in_range_total
        plt.plot(left_edges, smoothen(share, 1.5), color=color, label=name)
    plt.legend()
    plt.ylabel("Доля от всех голосов за кандидата")

In [None]:
# Smoothed share curves for district 207, based on the full vote log.
analyze_top_3_in_district_smooth(district=207, votes=votes_all[207], times=times_all)

In [None]:
# Smoothed baseline curve (all candidates except the top two) for each
# district, overlaid on a single figure.
districts = range(196, 211)
plt.figure(figsize=(20,10))

# The first call creates the bin edges; later calls reuse them so that all
# districts are drawn on the same time grid.
bins = None
for district in districts:
    h, bins = make_baseline(
        district,
        votes_all[district],
        times_all,
        bin_count=100,
        bins=bins,
    )
    plt.plot(bins[:-1], smoothen(h, 1.5))

In [None]:
# Accumulate, over all districts, the vote-time histograms of the candidates
# ranked 1st-4th and the baseline, while plotting each district's top-candidate
# share (solid) and baseline (dotted).
districts = range(196, 211)
plt.figure(figsize=(20,10))

n_bins = 100

# All histograms must share the same bin edges, otherwise summing them
# element-wise mixes bins that cover different time intervals. Previously each
# histogram used `bins=100` with its own automatic range.
candidate_times = [
    times_all[name]
    for district in districts
    for name in votes_all[district]
    if times_all[name]
]
bins = np.linspace(
    min(np.min(stamps) for stamps in candidate_times),
    max(np.max(stamps) for stamps in candidate_times),
    num=n_bins + 1,
)

top_sum = np.zeros(n_bins)
second_sum = np.zeros(n_bins)
third_sum = np.zeros(n_bins)
fourth_sum = np.zeros(n_bins)
baseline_sum = np.zeros(n_bins)

for district in districts:
    s = sorted(votes_all[district].items(), key=lambda t: -t[1])

    h = np.histogram(times_all[s[0][0]], bins=bins)[0]
    top_sum += h
    second_sum += np.histogram(times_all[s[1][0]], bins=bins)[0]
    third_sum += np.histogram(times_all[s[2][0]], bins=bins)[0]
    fourth_sum += np.histogram(times_all[s[3][0]], bins=bins)[0]

    # Top candidate's share of votes per bin for this district.
    h = h.astype(float) / np.sum(h)
    plt.plot(bins[:-1], smoothen(h, 1.5))

    # make_baseline already returns a normalised histogram, so no further
    # normalisation is needed before plotting.
    baseline, _ = make_baseline(district, votes_all[district], times_all, bin_count=n_bins, bins=bins)
    baseline_sum += baseline
    plt.plot(bins[:-1], smoothen(baseline, 1.5), ls=":")

In [None]:
# Turn each accumulated histogram into a distribution that sums to 1.
top_avg, second_avg, third_avg, fourth_avg, baseline_avg = (
    total / np.sum(total)
    for total in (top_sum, second_sum, third_sum, fourth_sum, baseline_sum)
)

plt.figure(figsize=(20,10))
# Baseline first (thick dotted line), then the ranked candidates in order.
plt.plot(bins[:-1], smoothen(baseline_avg, 1.5), ls=":", lw=4, label="normal")
for series, series_label in (
    (top_avg, "top"),
    (second_avg, "second"),
    (third_avg, "third"),
    (fourth_avg, "fourth"),
):
    plt.plot(bins[:-1], smoothen(series, 1.5), label=series_label)
plt.legend()


In [None]:
def draw_comparison_ratio(times, names, district, colors=None, inds=[[0,1], [2,3]]):
    """Plot two mean-centred ratios of smoothed vote-time histograms.

    A 100-bin histogram is built for every name in ``names``; the bin edges
    are taken from the first name's histogram and reused for the rest. Each
    histogram is smoothed with ``sigma=4``. For each of the two index pairs in
    ``inds`` the ratio ``hs[a] / hs[b]`` is plotted on the current axes, with
    its mean subtracted (red for the first pair, green for the second).

    Args:
        times: mapping candidate name -> vote timestamps.
        names: candidate names, in the order ``inds`` refers to.
        district: accepted for symmetry with the other helpers; not used.
        colors: accepted but not used.
        inds: two ``[numerator_index, denominator_index]`` pairs into ``names``.
    """
    bins = 100
    hs = []
    for name in names:
        h, bins = np.histogram(times[name], bins=bins)
        # Smooth in floating point: gaussian_filter returns an array of the
        # input dtype, so integer counts would come back truncated to integers.
        hs.append(smoothen(h.astype(float), 4))

    series = (
        (inds[0], "red", "Отношение 1-го ко 2-му, сдвинутое на среднее"),
        (inds[1], "green", "Отношение 3-го к 4-му, сдвинутое на среднее"),
    )
    for (numerator_idx, denominator_idx), color, label in series:
        numerator = hs[numerator_idx]
        denominator = hs[denominator_idx]
        # Bins with an empty denominator have no defined ratio. Mark them NaN
        # and ignore them in the mean, so one such bin cannot turn the whole
        # curve into inf/NaN.
        ratio = np.divide(
            numerator,
            denominator,
            out=np.full_like(numerator, np.nan),
            where=denominator > 0,
        )
        plt.plot(bins[:-1], ratio - np.nanmean(ratio), color=color, label=label)

In [None]:
# Overlay the 1st/2nd and 3rd/4th ratio curves of every district and save the figure.
plt.figure(figsize=(20,10))
for district in range(196, 211):
    district_votes = votes_all[district]
    # Candidate names ordered by descending vote count.
    names = sorted(district_votes, key=lambda name: -district_votes[name])
    draw_comparison_ratio(times_all, names, district)

ax = plt.gca()
ticks, labels = make_ticks()
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.tick_params(axis='x', rotation=45)
ax.legend(labels=[
    "Отношение 1-го ко 2-му, сдвинутое на среднее",
    "Отношение 3-го к 4-му, сдвинутое на среднее",
])
_ = ax.set_xlabel("Дата и время")
plt.savefig("images/ratios.png")

In [None]:
# Same overlay as the previous figure, but comparing 2nd/3rd and 3rd/4th places.
plt.figure(figsize=(20,15))
for district in range(196, 211):
    district_votes = votes_all[district]
    # Candidate names ordered by descending vote count.
    names = sorted(district_votes, key=lambda name: -district_votes[name])
    draw_comparison_ratio(times_all, names, district, inds=[[1,2],[2,3]])

ax = plt.gca()
ticks, labels = make_ticks()
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.tick_params(axis='x', rotation=45)
ax.legend(labels=[
    "Отношение 2-го ко 3-му, сдвинутое на среднее",
    "Отношение 3-го к 4-му, сдвинутое на среднее",
])
_ = ax.set_xlabel("Дата и время")

In [None]:
# Two pairwise comparisons for district 197; candidates are looked up by the
# first word of their name.
for first, second, palette in (
    ("Лобанов", "Попов", ["red", "blue"]),
    ("Тарнавский", "Бальмонт", ["green", "purple"]),
):
    draw_comparison(times_all, find(first), find(second), 197, colors=palette)

In [None]:
# Display the entry of `votes_all` for key 197: candidate name -> vote count, as built by `read`.
votes_all[197]

In [None]:
def draw_comparison_ratio_total(times, votes, inds=[[0,1], [2,3]], colors=["red", "green"]):
    """Plot mean-centred ratios of rank-aggregated vote-time histograms.

    Within every district the candidates are ranked by descending vote count.
    The 100-bin histograms of all candidates holding the same rank are summed
    across districts; the bin edges come from the first histogram computed and
    are reused for the rest. The per-rank sums are smoothed with ``sigma=4``.
    For each ``(i1, i2)`` pair in ``inds`` the ratio of rank ``i1`` to rank
    ``i2`` is plotted on the current axes, with its mean subtracted.

    Args:
        times: mapping candidate name -> vote timestamps.
        votes: mapping district -> {candidate name: vote count}.
        inds: pairs of zero-based rank indices (numerator, denominator).
        colors: one line colour per pair in ``inds``.
    """
    bins = 100
    # One accumulator per rank, sized from the data and the requested pairs
    # rather than a fixed 20, so districts with more candidates do not overflow.
    rank_count = max(
        [len(votes_) for votes_ in votes.values()]
        + [max(pair) + 1 for pair in inds],
        default=0,
    )
    hs = [np.zeros(bins) for _ in range(rank_count)]

    for votes_ in votes.values():
        ranking = sorted(votes_.items(), key=lambda t: -t[1])
        for i, (name, _) in enumerate(ranking):
            h, bins = np.histogram(times[name], bins=bins)
            hs[i] += h
    hs = [smoothen(h, 4) for h in hs]

    for (i1, i2), color in zip(inds, colors):
        # Bins with an empty denominator have no defined ratio. Mark them NaN
        # and ignore them in the mean, so they cannot turn the whole curve
        # into inf/NaN.
        ratio = np.divide(
            hs[i1],
            hs[i2],
            out=np.full_like(hs[i1], np.nan),
            where=hs[i2] > 0,
        )
        plt.plot(bins[:-1], ratio - np.nanmean(ratio), color=color, lw=3)

# Rank-aggregated ratio curves for consecutive places (1/2, 2/3, 3/4, 4/5).
plt.figure(figsize=(20,10))
draw_comparison_ratio_total(times_all, votes_all, inds=[[0,1],[1,2],[2,3],[3,4]], colors=["red", "orange", "green", "purple"])
ticks, labels = make_ticks()
ax = plt.gca()
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.tick_params(axis='x', rotation=45)
# One legend entry per plotted pair. The fourth (purple, 4th vs 5th) curve
# previously had no label, because only three labels were supplied.
plt.legend(labels=[
    "Отношение 1-го ко 2-му, сдвинутое на среднее",
    "Отношение 2-го к 3-му, сдвинутое на среднее",
    "Отношение 3-го к 4-му, сдвинутое на среднее",
    "Отношение 4-го к 5-му, сдвинутое на среднее",
])
_ = plt.xlabel("Дата и время")
plt.savefig("images/total_ratio_1vs2_2vs3_3vs4.png")

In [None]:
# Rank-aggregated ratio curves for the default pairs (1st/2nd and 3rd/4th).
plt.figure(figsize=(20,10))
draw_comparison_ratio_total(times_all, votes_all)

ax = plt.gca()
ticks, labels = make_ticks()
ax.set_xticks(ticks)
ax.set_xticklabels(labels)
ax.tick_params(axis='x', rotation=45)
ax.legend(labels=[
    "Отношение 1-го ко 2-му, сдвинутое на среднее",
    "Отношение 3-го к 4-му, сдвинутое на среднее",
])
_ = ax.set_xlabel("Дата и время")
plt.savefig("images/total_ratio_1vs2_3vs4.png")