In [None]:
import copy
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from matplotlib import rc
from matplotlib.ticker import PercentFormatter

# Global plot appearance: plain white seaborn theme, Arial 13 pt text.
sns.set_style("white")
plt.rcParams.update({
    'font.family': 'Arial',
    'font.size': 13,
})

In [None]:
# Prepare data: for each vaccine label, compute the monthly share of every
# Attitude code among posts that carry that label and no other label.
#
# Outputs consumed by the plotting cell:
#   dates  - PeriodIndex of months, shared by all labels
#   dict_0 - {label: [share of Attitude 0.0 per month]}  (neu)
#   dict_1 - {label: [share of Attitude 1.0 per month]}  (pos)
#   dict_2 - {label: [share of Attitude 2.0 per month]}  (neg)
df_all = pd.read_csv('249_all.csv', usecols=['2', '4', '9', 'created_at', 'Attitude', 'Behavior'])

VACCINE_LABELS = ['2', '4', '9']
ATTITUDE_CODES = [0.0, 1.0, 2.0]

# Per-row sum of the three label columns; only rows where it equals 1 are kept.
label_total = df_all['2'] + df_all['4'] + df_all['9']

share_tables = {}  # label -> DataFrame (index: month, columns: Attitude code)
for label in VACCINE_LABELS:
    keep = (df_all[label] == 1) & (label_total == 1)
    # .copy() so the column assignments below act on an independent frame
    # instead of a slice of df_all (avoids SettingWithCopyWarning).
    df = df_all.loc[keep, ['created_at', 'Attitude', 'Behavior']].copy()
    print(df.shape)

    # to month
    df['created_at'] = pd.to_datetime(df['created_at'])
    df['date_month'] = df['created_at'].dt.to_period('M')
    df = df.set_index('created_at')

    # Share of each Attitude code within every month. Rows with a missing
    # Attitude are excluded from both numerator and denominator, as
    # value_counts drops NaN by default.
    share_tables[label] = (
        df.groupby('date_month')['Attitude']
        .value_counts(normalize=True)
        .unstack(fill_value=0.0)
        .reindex(columns=ATTITUDE_CODES, fill_value=0.0)
    )

# Put every label on one common month axis so the per-label lists have equal
# length and position i means the same month everywhere.
dates = share_tables[VACCINE_LABELS[0]].index
for table in share_tables.values():
    dates = dates.union(table.index)

dict_1 = {} # pos
dict_0 = {} # neu
dict_2 = {} # neg

for label, table in share_tables.items():
    # NOTE(review): a month with no posts for this label gets a share of 0.0
    # so the series stay aligned; confirm that is the desired treatment.
    aligned = table.reindex(dates, fill_value=0.0)
    dict_0[label] = aligned[0.0].tolist()
    dict_1[label] = aligned[1.0].tolist()
    dict_2[label] = aligned[2.0].tolist()


In [None]:
# Plot the LOWESS-smoothed monthly share of each attitude class: one figure per
# class ('neu', 'pos', 'neg') with one line per vaccine label. Reads dict_0,
# dict_1, dict_2 and `dates` from the data-preparation cell and writes one PNG
# per class.
colors = {'2': '#81DFCD', '4': '#CA956F', '9': '#D35A7F',}
ylimits = {'pos': 0.90, 'neg': 0.20, 'neu': 0.70}
dict_dict = dict(zip(['neu', 'pos', 'neg',], [dict_0, dict_1, dict_2]))

line_styles = {'2': '-', '4': '--', '9': '-.'}
LOWESS_FRAC = 0.1   # fraction of the points used for each local fit
TICK_INTERVAL = 6   # put an x tick on every sixth month

# Month tick labels such as "Jan. 2018"; identical for every figure, so they
# are built once outside the loop.
month_labels = [period.strftime('%b. %Y') for period in dates]
n_months = len(month_labels)
xticks = [i * TICK_INTERVAL for i in range(n_months // TICK_INTERVAL)] + [n_months - 1]
xlabels = [month_labels[pos] for pos in xticks]

for label, dict_n in dict_dict.items():
    fig, ax = plt.subplots(figsize=(16, 6))
    fig.subplots_adjust(left=0.05, right=0.97, top=0.95)

    for vaccine in ('2', '4', '9'):
        shares = np.asarray(dict_n[vaccine], dtype=float)
        x = np.arange(len(shares))
        # lowess returns an (n, 2) array of (x, fitted value) rows sorted by x.
        # Each line is drawn against its own x column rather than one shared
        # across the three fits.
        smoothed = sm.nonparametric.lowess(shares, x, frac=LOWESS_FRAC)
        ax.plot(smoothed[:, 0], smoothed[:, 1],
                label=f'{vaccine}-valent HPV vaccine',
                color=colors[vaccine], linestyle=line_styles[vaccine])

    # axis set
    ax.set_ylim(0, ylimits[label])
    # A formatter keeps labels correct wherever the ticks land; fixed label
    # strings on auto-located ticks can drift out of sync with the tick values.
    ax.yaxis.set_major_formatter(PercentFormatter(xmax=1, decimals=0))
    # Half a month of padding on both sides, derived from the data length.
    ax.set_xlim(-0.5, n_months - 0.5)
    ax.set_xlabel('Time')
    ax.set_ylabel('Prevalence')

    # Add downward-pointing tick marks
    ax.tick_params(bottom=True, left=True, length=6)
    ax.legend()
    ax.set_xticks(xticks)
    ax.set_xticklabels(xlabels, rotation=0, fontsize=12)
    # save to png (file name spelling kept unchanged so existing paths still match)
    fig.savefig(f'smothed_{label}.png', dpi=600)
plt.show()