In [None]:
import copy
import datetime

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from matplotlib import rc

# Global figure styling: seaborn "white" theme first, then the default font
# family and size used by every figure in this notebook.
sns.set_style("white")
plt.rcParams.update({'font.family': 'Arial', 'font.size': 13})

## fig 3a

In [None]:
def get_percentage_lists(df_input, freq='M'):
    """Compute, for every period, the share of rows carrying each Attitude code.

    Parameters
    ----------
    df_input : pandas.DataFrame
        Needs a ``created_at`` column that ``pd.to_datetime`` can parse and an
        ``Attitude`` column. The frame is not modified.
    freq : str, default 'M'
        Frequency passed to ``Series.dt.to_period`` to bucket the timestamps.

    Returns
    -------
    tuple
        ``(share_0, share_1, share_2, periods)``. The first three are lists
        holding the fraction of rows coded 0.0, 1.0 and 2.0 in each period;
        ``periods`` holds the periods those fractions belong to. All four are
        in ascending period order and have the same length.
    """
    # assign() builds a new frame, so the caller's data is left untouched.
    df = df_input.assign(period=pd.to_datetime(df_input['created_at']).dt.to_period(freq))

    # One row per period, one column per attitude code; a code that never
    # occurs in a period (or at all) is reported as a share of 0.
    result = (
        df.groupby('period')['Attitude']
        .value_counts(normalize=True)
        .unstack(fill_value=0)
        .reindex(columns=[0.0, 1.0, 2.0], fill_value=0)
        .sort_index()
    )

    # Take the periods from the aggregated table itself rather than from
    # df['period'].unique(): the latter is in order of first appearance in the
    # data, so it would not line up with the sorted share lists.
    return result[0.0].tolist(), result[1.0].tolist(), result[2.0].tolist(), result.index.array

# Load only the two columns Fig. 3a needs.
df_all = pd.read_csv('data.csv', usecols=['created_at', 'Attitude'])

# Monthly share of each attitude code, together with the months themselves.
percentage_0, percentage_1, percentage_2, dates = get_percentage_lists(df_all, freq='M')

# Number of rows per calendar month, in chronological order.
post_months = pd.to_datetime(df_all['created_at']).dt.to_period('M')
plot_data = post_months.value_counts().sort_index().rename('count')


# a: left axis -- stacked bars with the monthly share of each attitude code
width = 1
x = range(len(dates))

fig, ax = plt.subplots(figsize=(16, 6))
fig.subplots_adjust(left=0.05, right=0.85)

# Bottom edge of the top segment = combined height of the two segments below it.
offset = [p0 + p1 for p0, p1 in zip(percentage_0, percentage_1)]

# Draw order (bottom to top): code 1, code 0, code 2. The labels are the
# legend names this figure has always paired with these segments, in this order.
ax.bar(x, percentage_1, width, label='Positive', color='#80BEAF')
ax.bar(x, percentage_0, width, bottom=percentage_1, label='Neutral', color='#D1DCE2')
ax.bar(x, percentage_2, width, bottom=offset, label='Negative', color='#EE9C6C')

# axis set
ax.set_ylim(0, 1)
# Bars are centred on 0..len(x)-1 with width 1, so this frames them exactly
# regardless of how many months the data covers.
ax.set_xlim(-0.5, len(x) - 0.5)
ax.set_xlabel('Time')
ax.set_ylabel('Prevalence of each attitude')
# A formatter follows the tick locations; fixed label strings would go stale
# as soon as the ticks change.
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=0))

# a: right axis -- monthly number of posts, in thousands
ax2 = ax.twinx()
line2, = ax2.plot(x, plot_data.values / 1000, color='#4A4B9D')  # line plot
# ':g' keeps fractional ticks intact (2.5 -> '2.5K'), whereas int() would
# silently truncate them.
ax2.yaxis.set_major_formatter(mticker.FuncFormatter(lambda value, _pos: f'{value:g}K'))
ax2.set_ylabel('No. of Weibo posts related to HPV vaccination')

# legend: bar segments from the left axis plus the line from the right axis
bar_handles, bar_labels = ax.get_legend_handles_labels()
ax2.legend(bar_handles + [line2], bar_labels + ['No. of\nWeibo\nposts'], bbox_to_anchor=(1.18, 1), ncol=1)

# x ticks: one label every `interval` months, plus the final month
interval = 6
month_labels = [period.strftime('%b. %Y') for period in dates]

xticks = [i * interval for i in range(len(x) // interval)] + [len(x) - 1]
xlabels = [month_labels[tick] for tick in xticks]

# Add downward-pointing tick marks
ax.tick_params(axis='x', which='both', bottom=True, length=6)

# Set the ticks on the primary axes explicitly: plt.xticks() acts on the
# current axes, which here is the twin created by twinx(), so the label
# styling would not reach the x axis that is actually drawn.
ax.set_xticks(xticks)
ax.set_xticklabels(xlabels, rotation=0, fontsize=12)

fig.savefig('xxx.png', dpi=600)
plt.show()



## fig 3b

In [None]:
# load data: timestamp plus the three coded columns plotted in Fig. 3b
risk_col = 'Perceived Disease Risk (+)'
benefit_col = 'Perceived benefits (+)'
barrier_col = 'Perceived barriers to accepting vaccines (-)'

df_all = pd.read_csv(
    '../data/coded_data/desensitized_mainland_labeled_data.csv',
    usecols=['created_at', risk_col, benefit_col, barrier_col],
)

# Derived frame (df_all itself stays untouched): parsed timestamps as the
# index and the calendar month of every row in `date_month`.
created = pd.to_datetime(df_all['created_at'])
df = (
    df_all
    .assign(created_at=created, date_month=created.dt.to_period('M'))
    .set_index('created_at')
)

# data extraction: (column, LOWESS `frac`) for each of the three curves
curve_specs = [(risk_col, 0.1), (benefit_col, 0.13), (barrier_col, 0.2)]

curves = []
for column, frac in curve_specs:
    # Per month: share of rows coded 1.0 among the rows that have a value in
    # this column. Months without any value in the column are left out.
    share = (
        df.dropna(subset=[column])
        .groupby('date_month')[column]
        .agg(lambda codes: codes.eq(1.0).mean())
    )
    # Smooth against the month's position (0, 1, 2, ...) rather than the date.
    positions = np.arange(len(share))
    fitted = sm.nonparametric.lowess(share, positions, frac=frac)
    # lowess returns (x, y) rows; split them into an x tuple and a y tuple.
    curves.append((share, *zip(*fitted)))

(
    (plot_data, smoothed_x, smoothed_y),
    (plot_data2, smoothed_x2, smoothed_y2),
    (plot_data3, smoothed_x3, smoothed_y3),
) = curves

# The plotting cell builds its x-axis labels from `dates` and `x`; as before,
# both describe the last series that was processed.
dates = plot_data3.index
x = np.arange(len(plot_data3))

In [None]:
# plot: the three LOWESS-smoothed monthly prevalence curves prepared above

fig, ax = plt.subplots(figsize=(15, 6))
fig.subplots_adjust(left=0.05, right=0.97, top=0.97)

ax.plot(smoothed_x, smoothed_y, label='Perceived disease risk', color='#186F65')
ax.plot(smoothed_x2, smoothed_y2, label='Perceived benefits of HPV vaccines', color='#B5CB99')
ax.plot(smoothed_x3, smoothed_y3, label='Perceived barriers to accepting HPV vaccines', color='#B2533E')

ax.set_ylim(0, 0.7)
# Half a step of padding on both sides, derived from the number of months
# instead of a hard-coded right edge.
ax.set_xlim(-0.5, len(x) - 0.5)
ax.legend()

ax.set_xlabel('Time')
ax.set_ylabel('Prevalence')
# A formatter follows the tick locations; fixed label strings would go stale
# as soon as the ticks change.
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=0))

# x ticks: one label every `interval` months, plus the final month
interval = 6
month_labels = [period.strftime('%b. %Y') for period in dates]

xticks = [i * interval for i in range(len(x) // interval)] + [len(x) - 1]
xlabels = [month_labels[tick] for tick in xticks]

# Add downward-pointing tick marks
ax.tick_params(axis='x', which='both', bottom=True, length=6)
ax.set_xticks(xticks)
ax.set_xticklabels(xlabels, rotation=0, fontsize=12)

fig.savefig('HBM.png', dpi=600)
plt.show()

## fig 3c

In [None]:
# load data: timestamp plus the three coded columns plotted in Fig. 3c
practical_col = 'Practical barriers to vaccination (-)'
misinfo_col = 'Misinformation (-)'
norms_col = 'Social norms  cues to action (+)'

df_all = pd.read_csv(
    '../data/coded_data/desensitized_mainland_labeled_data.csv',
    usecols=['created_at', practical_col, misinfo_col, norms_col],
)

# Derived frame (df_all itself stays untouched): parsed timestamps as the
# index and the calendar month of every row in `date_month`.
created = pd.to_datetime(df_all['created_at'])
df = (
    df_all
    .assign(created_at=created, date_month=created.dt.to_period('M'))
    .set_index('created_at')
)

# data extraction: (column, LOWESS `frac`) for each of the three curves
curve_specs = [(practical_col, 0.1), (misinfo_col, 0.13), (norms_col, 0.2)]

curves = []
for column, frac in curve_specs:
    # Per month: share of rows coded 1.0 among the rows that have a value in
    # this column. Months without any value in the column are left out.
    share = (
        df.dropna(subset=[column])
        .groupby('date_month')[column]
        .agg(lambda codes: codes.eq(1.0).mean())
    )
    # Smooth against the month's position (0, 1, 2, ...) rather than the date.
    positions = np.arange(len(share))
    fitted = sm.nonparametric.lowess(share, positions, frac=frac)
    # lowess returns (x, y) rows; split them into an x tuple and a y tuple.
    curves.append((share, *zip(*fitted)))

(
    (plot_data, smoothed_x, smoothed_y),
    (plot_data2, smoothed_x2, smoothed_y2),
    (plot_data3, smoothed_x3, smoothed_y3),
) = curves

# The plotting cell builds its x-axis labels from `dates` and `x`; as before,
# both describe the last series that was processed.
dates = plot_data3.index
x = np.arange(len(plot_data3))

In [None]:
# plot: the three LOWESS-smoothed monthly prevalence curves prepared above

fig, ax = plt.subplots(figsize=(15, 6))
fig.subplots_adjust(left=0.05, right=0.97, top=0.97)

ax.plot(smoothed_x, smoothed_y, label='Practical barriers to HPV vaccination', color='#FBA1B7')
ax.plot(smoothed_x2, smoothed_y2, label='Misinformation', color='#FF6E31')
ax.plot(smoothed_x3, smoothed_y3, label='Social norms', color='#243763')

ax.set_ylim(0, 0.5)
# Half a step of padding on both sides, derived from the number of months
# instead of a hard-coded right edge.
ax.set_xlim(-0.5, len(x) - 0.5)
ax.legend()

ax.set_xlabel('Time')
ax.set_ylabel('Prevalence')
# A formatter follows the tick locations; fixed label strings would go stale
# as soon as the ticks change.
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=1, decimals=0))

# x ticks: one label every `interval` months, plus the final month
interval = 6
month_labels = [period.strftime('%b. %Y') for period in dates]

xticks = [i * interval for i in range(len(x) // interval)] + [len(x) - 1]
xlabels = [month_labels[tick] for tick in xticks]

# Add downward-pointing tick marks
ax.tick_params(axis='x', which='both', bottom=True, length=6)
ax.set_xticks(xticks)
ax.set_xticklabels(xlabels, rotation=0, fontsize=12)

fig.savefig('external environment.png', dpi=600)
plt.show()