In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import transformers
from transformers import pipeline

In [None]:
# Load the preprocessed review data; later cells read columns such as
# 'Age', 'Satisfaction', 'Reviews', 'Drug', 'effectiveness' and 'side_effect'.
df = pd.read_csv(filepath_or_buffer="Opal/Data/preprocessed_osteoporosis_prob.csv")

In [None]:
# Collapse the fine-grained age brackets into five coarse bins: every bracket
# below 45 is merged into '0-44' and '75 or over' is shortened to '75+'.
# The already-binned labels map to themselves so that re-running this cell
# (which overwrites df['Age'] in place) leaves the column unchanged instead of
# blanking every value that is no longer a raw bracket.
age_bins = {
    '7-12': '0-44',
    '13-18': '0-44',
    '19-24': '0-44',
    '25-34': '0-44',
    '35-44': '0-44',
    '45-54': '45-54',
    '55-64': '55-64',
    '65-74': '65-74',
    '75 or over': '75+',
    # identity entries: make the cell idempotent
    '0-44': '0-44',
    '75+': '75+',
}

# Anything not listed above (e.g. missing values) becomes the empty string.
bin_age = [age_bins.get(age, '') for age in df['Age']]

df['Age'] = bin_age

In [None]:
# Derive a binary sentiment label from the satisfaction rating: a rating
# below 3 is 'bad', everything else is 'good'.
# NOTE(review): a missing (NaN) rating compares False against `< 3` and is
# therefore labelled 'good' -- confirm that is intended.
Review = ['bad' if satisfaction < 3 else 'good' for satisfaction in df['Satisfaction']]

df['Review'] = Review

## n-gram helper functions

In [None]:
from wordcloud import WordCloud, STOPWORDS
# Extra tokens to ignore on top of wordcloud's built-in stop-word list.
more_stopwords = {'went', 'go', 'one', 'two', '2', '3', 'side', 'effects'}

# Final stop-word set used by generate_ngrams.
stopwords = set(STOPWORDS) | more_stopwords

In [None]:
from collections import defaultdict
import nltk
from plotly import tools
import plotly.offline as py
# Put plotly into offline notebook mode so that the py.iplot(...) calls in
# later cells render their figures inside the notebook.
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
"""import nltk
from collections import defaultdict
from nltk.tokenize import word_tokenize
#nltk.word_tokenize(df_ost['Reviews'].iloc[0])

tokenizer = nltk.RegexpTokenizer(r"\w+")
#new_words = tokenizer.tokenize(df_ost['Reviews'].iloc[0])"""

In [None]:
def remove_punc(string):
    """Return ``string`` with every punctuation character removed.

    The characters stripped are ``!()-[]{};:'"\\, <>./?@#$%^&*_~`` -- note
    that the set includes the backslash, the comma and the space character.
    Characters outside that set (for example ``+`` or ``=``) are kept.

    Parameters
    ----------
    string : str
        Text to clean.

    Returns
    -------
    str
        The input with all listed characters deleted.
    """
    # Raw string: the original non-raw literal contained the invalid escape
    # sequence "\," (a SyntaxWarning on recent Python versions). The set of
    # characters is unchanged.
    punc = r'''!()-[]{};:'"\, <>./?@#$%^&*_~'''
    # One translate pass instead of one str.replace per matching character.
    return string.translate(str.maketrans('', '', punc))

In [None]:
def generate_ngrams(text, n_gram=1):
    """Split ``text`` into lower-cased word n-grams, skipping stop words.

    Each whitespace-separated token is lower-cased and stripped of
    punctuation with ``remove_punc``. A token is dropped when it is empty
    after stripping, or when either its raw or its stripped form is in the
    notebook-level ``stopwords`` set. Checking both forms means "effects,"
    is filtered like "effects" while entries such as "don't" still match.

    Parameters
    ----------
    text : str
        Review text. Non-string values (e.g. NaN for a missing review)
        yield an empty list.
    n_gram : int, default 1
        Number of consecutive tokens per n-gram.

    Returns
    -------
    list of str
        The n-grams in order of appearance, tokens joined by one space.
    """
    if not isinstance(text, str):
        return []

    cleaned = []
    # split() without an argument also breaks on tabs and newlines.
    for raw in text.lower().split():
        word = remove_punc(raw)
        # Punctuation-only tokens collapse to "" and must not enter an n-gram.
        if word and raw not in stopwords and word not in stopwords:
            cleaned.append(word)

    # zip over n_gram shifted copies of the token list yields the windows.
    windows = zip(*(cleaned[offset:] for offset in range(n_gram)))
    return [" ".join(window) for window in windows]

In [None]:
## helper that builds one horizontal bar trace ##
def horizontal_bar_chart(df, color):
    """Create a horizontal plotly bar trace from a word-count frame.

    Parameters
    ----------
    df : DataFrame
        Must provide the columns "word" (bar labels) and "wordcount"
        (bar lengths).
    color : str
        Fill colour used for every bar.

    Returns
    -------
    plotly.graph_objs.Bar
        The trace, with no legend entry.
    """
    # Both columns are reversed, presumably so that the first row of ``df``
    # ends up as the top bar of the chart.
    words = df["word"].values[::-1]
    counts = df["wordcount"].values[::-1]
    return go.Bar(
        x=counts,
        y=words,
        orientation='h',
        showlegend=False,
        marker={'color': color},
    )

## Effectiveness

In [None]:
# 'effectiveness' is a probability rather than a 0/1 flag; keep the reviews
# whose value is above 0.7.
df_effectiveness = df.loc[df["effectiveness"] > 0.7]

In [None]:
# Count bigrams separately for the 'bad' and the 'good' effectiveness reviews
# and build one bar trace (top 10 bigrams) for each.
bigram_traces = []
for review_label in ('bad', 'good'):
    freq_dict = defaultdict(int)
    for sent in df_effectiveness.loc[df_effectiveness["Review"] == review_label, 'Reviews']:
        for word in generate_ngrams(sent, 2):
            freq_dict[word] += 1
    # Naming the columns in the constructor keeps this working when a class
    # has no bigrams at all; assigning .columns afterwards raises on an
    # empty frame. Ascending sort + reverse preserves the original tie order.
    fd_sorted = pd.DataFrame(
        sorted(freq_dict.items(), key=lambda x: x[1])[::-1],
        columns=["word", "wordcount"],
    )
    bigram_traces.append(horizontal_bar_chart(fd_sorted.head(10), 'orange'))
trace1, trace2 = bigram_traces

# Creating two subplots: bad reviews on the left, good reviews on the right
fig = tools.make_subplots(rows=1, cols=2, vertical_spacing=0.04, horizontal_spacing=0.15,
                          subplot_titles=["Frequent biagrams of Bad Reviews",
                                          "Frequent biagrams of Good Reviews"])
fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 2)
fig['layout'].update(height=1200, width=1000, paper_bgcolor='rgb(233,233,233)', title="Bigram Count Plots")
py.iplot(fig, filename='word-plots')

## Classification

In [None]:
# Zero-shot classification pipeline used to score reviews against
# free-text side-effect labels.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

### Sample Experimentation

In [None]:
"""#A sample review that contains a) whether the medicine worked b) the type of side effect and c) the delivery method (pill)

sample_review = "This osteoporosis medicine worked, but it gave me a nasty stomach ache. It was easy to swallow."


res = classifier(
    sample_review,
    candidate_labels = ['limb pain', 'gastrointestinal', 'dental', 'cardiac', 'dermatological', 'respiratory' , 'weight gain and loss', 'headache'], 
    multi_class = True
)
res"""

In [None]:
# Candidate side-effect labels passed to the zero-shot classifier.
sides = [
    'limb pain',
    'gastrointestinal',
    'dental',
    'cardiac',
    'dermatological',
    'respiratory',
    'weight gain and loss',
    'headache',
    'flu',
    'back pain',
    'jaw pain',
]

In [None]:
# Score every review against each label in `sides` with the zero-shot
# classifier and store the scores in '<label>_m' columns ("_m" = multi-label).
score_rows = []
for review in df['Reviews']:
    res = classifier(
        review,
        candidate_labels=sides,
        multi_label=True,
    )
    # res['labels'] is in the pipeline's own order, not the order of `sides`,
    # so pair every label with its score explicitly.
    score_rows.append(dict(zip(res['labels'], res['scores'])))

# Build all score columns in one go and assign whole columns. The previous
# per-cell `df[col].iloc[j] = score` is chained assignment: it triggers
# SettingWithCopyWarning and does not modify `df` at all under pandas
# Copy-on-Write. -1 still marks a label for which no score was returned.
label_scores = pd.DataFrame(score_rows, columns=sides).fillna(-1.0)
for label in sides:
    df[label + '_m'] = label_scores[label].to_numpy()

In [None]:
#df.to_csv('data/updated_df.csv')

In [None]:
# Highest side-effect score per review.
score_columns = [
    'limb pain_m', 'gastrointestinal_m', 'dental_m', 'cardiac_m',
    'dermatological_m', 'respiratory_m', 'weight gain and loss_m',
    'headache_m', 'flu_m', 'body ache_m', 'back pain_m', 'jaw pain_m',
]
# 'body ache_m' is only created by a later cell of this notebook, so on a
# top-to-bottom run it does not exist yet and selecting it raised KeyError.
# Use whichever of the score columns are present at this point.
available_columns = [col for col in score_columns if col in df.columns]
df['max'] = df[available_columns].max(axis=1)
df.head()

# Side Effects

In [None]:
# Reviews whose 'side_effect' value is above 0.7. reset_index() renumbers the
# rows from 0 and keeps the previous row labels in a new 'index' column.
df_side_effects = df.loc[df["side_effect"] > 0.7].reset_index()

In [None]:
# Total reviews, side-effect reviews, and the share of the latter.
print(
    len(df),
    len(df_side_effects),
    round(len(df_side_effects) / len(df), 2),
    sep='\n',
)

In [None]:
# Histogram of the highest label score of each side-effect review, with the
# count written on every bar.
sns.set_style('ticks')
fig, ax = plt.subplots()
ax = sns.histplot(data=df_side_effects, x='max', ax=ax)
for bar_group in ax.containers:
    ax.bar_label(bar_group)

In [None]:
# Side-effect reviews whose top label score exceeds 0.7, the number of
# side-effect reviews overall, and the ratio of the two.
print(
    len(df_side_effects.loc[df_side_effects['max'] > 0.7]),
    len(df_side_effects),
    round(len(df_side_effects.loc[df_side_effects['max'] > 0.7]) / len(df_side_effects), 2),
    sep='\n',
)

In [None]:
"""side1 = []
side2 = []
side3 = []
side4 = []
side5 = []
side6 = []
side7 = []
side8 = []
side9 = []

thresh = 0.7

for i in range(len(df_side_effects)):
    if df_side_effects['limb pain_m'][i] >= thresh:
        side1.append(1)
    elif df_side_effects['limb pain_m'][i] < thresh:
        side1.append(0)
    
    if df_side_effects['gastrointestinal_m'][i] >= thresh:
        side2.append(1)
    elif df_side_effects['gastrointestinal_m'][i] < thresh:
        side2.append(0)

    if df_side_effects['dental_m'][i] >= thresh:
        side3.append(1)
    elif df_side_effects['dental_m'][i] < thresh:
        side3.append(0)

    if df_side_effects['cardiac_m'][i] >= thresh:
        side4.append(1)
    elif df_side_effects['cardiac_m'][i] < thresh:
        side4.append(0)

    if df_side_effects['dermatological_m'][i] >= thresh:
        side5.append(1)
    elif df_side_effects['dermatological_m'][i] < thresh:
        side5.append(0)

    if df_side_effects['respiratory_m'][i] >= thresh:
        side6.append(1)
    elif df_side_effects['respiratory_m'][i] < thresh:
        side6.append(0)

    if df_side_effects['weight gain and loss_m'][i] >= thresh:
        side7.append(1)
    elif df_side_effects['weight gain and loss_m'][i] < thresh:
        side7.append(0)

    if df_side_effects['headache_m'][i] >= thresh:
        side8.append(1)
    elif df_side_effects['headache_m'][i] < thresh:
        side8.append(0)



df_side_effects['limb pain'] = side1
df_side_effects['gastrointestinal'] = side2
df_side_effects['dental'] = side3
df_side_effects['cardiac'] = side4
df_side_effects['dermatological'] = side5
df_side_effects['respiratory'] = side6
df_side_effects['weight'] = side7
df_side_effects['headache'] = side8"""


## Side Effects - Bigrams

In [None]:
# Count bigrams separately for the 'bad' and the 'good' side-effect reviews
# and build one bar trace (top 10 bigrams) for each.
bigram_traces = []
for review_label in ('bad', 'good'):
    freq_dict = defaultdict(int)
    for sent in df_side_effects.loc[df_side_effects["Review"] == review_label, 'Reviews']:
        for word in generate_ngrams(sent, 2):
            freq_dict[word] += 1
    # Naming the columns in the constructor keeps this working when a class
    # has no bigrams at all; assigning .columns afterwards raises on an
    # empty frame. Ascending sort + reverse preserves the original tie order.
    fd_sorted = pd.DataFrame(
        sorted(freq_dict.items(), key=lambda x: x[1])[::-1],
        columns=["word", "wordcount"],
    )
    bigram_traces.append(horizontal_bar_chart(fd_sorted.head(10), 'orange'))
trace1, trace2 = bigram_traces

# Creating two subplots: bad reviews on the left, good reviews on the right
fig = tools.make_subplots(rows=1, cols=2, vertical_spacing=0.04, horizontal_spacing=0.15,
                          subplot_titles=["Frequent biagrams of Bad Reviews",
                                          "Frequent biagrams of Good Reviews"])
fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 2)
fig['layout'].update(height=1200, width=1000, paper_bgcolor='rgb(233,233,233)', title="Bigram Count Plots")
py.iplot(fig, filename='word-plots')

## Bigrams of < 0.7 confidence

In [None]:
# Snapshot the side-effect reviews to disk without the row index.
df_side_effects.to_csv(path_or_buf='temp.csv', index=False)

In [None]:
# Side-effect reviews for which no label reached a score of 0.7.
df2 = df_side_effects.loc[df_side_effects['max'].lt(0.7)]

In [None]:
# Count bigrams separately for the 'bad' and the 'good' reviews in df2 and
# build one bar trace (top 30 bigrams) for each.
bigram_traces = []
for review_label in ('bad', 'good'):
    freq_dict = defaultdict(int)
    for sent in df2.loc[df2["Review"] == review_label, 'Reviews']:
        for word in generate_ngrams(sent, 2):
            freq_dict[word] += 1
    # Naming the columns in the constructor keeps this working when a class
    # has no bigrams at all; assigning .columns afterwards raises on an
    # empty frame. Ascending sort + reverse preserves the original tie order.
    fd_sorted = pd.DataFrame(
        sorted(freq_dict.items(), key=lambda x: x[1])[::-1],
        columns=["word", "wordcount"],
    )
    bigram_traces.append(horizontal_bar_chart(fd_sorted.head(30), 'orange'))
trace1, trace2 = bigram_traces

# Creating two subplots: bad reviews on the left, good reviews on the right
fig = tools.make_subplots(rows=1, cols=2, vertical_spacing=0.04, horizontal_spacing=0.15,
                          subplot_titles=["Frequent biagrams of Bad Reviews",
                                          "Frequent biagrams of Good Reviews"])
fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 2)
fig['layout'].update(height=1200, width=1000, paper_bgcolor='rgb(233,233,233)', title="Bigram Count Plots")
py.iplot(fig, filename='word-plots')

### Modifications

In [None]:
# Builds the zero-shot classification pipeline.
# NOTE(review): an identical pipeline is already created in the
# "Classification" section; this cell constructs it a second time.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

In [None]:
sides2 = ['flu', 'body ache']

# Score every review against the labels in `sides2` and store the scores in
# '<label>_m' columns ("_m" = multi-label).
# NOTE(review): df_side_effects was sliced from df in an earlier cell, so the
# columns written here do not appear in it unless that cell is re-run.
score_rows = []
for review in df['Reviews']:
    res = classifier(
        review,
        candidate_labels=sides2,
        multi_label=True,
    )
    # res['labels'] is in the pipeline's own order, so pair every label with
    # its score explicitly.
    score_rows.append(dict(zip(res['labels'], res['scores'])))

# Assign whole columns instead of `df[col].iloc[j] = score`: that chained
# assignment triggers SettingWithCopyWarning and does not modify `df` at all
# under pandas Copy-on-Write. -1 still marks a label with no returned score.
label_scores = pd.DataFrame(score_rows, columns=sides2).fillna(-1.0)
for label in sides2:
    df[label + '_m'] = label_scores[label].to_numpy()

In [None]:
sides3 = ['back pain']

# Score every review against the labels in `sides3` and store the scores in
# '<label>_m' columns ("_m" = multi-label).
score_rows = []
for review in df['Reviews']:
    res = classifier(
        review,
        candidate_labels=sides3,
        multi_label=True,
    )
    # Pair every returned label with its score explicitly.
    score_rows.append(dict(zip(res['labels'], res['scores'])))

# Assign whole columns instead of `df[col].iloc[j] = score`: that chained
# assignment triggers SettingWithCopyWarning and does not modify `df` at all
# under pandas Copy-on-Write. -1 still marks a label with no returned score.
label_scores = pd.DataFrame(score_rows, columns=sides3).fillna(-1.0)
for label in sides3:
    df[label + '_m'] = label_scores[label].to_numpy()

In [None]:
sides4 = ['jaw pain']

# Score every review against the labels in `sides4` and store the scores in
# '<label>_m' columns ("_m" = multi-label).
score_rows = []
for review in df['Reviews']:
    res = classifier(
        review,
        candidate_labels=sides4,
        multi_label=True,
    )
    # Pair every returned label with its score explicitly.
    score_rows.append(dict(zip(res['labels'], res['scores'])))

# Assign whole columns instead of `df[col].iloc[j] = score`: that chained
# assignment triggers SettingWithCopyWarning and does not modify `df` at all
# under pandas Copy-on-Write. -1 still marks a label with no returned score.
label_scores = pd.DataFrame(score_rows, columns=sides4).fillna(-1.0)
for label in sides4:
    df[label + '_m'] = label_scores[label].to_numpy()

In [None]:
# Quick check of the classifier on one hand-written sentence, scored against
# two candidate labels.
sample_review = "I have a serious back pain"

res = classifier(
    sample_review,
    candidate_labels=['flu', 'body pain'],
    # `multi_class` is the deprecated name of this option; use `multi_label`
    # like the other classifier calls in this notebook.
    multi_label=True,
)
res

# NEED TO CHANGE THIS

In [None]:
# Give every side-effect review a single category tag: the first 0/1 flag
# column (in the priority order below) that equals 1 decides the tag, and a
# review with no flag set is tagged 'other'.
# NOTE(review): within this notebook the flag columns read here are only
# assigned inside a cell that is disabled (wrapped in a string literal);
# confirm they are already present in the input data, otherwise this cell
# raises KeyError on a fresh run.
side_tf = []

thresh = 0.7

flag_tags = [
    ('limb pain', 'limb'),
    ('gastrointestinal', 'gastro'),
    ('dental', 'dental'),
    ('cardiac', 'cardiac'),
    ('dermatological', 'derma'),
    ('respiratory', 'respiratory'),
    ('weight', 'weight'),
    ('headache', 'headache'),
]

# Walk the flag columns row by row; `flags` holds one review's eight flags.
for flags in zip(*(df_side_effects[flag_col] for flag_col, _ in flag_tags)):
    side_tf.append(
        next((tag for (_, tag), flag in zip(flag_tags, flags) if flag == 1), 'other')
    )

df_side_effects['side_tf'] = side_tf

## EDA (age distribution, e.g. dental side effects tend to affect older people)

In [None]:
#df_side_effects.drop(['Unnamed: 0'], axis = 1)

# Side Effects EDA

In [None]:
# One sub-frame per side-effect category: the reviews whose 0/1 flag for that
# category equals 1. The flags are independent columns, so the same review
# can appear in several sub-frames.
# NOTE(review): within this notebook the flag columns are only assigned
# inside a disabled (string-literal) cell -- confirm they exist in the data.
(df_limb, df_gastro, df_dental, df_cardiac,
 df_derma, df_resp, df_weight, df_headache) = (
    df_side_effects.loc[df_side_effects[flag_col] == 1]
    for flag_col in (
        'limb pain', 'gastrointestinal', 'dental', 'cardiac',
        'dermatological', 'respiratory', 'weight', 'headache',
    )
)

In [None]:
# Share of side-effect reviews flagged with each category, in percent.
total = len(df_side_effects)
effect_frames = [
    ('limb', df_limb),
    # spelling of the printed label fixed (was 'gastrointesntinal')
    ('gastrointestinal', df_gastro),
    ('headache', df_headache),
    ('respiratory', df_resp),
    ('weight loss/gain', df_weight),
    ('dental', df_dental),
    ('dermatological', df_derma),
    ('cardiac', df_cardiac),
]
for effect_name, effect_df in effect_frames:
    print(effect_name + ' side effects: ', round((len(effect_df)/total)*100, 2), '%')

In [None]:
sns.set(
    rc={"figure.dpi": 200, 'savefig.dpi': 200},
    font_scale=0.5,
    style="white",
)

# Percentage of side-effect reviews per category, in the same order as `labels`.
# NOTE(review): the category frames come from independent 0/1 flags, so one
# review may be counted in several slices -- confirm a pie chart is intended.
labels = ['Limb', 'Gastro', 'Headache', 'Respiratory', 'Weight', 'Dental', 'Derma', 'Cardiac' ]
data = [
    round((len(effect_df)/total)*100, 2)
    for effect_df in (df_limb, df_gastro, df_headache, df_resp,
                      df_weight, df_dental, df_derma, df_cardiac)
]

# first eight colours of seaborn's pastel palette, one per slice
colors = sns.color_palette('pastel')[0:8]

plt.pie(data, labels=labels, colors=colors, autopct='%.0f%%')
plt.show()

In [None]:
# Stacked bars: limb-pain reviews per age bin, split by Review label.
df_plot = (
    df_limb.groupby(['Age', 'Review'])
    .size()
    .reset_index()
    .pivot(index='Age', columns='Review', values=0)
)
df_plot.plot.bar(stacked=True, color=["skyblue", "gray"])
plt.title(label='Limb Pain', fontsize=12)

In [None]:
sns.set(
    rc={"figure.dpi": 100, 'savefig.dpi': 100},
    font_scale=0.5,
    style="white",
)
# Limb-pain reviews per age bin, bins in ascending age order.
sns.countplot(
    data=df_limb,
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)
plt.title(label='Limb Pain', fontsize=12)

"""ax = plt.gca()
ax.set_ylim([0, 200])"""

In [None]:
# Five drugs with the most limb-pain reviews (counting non-missing 'Age').
(
    df_limb.groupby('Drug')['Age']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .iloc[:5]
)

In [None]:
# Age distribution of limb-pain reviews labelled 'good'.
sns.countplot(
    data=df_limb.loc[df_limb['Review'].eq('good')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Age distribution of limb-pain reviews labelled 'bad'.
sns.countplot(
    data=df_limb.loc[df_limb['Review'].eq('bad')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Stacked bars: gastrointestinal reviews per age bin, split by Review label.
df_plot = (
    df_gastro.groupby(['Age', 'Review'])
    .size()
    .reset_index()
    .pivot(index='Age', columns='Review', values=0)
)
df_plot.plot.bar(stacked=True, color=["skyblue", "gray"])
# Title spelling fixed (was 'Gastrointesntinal').
plt.title(label='Gastrointestinal', fontsize=12)

In [None]:
# Gastrointestinal reviews per age bin, bins in ascending age order.
sns.countplot(
    data=df_gastro,
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)
# Title spelling fixed (was 'Gastrointesntinal').
plt.title(label='Gastrointestinal', fontsize=12)

In [None]:
# Five drugs with the most gastrointestinal reviews (counting non-missing 'Age').
(
    df_gastro.groupby('Drug')['Age']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .iloc[:5]
)

In [None]:
# Age distribution of gastrointestinal reviews labelled 'good'.
sns.countplot(
    data=df_gastro.loc[df_gastro['Review'].eq('good')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Age distribution of gastrointestinal reviews labelled 'bad'.
sns.countplot(
    data=df_gastro.loc[df_gastro['Review'].eq('bad')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Stacked bars: dental reviews per age bin, split by Review label.
df_plot = (
    df_dental.groupby(['Age', 'Review'])
    .size()
    .reset_index()
    .pivot(index='Age', columns='Review', values=0)
)
df_plot.plot.bar(stacked=True, color=["skyblue", "gray"])
plt.title(label='Dental', fontsize=12)

In [None]:
# Dental reviews per age bin, bins in ascending age order.
sns.countplot(
    data=df_dental,
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)
plt.title(label='Dental', fontsize=12)

In [None]:
# Five drugs with the most dental reviews (counting non-missing 'Age').
(
    df_dental.groupby('Drug')['Age']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .iloc[:5]
)

In [None]:
# Age distribution of dental reviews labelled 'good'.
sns.countplot(
    data=df_dental.loc[df_dental['Review'].eq('good')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Age distribution of dental reviews labelled 'bad'.
sns.countplot(
    data=df_dental.loc[df_dental['Review'].eq('bad')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Stacked bars: cardiac reviews per age bin, split by Review label.
df_plot = (
    df_cardiac.groupby(['Age', 'Review'])
    .size()
    .reset_index()
    .pivot(index='Age', columns='Review', values=0)
)
df_plot.plot.bar(stacked=True, color=["skyblue", "gray"])
plt.title(label='Cardiac', fontsize=12)

In [None]:
# Cardiac reviews per age bin, bins in ascending age order.
sns.countplot(
    data=df_cardiac,
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)
plt.title(label='Cardiac', fontsize=12)

In [None]:
# Five drugs with the most cardiac reviews (counting non-missing 'Age').
(
    df_cardiac.groupby('Drug')['Age']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .iloc[:5]
)

In [None]:
# Age distribution of cardiac reviews labelled 'good'.
sns.countplot(
    data=df_cardiac.loc[df_cardiac['Review'].eq('good')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Age distribution of cardiac reviews labelled 'bad'.
sns.countplot(
    data=df_cardiac.loc[df_cardiac['Review'].eq('bad')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Stacked bars: dermatological reviews per age bin, split by Review label.
df_plot = (
    df_derma.groupby(['Age', 'Review'])
    .size()
    .reset_index()
    .pivot(index='Age', columns='Review', values=0)
)
df_plot.plot.bar(stacked=True, color=["skyblue", "gray"])
# Title spelling fixed (was 'Dematological').
plt.title(label='Dermatological', fontsize=12)

In [None]:
# Dermatological reviews per age bin, bins in ascending age order.
sns.countplot(
    data=df_derma,
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)
# Title spelling fixed (was 'Dematological').
plt.title(label='Dermatological', fontsize=12)

In [None]:
# Five drugs with the most dermatological reviews (counting non-missing 'Age').
(
    df_derma.groupby('Drug')['Age']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .iloc[:5]
)

In [None]:
# Age distribution of dermatological reviews labelled 'good'.
sns.countplot(
    data=df_derma.loc[df_derma['Review'].eq('good')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Age distribution of dermatological reviews labelled 'bad'.
sns.countplot(
    data=df_derma.loc[df_derma['Review'].eq('bad')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Stacked bars: respiratory reviews per age bin, split by Review label.
df_plot = (
    df_resp.groupby(['Age', 'Review'])
    .size()
    .reset_index()
    .pivot(index='Age', columns='Review', values=0)
)
df_plot.plot.bar(stacked=True, color=["skyblue", "gray"])
plt.title(label='Respiratory', fontsize=12)

In [None]:
# Respiratory reviews per age bin, bins in ascending age order.
sns.countplot(
    data=df_resp,
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)
plt.title(label='Respiratory', fontsize=12)

In [None]:
# Five drugs with the most respiratory reviews (counting non-missing 'Age').
(
    df_resp.groupby('Drug')['Age']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .iloc[:5]
)

In [None]:
# Age distribution of respiratory reviews labelled 'good'.
sns.countplot(
    data=df_resp.loc[df_resp['Review'].eq('good')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Age distribution of respiratory reviews labelled 'bad'.
sns.countplot(
    data=df_resp.loc[df_resp['Review'].eq('bad')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Stacked bars: weight loss/gain reviews per age bin, split by Review label.
df_plot = (
    df_weight.groupby(['Age', 'Review'])
    .size()
    .reset_index()
    .pivot(index='Age', columns='Review', values=0)
)
df_plot.plot.bar(stacked=True, color=["skyblue", "gray"])
plt.title(label='Weight Loss / Gain', fontsize=12)

In [None]:
# Weight loss/gain reviews per age bin, bins in ascending age order.
sns.countplot(
    data=df_weight,
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)
plt.title(label='Weight Loss / Gain', fontsize=12)

In [None]:
# Five drugs with the most weight loss/gain reviews (counting non-missing 'Age').
(
    df_weight.groupby('Drug')['Age']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .iloc[:5]
)

In [None]:
# Age distribution of weight loss/gain reviews labelled 'good'.
sns.countplot(
    data=df_weight.loc[df_weight['Review'].eq('good')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Age distribution of weight loss/gain reviews labelled 'bad'.
sns.countplot(
    data=df_weight.loc[df_weight['Review'].eq('bad')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Stacked bars: headache reviews per age bin, split by Review label.
df_plot = (
    df_headache.groupby(['Age', 'Review'])
    .size()
    .reset_index()
    .pivot(index='Age', columns='Review', values=0)
)
df_plot.plot.bar(stacked=True, color=["skyblue", "gray"])
plt.title(label='Headache', fontsize=12)

In [None]:
# Headache reviews per age bin, bins in ascending age order.
sns.countplot(
    data=df_headache,
    x='Age',
    color='skyblue',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
)
plt.title(label='Headache', fontsize=12)

In [None]:
# Five drugs with the most headache reviews (counting non-missing 'Age').
(
    df_headache.groupby('Drug')['Age']
    .count()
    .sort_values(ascending=False)
    .reset_index()
    .iloc[:5]
)

In [None]:
# Age distribution of headache reviews labelled 'good'.
sns.countplot(
    data=df_headache.loc[df_headache['Review'].eq('good')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

In [None]:
# Age distribution of headache reviews labelled 'bad'.
sns.countplot(
    data=df_headache.loc[df_headache['Review'].eq('bad')],
    x='Age',
    order=["0-44", "45-54", "55-64", "65-74", "75+"],
    color='skyblue',
)

# TYMLOS

In [None]:
# Show full cell contents (no truncation of long review text) in displayed frames.
pd.options.display.max_colwidth = None

In [None]:
# Side-effect reviews whose drug name contains "tymlos" (case-insensitive).
# na=False treats a missing drug name as "no match"; without it the mask
# contains NaN and .loc raises instead of filtering.
df_tymlos = df_side_effects.loc[
    df_side_effects['Drug'].str.contains("tymlos", case=False, na=False)
]

In [None]:
# Age bin and per-label classifier scores of the Tymlos reviews.
df_tymlos.loc[:, [
    'Age',
    'limb pain_m',
    'gastrointestinal_m',
    'dental_m',
    'cardiac_m',
    'dermatological_m',
    'respiratory_m',
    'weight gain and loss_m',
    'headache_m',
]]

In [None]:
# Age bin and per-category flag columns of the Tymlos reviews.
# NOTE(review): within this notebook these flag columns are only assigned
# inside a disabled (string-literal) cell -- confirm they exist in the data.
df_tymlos.loc[:, [
    'Age',
    'limb pain',
    'gastrointestinal',
    'dental',
    'cardiac',
    'dermatological',
    'respiratory',
    'weight',
    'headache',
]]

In [None]:
# Print three Tymlos reviews, separated by a blank line.
# NOTE(review): 800, 801 and 802 are hard-coded row labels; they raise
# KeyError if the filtered frame does not contain those labels.
print(
    *(df_tymlos['Reviews'].loc[row_label] for row_label in (800, 801, 802)),
    sep='\n\n',
)