In [1]:
import io

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import requests
from ipywidgets import widgets
from plotly.subplots import make_subplots

In [2]:
# data processing

# Base URL of the raw Excel files in the project repository.
REPO_RAW_URL = "https://github.com/x-zhe/RMP_gendered_comment/raw/main/"


def _read_remote_excel(filename, timeout=30):
    """Download ``filename`` from ``REPO_RAW_URL`` and parse it as an Excel sheet.

    Parameters
    ----------
    filename : str
        File name relative to ``REPO_RAW_URL`` (e.g. ``"data1.xlsx"``).
    timeout : float, optional
        Seconds to wait for the server before giving up; without it
        ``requests.get`` can block the kernel indefinitely.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``pd.read_excel`` for the downloaded file.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status, so an HTML error page is
        never handed to the Excel parser.
    """
    response = requests.get(REPO_RAW_URL + filename, timeout=timeout)
    response.raise_for_status()
    # Wrap the payload in a file-like object: passing raw bytes to
    # pd.read_excel is deprecated in recent pandas versions.
    return pd.read_excel(io.BytesIO(response.content))


# Results table 1 joined with its topic mapping on the shared "topic" column.
data = _read_remote_excel("data1.xlsx")
dfmap = _read_remote_excel("topic_mapping1.xlsx")
df1 = pd.merge(data, dfmap, on="topic")

# Results table 5 joined with its topic mapping on the shared "topic" column.
data = _read_remote_excel("data5.xlsx")
dfmap = _read_remote_excel("topic_mapping5.xlsx")
df5 = pd.merge(data, dfmap, on="topic")

# RateMyProfessors.com gendered comments

In [4]:
def _select_rows(df, field, cat, figtype, is_sig):
    """Return the rows of ``df`` for one field / category / analysis type.

    Uses boolean masks rather than a formatted ``query`` string so that values
    containing quotes cannot break the expression.
    """
    mask = (
        (df["field"] == field)
        & (df["type"] == figtype)
        & (df["category"] == cat)
    )
    if is_sig:
        mask = mask & (df["pval"] < 0.05)
    return df[mask]


def _coef_trace(rows, color, legendgroup, name, hovertemplate, padding=0):
    """Build one marker trace with asymmetric error bars for ``rows``.

    ``padding`` blank entries (x = None, y = a unique run of spaces) are placed
    before the real rows so that a panel with fewer rows gets as many y
    categories as the taller panel.
    """
    upper = (rows["high"] - rows["coef"]).tolist()
    lower = (rows["coef"] - rows["low"]).tolist()
    return go.Scatter(
        x=[None] * padding + rows["coef"].tolist(),
        # each padding label has a different length to avoid duplicate categories
        y=[" " * p for p in range(padding)] + rows["name"].tolist(),
        # columns read by the hover template: pval, low, high
        customdata=np.vstack(
            (np.zeros((padding, 3)), rows[["pval", "low", "high"]].to_numpy())),
        marker=dict(color=color),
        mode="markers",
        name=name,
        legendgroup=legendgroup,
        error_x=dict(type="data",
                     symmetric=False,
                     array=[0] * padding + upper,
                     arrayminus=[0] * padding + lower,
                     color=color,
                     ),
        hovertemplate=hovertemplate,
    )


def make_figure(field, cat, figtype, is_sig=False):
    """Build the two-panel coefficient plot for one field and dimension.

    The left panel is drawn from the module-level frame ``df5`` and the right
    panel from ``df1``. Within each panel rows are sorted by ``coef``; rows
    with ``coef <= 0`` are drawn in blue (legend group "Men") and rows with
    ``coef > 0`` in orange (legend group "Women"), each with an error bar
    spanning ``low``..``high``.

    Parameters
    ----------
    field : str
        Value matched against the ``field`` column.
    cat : str
        Value matched against the ``category`` column; also shown in the
        subplot titles and used as the trace name.
    figtype : str
        Value matched against the ``type`` column. ``"topic"`` selects the
        plain subplot titles; any other value selects the sentiment titles.
    is_sig : bool, optional
        When true, keep only rows with ``pval < 0.05``.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure created by ``make_subplots`` with all traces and layout
        settings applied.
    """
    # color param: [coef > 0 ("Women"), coef <= 0 ("Men")]
    colors = ["#F38E72", "#408FC1"]

    # Select the rows of each panel; the taller panel fixes the row count.
    panels = [_select_rows(d, field, cat, figtype, is_sig) for d in (df5, df1)]
    row_heights = max(len(panels[0]), len(panels[1]))

    # The bold tag is closed with </b> so only the panel heading is bold.
    if figtype == "topic":
        subplot_titles = ["<b>(A) Five-star reviews</b><br>{}".format(cat),
                          "<b>(B) One-star reviews</b><br>{}".format(cat)]
    else:
        subplot_titles = [
            "<b>(A) Five-star reviews</b><br>{}(Positive sentiment)".format(cat),
            "<b>(B) One-star reviews</b><br>{}(Negative sentiment)".format(cat)]

    # Initialize figure
    fig = make_subplots(rows=1,
                        cols=2,
                        horizontal_spacing=0.03,
                        subplot_titles=subplot_titles,
                        )

    # Add Traces
    hovertemplate = "Coef.:%{x:.3f} <br>95% CI:[%{customdata[1]:.3f}, %{customdata[2]:.3f}]<br>p: %{customdata[0]:.3f}"
    for col, panel in enumerate(panels, start=1):
        ordered = panel.sort_values("coef", ascending=True)
        padding = row_heights - len(ordered)  # padding the y axis

        # Non-positive coefficients; this trace also carries the padding rows.
        fig.add_trace(_coef_trace(ordered[ordered["coef"] <= 0],
                                  colors[1], "Men", cat, hovertemplate,
                                  padding=padding),
                      row=1,
                      col=col,
                      )
        # Positive coefficients.
        fig.add_trace(_coef_trace(ordered[ordered["coef"] > 0],
                                  colors[0], "Women", cat, hovertemplate),
                      row=1,
                      col=col,
                      )

    # 30 px per row plus room for titles and axis labels.
    fig.update_layout(height=(row_heights + 6) * 30,
                      width=1000,
                      template="simple_white",
                      font=dict(family="Arial"),
                      showlegend=False,
                      margin=dict(t=80, b=80),
                      )

    # Applies to both x axes, including the axis label at the bottom.
    fig.update_xaxes(showgrid=True,
                     zeroline=True,
                     zerolinecolor="gray",
                     mirror=True,
                     title=dict(text="Coefficient (women-men)",
                                standoff=0.01,
                                font_size=13),
                     ticklabelposition="outside",
                     )

    fig.update_yaxes(type='category',
                     showgrid=True,
                     ticklabelstep=1,
                     mirror=True,
                     )

    # move right subplots' y axes to the right
    fig.update_yaxes(side="right",
                     col=2,
                     row=1
                     )

    return fig


In [5]:
# Option lists, in the order the toggle buttons are displayed.
cat_orders = [
    "Overall",
    "Teaching",
    "Personal",
    "Material",
    "Structure",
    "Evaluation",
    "Grading",
    "Subject",
]
fields = [
    "All",
    "Applied Sciences",
    "Natural Sciences",
    "Math & Computing",
    "Engineering",
    "Medicine Health",
    "Social Sciences",
    "Education",
    "Humanities",
]

# Selector for the review dimension (the `cat` argument of make_figure).
dimbox = widgets.ToggleButtons(
    options=cat_orders,
    value='Overall',
    description='Dimension: ',
)

# Selector for the academic field.
fieldbox = widgets.ToggleButtons(
    options=fields,
    value='All',
    description='Field: ',
)

# Checkbox restricting the plot to rows with p < 0.05.
pbox = widgets.Checkbox(
    value=False,
    description="Show significant (p<0.05) only",
)

# Selector for the analysis type (topic modeling vs. sentiment).
typebox = widgets.ToggleButtons(
    description='Analysis type: ',
    value='Topic modeling',
    options=['Topic modeling', 'Sentiment'],
)


def on_clicked(change):
    """Redraw the figure widget ``g`` from the current control values.

    Registered as an observer on the controls; ``change`` (the notification
    payload) is not used because every control is re-read on each call.
    """
    if typebox.value == 'Topic modeling':
        analysis = "topic"
    else:
        analysis = 'sent'
    refreshed = make_figure(fieldbox.value, dimbox.value, analysis, pbox.value)
    # Replace both data and layout of the displayed widget in one update.
    g.update(refreshed.to_dict(), overwrite=True)


# Build the initial figure from the controls' current values so the first
# render agrees with what the widgets show (previously it was always built
# with is_sig=True even though the checkbox starts unchecked).
g = go.FigureWidget(make_figure(
    fieldbox.value,
    dimbox.value,
    "topic" if typebox.value == 'Topic modeling' else 'sent',
    is_sig=pbox.value))

# Redraw whenever any control's value changes.
dimbox.observe(on_clicked, names="value")
fieldbox.observe(on_clicked, names="value")
typebox.observe(on_clicked, names="value")
pbox.observe(on_clicked, names="value")

# Controls stacked above the figure.
vbox = widgets.VBox([dimbox,
                     fieldbox,
                     widgets.HBox([typebox,
                                   pbox]),
                     g])
display(vbox)


VBox(children=(ToggleButtons(description='Dimension: ', options=('Overall', 'Teaching', 'Personal', 'Material'…