-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
117 lines (90 loc) · 4.96 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import pandas as pd, streamlit as st
import matplotlib.pyplot as plt
from wordcloud import WordCloud, STOPWORDS
from streamlit_lottie import st_lottie
import requests
def load_lottieurl(url, timeout=10):
    """Fetch a Lottie animation and return its parsed JSON.

    Args:
        url: Address of the Lottie JSON document.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails, the
        server answers with anything other than HTTP 200, or the body is
        not valid JSON.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a
        # non-200 answer: the caller simply gets no animation.
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # JSON decoding errors derive from ValueError.
        return None
# ---------------------------------------------------------------------------
# Data loading, filter options, page header and stop words
# ---------------------------------------------------------------------------
# Empty spreadsheet cells are replaced by the literal string "Unknown".
data = pd.read_excel("data_effects.xlsx").fillna("Unknown")


def _filter_choices(values):
    """Return ``values`` sorted in descending order, preceded by "All"."""
    return ["All", *sorted(values, reverse=True)]


# Use the short country label "USA" instead of "United States".
data["country"] = data["country"].map(
    lambda name: "USA" if name == "United States" else name
)
countries = _filter_choices(data["country"].unique())

# Keep only the first four characters of the year's string form; the
# "Unknown" placeholder therefore becomes "Unkn".
data["Year"] = data["Year"].map(lambda value: str(value)[:4])
years = _filter_choices(data["Year"].unique())

outcomes = _filter_choices(data["outcome_clean"].unique())

# Replace the numeric effect codes by readable labels. The effect options
# keep their order of first appearance (they are not sorted).
effect_labels = {-1: 'decrease', 0: 'no association', 1: "increase"}
data["effect"] = pd.Categorical(data["effect"]).rename_categories(effect_labels)
effects = ["All", *data["effect"].unique()]

# Lower-cased "title abstract" string per row, used as word cloud input.
# Original author's note: Abstract.Note for data_review
data["text"] = [
    " ".join((str(title), str(abstract))).lower()
    for title, abstract in zip(data["Title"], data["Abstract Note...8"])
]

#lottie_tweet = load_lottieurl('https://assets6.lottiefiles.com/packages/lf20_tnrzlN.json')
#st_lottie(lottie_tweet, speed=1, height=200, key="initial")

st.markdown(
    "<h1 style='text-align: center;'> Digital Media and Democracy </h1>",
    unsafe_allow_html=True,
)
st.markdown(
    "<h3 style='text-align: center;'> wordclouds of titles and abstracts of scientific papers </h3>",
    unsafe_allow_html=True,
)

# Extra words excluded from the word cloud on top of wordcloud's STOPWORDS.
extra_stopwords = {
    "find", "study", "investigate", "result", "sample",
    "finding", "paper", "article", "results", "findings",
    "test", "one", "two", "three", "examine",
}
st.session_state.stopwords = STOPWORDS.union(extra_stopwords)
def make_wordcloud(text, color, stopwords = st.session_state.stopwords):
    """Render a word cloud of the given texts as a Matplotlib figure.

    Args:
        text: Iterable of strings; they are joined with single spaces
            before being passed to the word cloud generator.
        color: Name of the Matplotlib colormap used for the words.
        stopwords: Words excluded from the cloud. The default is read from
            ``st.session_state.stopwords`` once, when this function is
            defined.

    Returns:
        An 18x12 inch figure showing the cloud with the axes hidden.
    """
    # Local import: only this function needs the Figure class.
    from matplotlib.figure import Figure

    corpus = " ".join(text)
    cloud = WordCloud(
        width=1800,
        height=1200,
        stopwords=stopwords,
        max_font_size=250,
        max_words=150,
        background_color="white",
        colormap=color,
        collocations=True,
    ).generate(corpus)

    # A standalone Figure is not registered with pyplot's global figure
    # registry, so it is freed once nothing references it; figures made by
    # plt.figure() stay alive until closed explicitly. Drawing on the axes
    # object also avoids pyplot's implicit "current figure" state.
    fig = Figure(figsize=(18, 12))
    ax = fig.subplots()
    ax.imshow(cloud, interpolation="bilinear")
    ax.axis("off")
    return fig
def get_filtered_data(data, filtervars, vars=("Year", "outcome_clean", "effect", "country")):
    """Return the rows of ``data`` that match every active filter.

    Args:
        data: DataFrame to filter; it is copied, never modified.
        filtervars: One sequence of selected values per entry of ``vars``,
            in the same order. An empty selection, or a selection that
            contains "All" at any position, leaves that column unfiltered.
        vars: Names of the columns the selections apply to. Columns
            without a matching entry in ``filtervars`` are not filtered.

    Returns:
        A new DataFrame holding the rows whose value in each filtered
        column is one of the selected values.
    """
    newdata = data.copy()
    for column, selected in zip(vars, filtervars):
        # "All" disables the filter regardless of where it was selected.
        if len(selected) == 0 or "All" in selected:
            continue
        # isin() compares values directly, so nothing has to be embedded
        # in (and parsed back from) a query string.
        newdata = newdata[newdata[column].isin(list(selected))]
    return newdata
# ---------------------------------------------------------------------------
# Filter widgets
# ---------------------------------------------------------------------------
st.multiselect("Filter by outcome measure", outcomes, default=["All"], key="OUTCOME")
st.multiselect("Filter by effect of digital media on outcome", effects, default=["All"], key="EFFECT")
st.multiselect("Filter by country of study", countries, default=["All"], key="COUNTRY")
st.multiselect("Filter by year of publication", years, default=["All"], key="YEAR")

if "last_filters" not in st.session_state:
    st.session_state.last_filters = []

# The order matches the default column order of get_filtered_data:
# Year, outcome_clean, effect, country.
st.session_state.filters = [
    st.session_state.YEAR,
    st.session_state.OUTCOME,
    st.session_state.EFFECT,
    st.session_state.COUNTRY,
]
st.session_state.changed = st.session_state.filters != st.session_state.last_filters

# ---------------------------------------------------------------------------
# Word cloud of the matching articles (one row per distinct title)
# ---------------------------------------------------------------------------
newdf = get_filtered_data(data, st.session_state.filters)
unique_articles = newdf.drop_duplicates(subset=["Title"])

# Streamlit re-executes the script on every interaction and shows only what
# the current run emits, so the message and the cloud are emitted on every
# run. The figure itself is regenerated only when the filters changed;
# otherwise the one kept in session state is shown again.
if len(unique_articles) == 0:
    st.markdown("There are no articles matching your selection criteria.")
else:
    st.markdown(f"There are {len(unique_articles)} articles matching your selection criteria.")
    if st.session_state.changed or "wordcloud_fig" not in st.session_state:
        st.session_state.wordcloud_fig = make_wordcloud(unique_articles.text, "cool")
    st.pyplot(st.session_state.wordcloud_fig)
st.session_state.last_filters = st.session_state.filters

# ---------------------------------------------------------------------------
# List of the most recent matching articles
# ---------------------------------------------------------------------------
# "Unkn" is what the four-character year truncation leaves of "Unknown".
df = unique_articles[unique_articles["Year"] != "Unkn"]
# Keep rows whose DOI is shorter than 100 characters. The vectorized
# comparison yields a boolean mask even when no rows are left.
df = df.assign(DOITrue=df["DOI"].astype(str).str.len() < 100)
df = df[df["DOITrue"]]
df = df.sort_values(by=["Year"], ascending=False)
st.session_state.df = df.reset_index(inplace=False)

available = len(st.session_state.df)
st.slider(
    "How many titles would you like to explore?",
    min_value=0,
    max_value=available,
    value=min(10, available),
    step=1,
    key="number_to_print",
)
st.markdown(f"Showing {st.session_state.number_to_print} most recent articles:")

shown = st.session_state.df.head(st.session_state.number_to_print)
for year, title, doi in zip(shown["Year"], shown["Title"], shown["DOI"]):
    st.markdown(f"{year}. {title} https://doi.org/{doi}")

st.markdown("""
---
By [@YaraKyrychenko](https://twitter.com/YaraKyrychenko) based on data from:
Lorenz-Spreen, P., Oswald, L., Lewandowsky, S. et al. [A systematic review of worldwide causal and correlational evidence on digital media and democracy.](https://doi.org/10.1038/s41562-022-01460-1) Nat Hum Behav (2022).
[Data (OSF)](https://osf.io/7ry4a/) [Web App (GitHub)](https://github.com/yarakyrychenko/digital-media-and-democracy-app)
""")