# Drudge tempo analysis

By Ben Welsh

A draft analysis of how many new headlines cycle through the homepage of drudgereport.com each day.

In [28]:
import sys
from pathlib import Path

In [1]:
from rich import print
from rich.progress import track

In [2]:
import pandas as pd
import altair as alt

In [29]:
# Notebooks do not define `__file__`, so the current working directory stands
# in for the folder that holds this notebook.
this_dir = Path.cwd()

# Make the parent folder and its "newshomepages" subfolder importable.
# sys.path entries must be strings: the import system skips any other type,
# so the Path objects are converted before they are appended.
sys.path.append(str(this_dir.parent))
sys.path.append(str(this_dir.parent / "newshomepages"))

In [30]:
import altair_theme

In [31]:
# Register the theme from the local altair_theme module and switch to it.
# The enable call stays last so its return value remains the cell's output.
theme_name = 'palewire'
alt.themes.register(theme_name, altair_theme.theme)
alt.themes.enable(theme_name)

ThemeRegistry.enable('palewire')

In [3]:
# Load the Drudge hyperlink extract straight from GitHub, parsing the
# "earliest_date" column as datetimes.
drudge_csv_url = "https://github.com/palewire/news-homepages/raw/main/extracts/csv/drudge-hyperlinks-analysis.csv"
drudge_df = pd.read_csv(drudge_csv_url, parse_dates=["earliest_date"])

In [5]:
# Keep only the rows flagged by the "is_story" column (presumably a boolean
# flag -- verify against the extract) and copy them so later edits never
# touch drudge_df.
is_story_mask = drudge_df["is_story"]
story_df = drudge_df[is_story_mask].copy()

In [15]:
# Count the story rows that share each "earliest_date" value ...
stories_per_date = story_df.groupby("earliest_date").size().rename("n")

# ... then order the counts by date and index the table by that date.
date_totals = (
    stories_per_date.reset_index()
    .sort_values("earliest_date")
    .set_index("earliest_date")
)

In [16]:
# Build one entry per calendar day between the first and last
# "earliest_date" seen in the story rows.
earliest_dates = story_df["earliest_date"]
date_range = pd.date_range(
    start=earliest_dates.min(),
    end=earliest_dates.max(),
    freq="D",
)

In [17]:
# Wrap the daily range in a DatetimeIndex for use as the reindexing target.
date_index = pd.DatetimeIndex(data=date_range)

In [21]:
# Align the daily counts to the full calendar; days missing from date_totals
# come through as NaN.
counts_on_calendar = date_totals.reindex(date_index)

# Move the dates out of the index into a "date" column and order by it.
backfilled_df = (
    counts_on_calendar.reset_index()
    .rename(columns={"index": "date"})
    .sort_values("date")
)

In [22]:
# Days that were missing from the counts get zero. The result is assigned
# back instead of calling fillna(inplace=True) through attribute access:
# that chained in-place form acts on a temporary under pandas copy-on-write
# and would leave backfilled_df unchanged (pandas 2.2 already warns about it).
backfilled_df["n"] = backfilled_df["n"].fillna(0)

In [23]:
# Trailing seven-row mean of the daily counts; the first six rows have no
# full window and stay NaN.
daily_counts = backfilled_df["n"]
backfilled_df["7_day_rolling_average"] = daily_counts.rolling(window=7).mean()

In [59]:
# Rows that already have a complete seven-day window.
has_rolling_average = ~pd.isnull(backfilled_df["7_day_rolling_average"])

# Everything except the most recent date (presumably excluded because that
# day's capture is still partial -- confirm).
before_latest_date = backfilled_df["date"] < backfilled_df["date"].max()

chart_df = backfilled_df[has_rolling_average & before_latest_date]

In [60]:
# Peek at the five most recent rows that will be charted.
chart_df.tail(n=5)

Unnamed: 0,date,n,7_day_rolling_average
84,2022-11-01,76,64.571429
85,2022-11-02,71,61.857143
86,2022-11-03,68,62.571429
87,2022-11-04,67,64.714286
88,2022-11-05,35,60.428571


In [62]:
# Average number of new stories per day across the charted rows.
chart_df["n"].mean()

64.46987951807229

In [66]:
# Shared base: one x position per calendar day, labelled like "Nov 5", plus a
# tooltip showing the date and that day's count.
date_axis = alt.Axis(format="%b %-d", grid=False)
date_x = alt.X("date:T", title=None, timeUnit="yearmonthdate", axis=date_axis)
chart = alt.Chart(chart_df).encode(x=date_x, tooltip=["date", "n"])

# Semi-transparent bars for the raw daily counts.
bars = chart.mark_bar(opacity=0.6).encode(y=alt.Y("n:Q"))

# Accent-coloured line for the seven-day rolling average.
accent_color = altair_theme.palette['accent']
rolling_y = alt.Y("7_day_rolling_average:Q", title="Seven-day rolling average")
line = chart.mark_line(stroke=accent_color).encode(y=rolling_y)

# Layer the line over the bars; the titled layered chart is the cell output.
layered = bars + line
layered.properties(title="New Drudge headlines per day")

In [71]:
# Write the story rows whose earliest_date matches one specific day to a
# local CSV, without the DataFrame index.
proof_date = "2022-11-05"
proof_stories = story_df[story_df["earliest_date"] == proof_date]
proof_stories.to_csv("drudge-proof-date.csv", index=False)