In [1]:
import mwclient
import time

site = mwclient.Site("en.wikipedia.org")
page =site.pages["Bitcoin"]

In [2]:
revs = list(page.revisions())

In [3]:
revs[0]

OrderedDict([('revid', 1248596137),
             ('parentid', 1247295554),
             ('user', 'Vgbyp'),
             ('timestamp',
              time.struct_time(tm_year=2024, tm_mon=9, tm_mday=30, tm_hour=12, tm_min=40, tm_sec=16, tm_wday=0, tm_yday=274, tm_isdst=-1)),
             ('comment',
              '/* 2020–present */ Reinstating the BTC/USD chart based on the [[Talk:Bitcoin#Bitcoin_Price_Chart|Talk page recommendations]].')])

In [4]:
revs = sorted(revs, key=lambda rev: rev["timestamp"])

In [5]:
revs[0]

OrderedDict([('revid', 275832581),
             ('parentid', 0),
             ('user', 'Pratyeka'),
             ('timestamp',
              time.struct_time(tm_year=2009, tm_mon=3, tm_mday=8, tm_hour=16, tm_min=41, tm_sec=7, tm_wday=6, tm_yday=67, tm_isdst=-1)),
             ('comment', 'creation (stub)')])

In [20]:
from transformers import pipeline

# Create a simple sentiment analysis pipeline
try:
    sentiment_pipeline = pipeline("sentiment-analysis")
    print("Sentiment Analysis Pipeline Loaded Successfully!")
except Exception as e:
    print("Error loading pipeline:", str(e))



  
            


No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Error loading pipeline: At least one of TensorFlow 2.0 or PyTorch should be installed. To install TensorFlow 2.0, read the instructions at https://www.tensorflow.org/install/ To install PyTorch, read the instructions at https://pytorch.org/.


In [22]:
# Initialize the edits dictionary
edits = {}

# Assuming revs is a list of revisions
for rev in revs:
    date = rev.get("date")
    
    # Check if the date is not already a key in edits
    if date not in edits:
        edits[date] = dict(sentiments=list(), edit_count=0)

    # Increment the edit count for that date
    edits[date]["edit_count"] += 1







In [26]:
from statistics import mean   
for key in edits:
    # Check if the "sentiments" key exists and has values
    if "sentiments" in edits[key] and len(edits[key]["sentiments"]) > 0:
        edits[key]["sentiment"] = mean(edits[key]["sentiments"])
        edits[key]["neg_sentiment"] = len([s for s in edits[key]["sentiments"] if s < 0]) / len(edits[key]["sentiments"])
    else:
        # Handle case where there are no sentiments
        edits[key]["sentiment"] = None 
        edits[key]["neg_sentiment"] = 0 


In [27]:
import pandas as pd

edits_df = pd.DataFrame.from_dict(edits,orient="index")

In [28]:
edits_df

Unnamed: 0,edit_count,sentiment,neg_sentiment
,17899,,0


In [29]:
edits_df.index = pd.to_datetime(edits_df.index)

In [30]:
from datetime import datetime

dates = pd.date_range(start="2009-03-09", end = datetime.today())

In [31]:
dates

DatetimeIndex(['2009-03-09', '2009-03-10', '2009-03-11', '2009-03-12',
               '2009-03-13', '2009-03-14', '2009-03-15', '2009-03-16',
               '2009-03-17', '2009-03-18',
               ...
               '2024-09-21', '2024-09-22', '2024-09-23', '2024-09-24',
               '2024-09-25', '2024-09-26', '2024-09-27', '2024-09-28',
               '2024-09-29', '2024-09-30'],
              dtype='datetime64[ns]', length=5685, freq='D')

In [32]:
edits_df = edits_df.reindex(dates, fill_value=0)

In [33]:
edits_df

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-09,0,0,0
2009-03-10,0,0,0
2009-03-11,0,0,0
2009-03-12,0,0,0
2009-03-13,0,0,0
...,...,...,...
2024-09-26,0,0,0
2024-09-27,0,0,0
2024-09-28,0,0,0
2024-09-29,0,0,0


In [34]:
rolling_edits=edits_df.rolling(30).mean()

In [35]:
rolling_edits

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-03-09,,,
2009-03-10,,,
2009-03-11,,,
2009-03-12,,,
2009-03-13,,,
...,...,...,...
2024-09-26,0.0,0.0,0.0
2024-09-27,0.0,0.0,0.0
2024-09-28,0.0,0.0,0.0
2024-09-29,0.0,0.0,0.0


In [36]:
rolling_edits = rolling_edits.dropna()

In [37]:
rolling_edits

Unnamed: 0,edit_count,sentiment,neg_sentiment
2009-04-07,0.0,0.0,0.0
2009-04-08,0.0,0.0,0.0
2009-04-09,0.0,0.0,0.0
2009-04-10,0.0,0.0,0.0
2009-04-11,0.0,0.0,0.0
...,...,...,...
2024-09-26,0.0,0.0,0.0
2024-09-27,0.0,0.0,0.0
2024-09-28,0.0,0.0,0.0
2024-09-29,0.0,0.0,0.0


In [38]:
rolling_edits.to_csv("wikipedia_edits.csv")