In [28]:
import json, time, requests
from dotenv import load_dotenv
import os

In [29]:
################################################################################################ 
# The code and comments below are adapted, with light modifications, from Dr. David McDonald,
# who provided them for use in DATA 512, a course in the University of Washington MS in Data
# Science program. The code is provided and used here under the Creative Commons CC-BY
# license.
################################################################################################

#########
#
#    CONSTANTS
#

#    LiftWing-hosted ORES: the name of the English Wikipedia article quality model, and the
#    endpoint URL template that the model name gets substituted into
#
API_ORES_EN_QUALITY_MODEL = "enwiki-articlequality"
API_ORES_LIFTWING_ENDPOINT = "https://api.wikimedia.org/service/lw/inference/v1/models/{model_name}:predict"

#
#    Throttling. The request rate allowed is tied to the access token that was granted; the
#    figures here assume a key authorizing 5000 requests per hour (3600 seconds).
#
API_LATENCY_ASSUMED = 0.002       # seconds - assume roughly 2ms of API/network latency
API_THROTTLE_WAIT = (3600.0 / 5000.0) - API_LATENCY_ASSUMED   # seconds to pause before each request

#    Automated requests should carry something unique to the person making them - an email
#    address (a UW email is a good choice) goes into the User-Agent.
#
#    Every LiftWing API request also needs authentication, so the access token travels in the
#    header as well.
#
REQUEST_HEADER_TEMPLATE = dict([
    ('User-Agent',    "<{email_address}>, University of Washington, MSDS DATA 512 - AUTUMN 2024"),
    ('Content-Type',  'application/json'),
    ('Authorization', "Bearer {access_token}"),
])
#
#    The values substituted into the {placeholders} of the header template above
#
REQUEST_HEADER_PARAMS_TEMPLATE = dict(
    email_address="dvogler@uw.edu",   # email address of the person making the requests
    access_token="",                  # filled in with the access token at call time
)

#
#    Sample data for the ORES scoring example: English Wikipedia article title -> revision ID
#
ARTICLE_REVISIONS = {
    'Bison': 1085687913,
    'Northern flicker': 1086582504,
    'Red squirrel': 1083787665,
    'Chinook salmon': 1085406228,
    'Horseshoe bat': 1060601936,
}

#
#    Payload template for a scoring request sent to the ORES model
#
ORES_REQUEST_DATA_TEMPLATE = dict(
    lang="en",        # must be English - these are English Wikipedia revisions
    rev_id="",        # the revision id to score - required, supplied per request
    features=True,
)

#
#    Placeholders - given real values later, declared here so the names always exist
#
USERNAME, ACCESS_TOKEN = "", ""

In [30]:
# Read the access token from the environment rather than writing it into the notebook.
# load_dotenv() runs first so the lookup below happens after it has had a chance to
# populate the environment (presumably from a local .env file - see python-dotenv).
load_dotenv()
ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN")   # None when the variable is not set
USERNAME = "voglerdaniel"

In [31]:
#########
#
#    PROCEDURES/FUNCTIONS
#

def request_ores_score_per_article(article_revid = None, email_address=None, access_token=None,
                                   endpoint_url = API_ORES_LIFTWING_ENDPOINT, 
                                   model_name = API_ORES_EN_QUALITY_MODEL, 
                                   request_data = ORES_REQUEST_DATA_TEMPLATE, 
                                   header_format = REQUEST_HEADER_TEMPLATE, 
                                   header_params = REQUEST_HEADER_PARAMS_TEMPLATE,
                                   request_timeout = 60.0):
    """Request a score for one article revision from the LiftWing ORES endpoint.

    Values passed as `article_revid`, `email_address` and `access_token` take priority
    over whatever is already present in `request_data` / `header_params`.

    The `request_data` and `header_params` dictionaries are copied before being filled
    in, so neither the caller's dictionaries nor the module-level templates used as
    defaults are modified. (Writing into them directly would let the revision id and
    token from one call leak into every later call.)

    Parameters:
        article_revid:   revision id to score; overrides request_data['rev_id'] if given
        email_address:   overrides header_params['email_address'] if given
        access_token:    overrides header_params['access_token'] if given
        endpoint_url:    URL template containing a {model_name} placeholder
        model_name:      model name substituted into endpoint_url
        request_data:    dict used as the JSON payload of the POST request
        header_format:   dict of header name -> format string
        header_params:   dict of values substituted into the header format strings
        request_timeout: seconds passed to requests.post as its timeout, so a stalled
                         connection cannot hang the notebook indefinitely

    Returns:
        The decoded JSON body of the response, or None if the request or the JSON
        decoding raised (the exception is printed). The HTTP status is not checked,
        so an error response with a JSON body is returned as-is.

    Raises:
        ValueError: if no revision id, email address or access token is available.
    """
    #    Work on private copies so the shared templates / caller's dicts stay untouched
    request_data = dict(request_data)
    header_params = dict(header_params)

    #    This approach prioritizes the parameters passed in when making the call
    if article_revid:
        request_data['rev_id'] = article_revid
    if email_address:
        header_params['email_address'] = email_address
    if access_token:
        header_params['access_token'] = access_token

    #   Making a request requires a revision id - an email address - and the access token
    if not request_data.get('rev_id'):
        raise ValueError("Must provide an article revision id (rev_id) to score articles")
    if not header_params.get('email_address'):
        raise ValueError("Must provide an 'email_address' value")
    if not header_params.get('access_token'):
        raise ValueError("Must provide an 'access_token' value")

    # Create the request URL with the specified model parameter - default is a article quality score request
    request_url = endpoint_url.format(model_name=model_name)

    # Create a compliant request header from the template and the supplied parameters
    headers = {str(key): template.format(**header_params)
               for key, template in header_format.items()}

    # make the request
    try:
        # we'll wait first, to make sure we don't exceed the limit in the situation where an exception
        # occurs during the request processing - throttling is always a good practice with a free data
        # source like ORES - or other community sources
        if API_THROTTLE_WAIT > 0.0:
            time.sleep(API_THROTTLE_WAIT)
        response = requests.post(request_url, headers=headers, data=json.dumps(request_data),
                                 timeout=request_timeout)
        json_response = response.json()
    except Exception as e:
        # Deliberately best-effort: report the problem and hand back None rather than
        # aborting the caller
        print(e)
        json_response = None
    return json_response


In [None]:
# Calling with no arguments supplies no revision id, so on a fresh kernel the function's
# validation raises. Catch and display that error so running all cells top to bottom can
# continue past this cell.
try:
    score = request_ores_score_per_article()
except Exception as err:
    print(f"Request rejected: {err}")

In [32]:
#   
#
#   Which article - the key for the article dictionary defined above
article_title = "Bison"
#
print(f"Getting LiftWing ORES scores for '{article_title}' with revid: {ARTICLE_REVISIONS[article_title]:d}")
#
#    Make the call, just pass in the article revision ID, email address, and access token.
#    The email address is placed in the User-Agent header, so it should identify the person
#    actually running this notebook (same address as in REQUEST_HEADER_PARAMS_TEMPLATE).
score = request_ores_score_per_article(article_revid=ARTICLE_REVISIONS[article_title],
                                       email_address="dvogler@uw.edu",
                                       access_token=ACCESS_TOKEN)
#
#    Output the result
print(json.dumps(score,indent=4))
#

Getting LiftWing ORES scores for 'Bison' with revid: 1085687913
{
    "enwiki": {
        "models": {
            "articlequality": {
                "version": "0.9.2"
            }
        },
        "scores": {
            "1085687913": {
                "articlequality": {
                    "score": {
                        "prediction": "FA",
                        "probability": {
                            "B": 0.07895665991827401,
                            "C": 0.03728215742560417,
                            "FA": 0.5629436065906797,
                            "GA": 0.30547854835374505,
                            "Start": 0.011061807252218824,
                            "Stub": 0.00427722045947826
                        }
                    }
                }
            }
        }
    }
}


In [33]:
#
#   What article - the key for the article dictionary defined above
article_title = "Red squirrel"
#
#   We have to pass in some parameters used for the request header. Create a copy of the template and fill in some fields.
#   The email address is placed in the User-Agent header, so it should identify the person
#   actually running this notebook (same address as in REQUEST_HEADER_PARAMS_TEMPLATE).
hparams = REQUEST_HEADER_PARAMS_TEMPLATE.copy()
hparams['email_address'] = "dvogler@uw.edu"
hparams['access_token'] = ACCESS_TOKEN
#
#    We can also do this with the request data - although this might not be as useful as with the header params
rd = ORES_REQUEST_DATA_TEMPLATE.copy()
rd['rev_id'] = ARTICLE_REVISIONS[article_title]
#
print(f"Getting LiftWing ORES scores for '{article_title}' with revid: {ARTICLE_REVISIONS[article_title]:d}")
#
#    Make the call, just pass in the request data and the header parameters
score = request_ores_score_per_article(request_data=rd,
                                       header_params=hparams)
#
#    Output the result
print(json.dumps(score,indent=4))
#

Getting LiftWing ORES scores for 'Red squirrel' with revid: 1083787665
{
    "enwiki": {
        "models": {
            "articlequality": {
                "version": "0.9.2"
            }
        },
        "scores": {
            "1083787665": {
                "articlequality": {
                    "score": {
                        "prediction": "C",
                        "probability": {
                            "B": 0.34796005858456314,
                            "C": 0.5493773163633026,
                            "FA": 0.033407655474737605,
                            "GA": 0.056969127408174655,
                            "Start": 0.008976178264322794,
                            "Stub": 0.0033096639048991057
                        }
                    }
                }
            }
        }
    }
}
