In [None]:
### IMPORTANT ###
# DO NOT RUN THIS NOTEBOOK IF YOU ARE TRYING TO RECREATE THE FIGURES FROM THE PAPER!
# The code here is only to show how some raw data was acquired and/or post-processed.

## Import

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import requests
from requests.auth import HTTPBasicAuth
from bs4 import BeautifulSoup
import json
import pandas as pd
from fig2_blocklist_utils import *

## Compile Censys Query

In [3]:
noprescriptionlist = [
   "no prescription required",
   "no prescription needed",
    "without prescription"
   "no rx needed",
   "no rx required",
   "no doctor visit",
]
# Expanded list of controlled substances (generic names and some popular ones)
CONTROLLED_SUBSTANCES = [
   "xanax",
   "valium",
   "tramadol",
   "oxycodone",
   "hydrocodone",
   "percocet",
   "adderall",
   "fentanyl",
   "morphine",
   "codeine",
   "hydromorphone",
   "oxymorphone",
   "dilaudid",
   "suboxone",
   "buprenorphine",
   "oxicontin",
   "vicodin",
   "roxicodone"
]

# Expanded list of brand names for common drugs
BRAND_NAMES = [
   "viagra",
   "cialis",
   "levitra",
   "kamagra",       # often sold illegally online
   "sildenafil",    # generic Viagra
   "tadalafil",     # generic Cialis
   "vardenafil",    # another ED medication
   "stendra"
]

noprescription = " OR ".join([f'"{k}"' for k in noprescriptionlist])
meds = " OR ".join([f'"{k}"' for k in (BRAND_NAMES + CONTROLLED_SUBSTANCES)])
query = f"({noprescription}) OR ({meds})"


In [4]:
# API keys
API_ID = "3ffa6ea5-5bdc-4c59-b7c6-39ba610974f3"
API_SECRET = "YVVoMmGbePCEqeIrZSwkiKe1yzC4poPB"

# query
url = "https://search.censys.io/api/v2/hosts/search"
cursor_next = ""
num_pages = 10

all_query_data = []
for i in range(num_pages):
    
    # setup parameters
    params = {
        "q": query,
    }
    if cursor_next != "":
        params["cursor"] = cursor_next

    # query
    response = requests.get(url, auth=HTTPBasicAuth(API_ID, API_SECRET), params=params)

    # format raw data
    data = response.json()
    cursor_next = data['result']['links']['next']

    # save
    print(data)
    all_query_data.append(data)

# # save to file
# with open('data/censys_manual.json', 'w') as f:
#     json.dump(all_query_data, f, indent=2)

{'code': 200, 'status': 'OK', 'result': {'query': '("no prescription required" OR "no prescription needed" OR "without prescriptionno rx needed" OR "no rx required" OR "no doctor visit") OR ("viagra" OR "cialis" OR "levitra" OR "kamagra" OR "sildenafil" OR "tadalafil" OR "vardenafil" OR "stendra" OR "xanax" OR "valium" OR "tramadol" OR "oxycodone" OR "hydrocodone" OR "percocet" OR "adderall" OR "fentanyl" OR "morphine" OR "codeine" OR "hydromorphone" OR "oxymorphone" OR "dilaudid" OR "suboxone" OR "buprenorphine" OR "oxicontin" OR "vicodin" OR "roxicodone")', 'total': 2940, 'duration': 333, 'hits': [{'location': {'postal_code': '34673', 'province': 'Florida', 'timezone': 'America/New_York', 'country': 'United States', 'continent': 'North America', 'coordinates': {'latitude': 28.27168, 'longitude': -82.71955}, 'country_code': 'US', 'city': 'Port Richey'}, 'last_updated_at': '2025-05-31T16:28:41.147Z', 'autonomous_system': {'description': 'ACEHOST', 'bgp_prefix': '209.216.89.0/24', 'asn'

## Compile Blocklist that Require Post-Processing

In [5]:
pharmacies = fetch_illegal_pharmacies()
# trim accordingly
blocklist_pharmacy_safe = pharmacies[14:-7]
print(f"Total pharmacies in blocklist: {len(blocklist_pharmacy_safe)}")

# # Write to text file
# with open('data/blocklist_pharmacy_safe.txt', 'w') as f:
#     for entry in blocklist_pharmacy_safe:
#         f.write(entry + '\n')

Total pharmacies in blocklist: 44257


In [6]:
df_whitelist_pharmacy_safe = pd.read_csv('../data/fig2/whitelist_pharmacy_safe.csv')

whitelist_pharmacy_safe_arr = []
for entry in df_whitelist_pharmacy_safe['NABP Approved Websites'].values:
    for e in entry.split('\n'):
        whitelist_pharmacy_safe_arr.append(e)

# with open('data/whitelist_pharmacy_safe.txt','w') as f:
#     for line in whitelist_pharmacy_safe_arr:
#         f.write(line + '\n')

## Covnert DNS Blocklists to IP Blocklists

In [7]:
# convert_dns_files_to_ip('data/blocklist_pharmacy_safe.txt','data/blocklist_pharmacy_safe_ip.txt')
# convert_dns_files_to_ip('data/blocklist_pharmacy_safe2.txt','data/blocklist_pharmacy_safe2_ip.txt')
# convert_dns_files_to_ip('data/blocklist_pharmacy_list_fda_warnings.txt','data/blocklist_pharmacy_list_fda_warnings_ip.txt')
# convert_dns_files_to_ip('data/whitelist_pharmacy_safe.txt','data/whitelist_pharmacy_safe_ip.txt')