In [None]:
"""checks all of the follows from this day one week ago, and unfollows any users that did not follow back.
  logging should record stats of follows-follow backs from that day as well as number of accounts that were deleted.
  Should consume as much of the list in the s3 bucket as possible, and return early if there's nothing in the bucket to delete.
  After processing as much as possible the function will log how much it did, how much is remaining, and it will save in the dynamodb deletions stats what the current counts of everything are.
  When the s3 bucket is completely emptied out, the s3 object will be deleted, and the dynamodb deletions stats will be finished and logged for the sum of the day.

  Args:
    None
  Returns:
    None
"""

"checks all of the follows from this day one week ago, and unfollows any users that did not follow back.\n  logging should record stats of follows-follow backs from that day as well as number of accounts that were deleted.\n  Should consume as much of the list in the s3 bucket as possible, and return early if there's nothing in the bucket to delete.\n  After processing as much as possible the function will log how much it did, how much is remaining, and it will save in the dynamodb deletions stats what the current counts of everything are.\n  When the s3 bucket is completely emptied out, the s3 object will be deleted, and the dynamodb deletions stats will be finished and logged for the sum of the day.\n\n  Args:\n    None\n  Returns:\n    None\n"

In [None]:
pip install boto3



In [None]:
pip install atproto



In [None]:
# import colab secrets to store login credentials
from google.colab import userdata

# aws stuff
import boto3
from botocore.exceptions import ClientError
# json necessary to parse secret string, and write/read s3 objects
import json

# datetime is necessary for our ddb and s3 schema
import datetime
import zoneinfo

# for logging
import base64
import requests


# import bluesky api
from atproto import Client
# this is the exception that is raised when we try to look up a profile that no longer exists (banned or deleted)
from atproto.exceptions import BadRequestError

In [None]:
# --- credentials and identifiers read from the Colab secrets store ---
AWS_KEY, AWS_SECRET_KEY, REGION, SECRETS_ID = (
    userdata.get(secret_name)
    for secret_name in (
        'aws_access_key',
        'aws_secret_access_key',
        'aws_region',
        'aws_secretsmanager_id',
    )
)

# --- AWS service names and the resources this notebook works with ---
DDB, S3 = 'dynamodb', 's3'
DDB_TABLE, S3_BUCKET = 'rickybot-ddb', 'rickybot-s3'

# --- dynamodb item key and attribute names used for the running deletion stats ---
DDB_DELSTATS_KEY = 'DEL-STATS'
DDB_ATTR_PROCESSED, DDB_ATTR_DNE = 'PROCESSED', 'DNE'
DDB_ATTR_FOLLOWBACKS, DDB_ATTR_NOFOLLOWBACK = 'FOLLOWBACKS', 'NO-FOLLOWBACK'

PRIMARY_KEY = 'DOW'  # the dynamodb table's primary key. there is no sort key

# full weekday name (as produced by strftime("%A")) -> storage key; Friday and Saturday share one key
DOW_KEYS = dict(
    Sunday='SUN',
    Monday='MON',
    Tuesday='TUE',
    Wednesday='WED',
    Thursday='THU',
    Friday='FRI+SAT',
    Saturday='FRI+SAT',
)
USER_TIMEZONE = "US/Eastern"

# --- GitHub logging target (file inside the repo, and the branch commits go to) ---
FILE_PATH, BRANCH = "LOGGING_DEL_02.txt", "main"

# our own account's DID, so the deletion loop can skip it
MY_DID = 'did:plc:ktkc7jfakxzjpooj52ffc6ra'

In [None]:
# Work out today's weekday in the user's timezone; it selects the s3 object (and dynamodb key)
# this run operates on.
# NOTE(review): an earlier comment here said the job runs around 1am and handles the *previous*
# day's results, but the code keys off the current day - confirm which one is intended.
cur_timestamp = datetime.datetime.now(zoneinfo.ZoneInfo(USER_TIMEZONE))
dow = cur_timestamp.strftime("%A")
str_timestamp = str(cur_timestamp)  # reused as the timestamp prefix of every log line / commit message

# This job is not meant to run on Saturdays. A notebook cell cannot `return`, so raise to halt
# "Run All" here instead of printing a message and then carrying on with the deletions anyway.
if dow == 'Saturday':
  raise SystemExit("it's Saturday, you shouldn't be here.")

# use the day of the week to pull up the corresponding key for our dynamodb entries and our s3 bucket
s3_key = DOW_KEYS[dow]
print(s3_key)

SUN


In [None]:
# connect to aws
try:
  aws_session = boto3.Session(
          aws_access_key_id = AWS_KEY,
          aws_secret_access_key = AWS_SECRET_KEY,
          region_name = REGION
      )
except Exception as e:
  # this is the only error that we can't log to github, because we never got the credentials
  print(f'failed to begin AWS session: {e}')
  # every later cell needs aws_session, so stop the run here instead of failing later with a NameError
  raise

In [None]:
# then connect to secrets manager and parse the secret string (JSON) into a dict
try:
  secrets_client = aws_session.client('secretsmanager')
  secret_value = secrets_client.get_secret_value(SecretId=SECRETS_ID)
  secret_string = secret_value['SecretString']
  secret_map = json.loads(secret_string)
except Exception as e:
  # the github token is read from this secret, so this failure cannot be logged to github either
  print(f'failed to reach aws secrets manager: {e}')
  # every later cell needs secret_map, so stop the run here instead of failing later with a NameError
  raise

In [None]:
# unpack the values we need from the secrets manager payload into module-level constants
BSKY_USERNAME, BSKY_PASSWORD, GITHUB_TOKEN, GITHUB_REPO = (
    secret_map[field]
    for field in ('bsky_username', 'bsky_password', 'github_token', 'github_user/repo')
)
# the per-run deletion cap is coerced to an int before use
DELETION_MAX = int(secret_map['deletion_max'])

In [None]:
# before the program starts let's set up the logging function so we can insert it at any point where our program could break
def logging_deletions(logging_text):
  """Append one timestamped line to the log file kept in the GitHub repo.

  Reads FILE_PATH from GITHUB_REPO on BRANCH through the GitHub contents API, appends
  "<str_timestamp>: <logging_text>" and commits the result back to BRANCH. If the file does
  not exist yet (HTTP 404 on the read) it is created.

  This helper is called from inside error handlers, so it never raises: any failure is
  printed and reported through the return value instead.

  Args:
    logging_text: the message to append (converted with str()).
  Returns:
    True if GitHub accepted the commit, False otherwise.
  """
  # LOGGING ALL THE CHANGES TO OUR LOGGING FILE IN GITHUB
  commit_message = "Logging follow deletions on " + str_timestamp
  log_line = str_timestamp + ': ' + str(logging_text)

  url = f"https://api.github.com/repos/{GITHUB_REPO}/contents/{FILE_PATH}"
  headers = {"Authorization": f"token {GITHUB_TOKEN}"}

  try:
    # Step 1: Get the file's current content and SHA.
    # Read from the same branch we commit to, otherwise the SHA may not match on the PUT.
    response = requests.get(url, headers=headers, params={"ref": BRANCH}, timeout=30)
    if response.status_code == 200:
      response_json = response.json()
      if response_json.get("encoding") != "base64":
        # Large files come back without inline content; appending to an empty string
        # here would silently overwrite the whole log, so refuse instead.
        print("Error: logging file content was not returned inline by GitHub; log line not written.")
        return False
      file_sha = response_json["sha"]
      content = base64.b64decode(response_json["content"]).decode("utf-8")
    elif response.status_code == 404:
      # no log file yet: create it (a create request must not carry a sha)
      file_sha = None
      content = ""
    else:
      print(f"Error: could not read the logging file ({response.status_code}): {response.text}")
      return False

    # Step 2: Modify the file content
    new_content = content + log_line + '\n'
    encoded_content = base64.b64encode(new_content.encode("utf-8")).decode("utf-8")

    # Step 3: Push the updated content
    data = {
        "message": commit_message,
        "content": encoded_content,
        "branch": BRANCH,
    }
    if file_sha is not None:
      data["sha"] = file_sha
    update_response = requests.put(url, headers=headers, json=data, timeout=30)
  except (requests.RequestException, KeyError, ValueError) as e:
    # network failure, unexpected response shape, or undecodable content
    print(f"Error: failed to write to the logging file: {e}")
    return False

  # 200 = existing file updated, 201 = file created
  if update_response.status_code in (200, 201):
    print("Logging file updated successfully! Here's what was added to the logs:")
    print(log_line)
    return True

  print(f"Error: {update_response.text}")
  return False

In [None]:
# initialize dynamodb and s3
# Both failures are fatal for this run: log them (with the cause) and re-raise so that
# "Run All" stops here instead of failing later with a NameError on `table` / `s3`.
try:
  dynamodb = aws_session.resource(DDB)
  table = dynamodb.Table(DDB_TABLE)
except Exception as e:
  err = f'ERROR - failed to get dynamo db table: {e}'
  print(err)
  logging_deletions(err)
  raise
try:
  s3 = aws_session.client(S3)
  # these two calls double as a connectivity / permissions check against s3
  buckets = s3.list_buckets()
  bucket = s3.list_objects_v2(Bucket=S3_BUCKET)
except Exception as e:
  err = f'ERROR - failed to get s3 bucket: {e}'
  print(err)
  logging_deletions(err)
  raise

In [None]:
# pull the object from s3 - if there is nothing in there we can stop early
try:
  response = s3.get_object(Bucket=S3_BUCKET, Key=s3_key)
except ClientError as e:
  # "NoSuchKey" is what get_object reports for a missing object; "404" is kept for head-style responses
  if e.response["Error"]["Code"] in ("404", "NoSuchKey"):
    # Nothing to process today. A notebook cell cannot `return`, so raise to halt "Run All";
    # otherwise the following cells would run with `old_follows` undefined.
    raise SystemExit('There was no users list found in s3. Terminating function call.') from None
  err = f"ERROR - failed to get previous follows list from s3 bucket: {e}"
  print(err)
  logging_deletions(err)
  raise

print("Object existed in s3 bucket.")
# creates a list from the json info in the s3 bucket
old_follows = json.loads(response["Body"].read())
# report the size and type only - printing every DID floods the output and gets saved in the notebook
print(f"loaded {len(old_follows)} follows from s3 ({type(old_follows).__name__})")

Object existed in s3 bucket.
['did:plc:fscbdmrjprzpx6oy4mjcumpz', 'did:plc:qpjc5w33v4b4je2f4yomrh5z'] <class 'list'> 2


In [None]:
# quick check of how many follows were loaded from s3
print(len(old_follows))

2


In [None]:
# and now we can log into the bluesky client
try:
  client = Client()
  client.login(BSKY_USERNAME, BSKY_PASSWORD)
except Exception as e:
  # only the exception type is recorded, to keep any response detail out of the public log
  err = f'ERROR - failed to log in to the bluesky client ({type(e).__name__})'
  print(err)
  logging_deletions(err)
  # cannot proceed without bluesky: stop the run here
  raise

In [None]:
# snapshot of how many accounts we follow before any deletions, so the log can show the change.
# This is best-effort: on failure we warn, keep 0, and carry on.
following_before = 0
try:
  following = client.get_profile(actor=BSKY_USERNAME).follows_count
  following_before = following
except Exception as e:  # not a bare except: Ctrl-C / SystemExit should still interrupt the run
  warning = f'WARNING - failed to get previous follow count: {e}'
  print(warning)
  logging_deletions(warning)
print(following_before)

38289


In [None]:
# initialize our deletion stats for our logging and dynamodb record
processed_count = 0
failed_to_delete = []  # users that hit an error; they are re-queued in s3 and retried on a later run
count_users_dne = 0  # profile lookup was rejected: the account was banned or deleted
followed_back = 0
no_followback = 0
finished_deleting = False  # flag to let us know if we can delete the running deletion stats
error_count = 0  # shared across profile, unfollow and mute errors
muted_user_count = 0
muted_fails = 0

# how many errors we tolerate (on the shared error_count) before assuming rate limiting / timeouts
MAX_ERRORS_ON_PROFILE_LOOKUP = 3
MAX_ERRORS_ON_UNFOLLOW = 7

# now go through the followers, check if they still exist, see if they followed back, delete if necessary
for user_did in old_follows:
  processed_count += 1
  if user_did == MY_DID:
    # this shouldn't happen but we'll cover it anyway
    continue

  # first we have to try to get the profile of the user
  try:
    user_profile = client.get_profile(actor=user_did)
  except BadRequestError:
    # a bad request error means the profile was either banned or deleted. Nothing else to do with
    # them, but we keep track of these. `continue` is essential: without it the code below would
    # act on the PREVIOUS user's profile (or raise NameError on the first iteration).
    count_users_dne += 1
    continue
  except Exception as e:
    # a general exception probably means a timeout or similar, so retry this user later
    print(f'general exception getting profile of user: {e}')
    failed_to_delete.append(user_did)
    error_count += 1
    if error_count > MAX_ERRORS_ON_PROFILE_LOOKUP:
      # something's going wrong with this run, either rate limiting or timing out for some reason
      break
    continue

  if user_profile.viewer.followed_by is not None:
    followed_back += 1
    continue

  # the user did not follow back: unfollow (and mute) them
  # NOTE(review): a user whose unfollow fails is counted here and again when retried on a later run
  no_followback += 1
  try:
    follow_uri = user_profile.viewer.following
    if follow_uri is not None:  # None: maybe I manually unfollowed
      client.delete_follow(follow_uri)
      # here we also want to try to mute the user so they're not still in our feed to be readded
      try:
        client.mute(user_did)
        muted_user_count += 1
      except Exception as e:
        print(f'exception encountered muting user: {e}')
        muted_fails += 1
        error_count += 1
  except Exception as e:
    # something went wrong and we failed to delete this user, it's extremely rare for this to
    # happen unless you're just rate limited, so save this for later
    print(f'exception encountered deleting user: {e}')
    failed_to_delete.append(user_did)
    error_count += 1
    if error_count > MAX_ERRORS_ON_UNFOLLOW:  # we're probably getting rate limited, so stop processing users
      break

  # hard cap on deletions per run so you don't get rate limited
  # (checked after the try block instead of via `break` inside `finally`)
  if no_followback >= DELETION_MAX:
    break

# get a new follow count to show the change
following_after = 0
try:
  following = client.get_profile(actor=BSKY_USERNAME).follows_count
  following_after = following
except Exception as e:
  warning = f'WARNING - failed to get updated follow count: {e}'
  print(warning)
  logging_deletions(warning)

# get a new mutes count to show change
# NOTE(review): get_mutes() presumably returns a single page, so this may undercount - confirm
mutes_after = 0
try:
  mutes_after = len(client.app.bsky.graph.get_mutes().mutes)
except Exception as e:
  warning = f'WARNING - failed to get updated mutes count: {e}'
  print(warning)
  logging_deletions(warning)

# ok so we're out of the loop and here's what we need to do:
# whatever we did not reach, plus any users that failed (hopefully 0), goes back into s3.
# old_follows itself is left untouched so the "list of N" figure below is the real list size.
total_in_list = len(old_follows)
leftover_follows = old_follows[processed_count:] + failed_to_delete
s3_reupload = -1
s3_deletion_success = False
if not leftover_follows:
  # we made it to the end of the list with nothing to retry
  finished_deleting = True
  # now delete the s3 object
  try:
    s3.delete_object(Bucket=S3_BUCKET, Key=s3_key)
    s3_deletion_success = True
  except Exception as e:
    err = f'ERROR - failed to delete the list of follows from the s3 bucket: {e}'
    print(err)
    logging_deletions(err)
else:
  # stick the remaining users back into s3 for the next run
  try:
    s3.put_object(
        Bucket=S3_BUCKET,
        Key=s3_key,
        Body=json.dumps(leftover_follows),
        ContentType="application/json"
    )
    s3_reupload = len(leftover_follows)
  except Exception as e:
    err = f"ERROR - failed to upload list of leftover follows to s3: {e}"
    print(err)
    logging_deletions(err)

# do our logging, noting the status of the s3 reupload if it occurred
retry_note = '' if len(failed_to_delete) == 0 else f' {len(failed_to_delete)} failures were encountered and need to be retried.'
mutes_note = '' if mutes_after == 0 else f'| mutes: {mutes_after}'
reupload_note = f'. Successfully reuploaded remaining {s3_reupload} users to s3.' if s3_reupload > 0 else ''
logging_deletions(
    f'Processed {processed_count} users from the list of {total_in_list}.{retry_note}'
    f' From this batch of deletions {followed_back} users followed back, {no_followback} did not follow back and were deleted, and {count_users_dne} accounts no longer exist.'
    f' {muted_user_count} of the unfollowed users were successfully muted, with {muted_fails} errors.'
    f'\n  Follows count - now: {following_after} | prev: {following_before}{mutes_note}{reupload_note}'
)


Logging file updated successfully! Here's what was added to the logs:
2025-03-02: Processed 2 users from the list of 2. From this batch of deletions 0 users followed back, 2 did not follow back and were deleted, and 0 accounts no longer exist.
  Follows count - now: 38289 | prev: 38289


In [None]:
# pull up dynamodb, see if there were old stats from earlier runs today, and add them to this run's stats
try:
  ddb_response = table.get_item(
      Key={PRIMARY_KEY: DDB_DELSTATS_KEY},
  )
except Exception as e:
  err = f"ERROR - failed to check ddb key's existence: {e}"
  print(err)
  logging_deletions(err)
  # Stop here: without the earlier totals, writing stats below would overwrite them with
  # this run's numbers only (this run's own summary has already been logged).
  raise

print('ddb response:', ddb_response)

# it's not a problem if there was nothing in the response, it just means this was the first deletion of the day.
# Totals go into their own names so the per-run counters stay untouched and re-running this cell
# does not add the stored stats a second time. Stored numbers come back as Decimal, hence int().
previous_stats = ddb_response.get('Item', {})
total_processed = processed_count + int(previous_stats.get(DDB_ATTR_PROCESSED, 0))
total_dne = count_users_dne + int(previous_stats.get(DDB_ATTR_DNE, 0))
total_followed_back = followed_back + int(previous_stats.get(DDB_ATTR_FOLLOWBACKS, 0))
total_no_followback = no_followback + int(previous_stats.get(DDB_ATTR_NOFOLLOWBACK, 0))

# once the whole list is done, the running stats item is no longer needed: clear it out
if 'Item' in ddb_response and finished_deleting:
  try:
    table.delete_item(
      Key={PRIMARY_KEY: DDB_DELSTATS_KEY}
    )
  except Exception as e:
    err = f"ERROR - failed to delete item {DDB_DELSTATS_KEY} from dynamodb: {e}"
    print(err)
    logging_deletions(err)

if finished_deleting:
  # log the day's statistics regardless of whether there was anything in ddb
  # (there will not be if there was only one deletion run for the day)
  classified = total_followed_back + total_no_followback
  # guard against dividing by zero when nobody was classified (e.g. every account was gone)
  conversion_rate = round(total_followed_back / classified * 100, 2) if classified else 0.0
  logging_deletions(
      f"Finished {s3_key}. Last week's follows have been pruned. "
      f"{'Object was successfully deleted from s3 bucket. ' if s3_deletion_success else ''}TODAY'S STATS: "
      f"\n  {total_processed} total follows processed. "
      f"\n  {total_followed_back} users followed back. "
      f"\n  {total_no_followback} did not follow back and were deleted. "
      f"\n  {total_dne} accounts no longer exist. "
      f"\n  {conversion_rate}% Conversion Rate."
  )
else:
  # not finished deleting yet: store the running totals so the next run can pick them up
  try:
    table.update_item(
        Key={PRIMARY_KEY: DDB_DELSTATS_KEY},
        UpdateExpression='SET #attr1 = :val1, #attr2 = :val2, #attr3 = :val3, #attr4 = :val4',
        ExpressionAttributeNames={
            '#attr1': DDB_ATTR_PROCESSED,
            '#attr2': DDB_ATTR_DNE,
            '#attr3': DDB_ATTR_FOLLOWBACKS,
            '#attr4': DDB_ATTR_NOFOLLOWBACK
        },
        ExpressionAttributeValues={
            ':val1': total_processed,
            ':val2': total_dne,
            ':val3': total_followed_back,
            ':val4': total_no_followback
        }
    )
  except Exception as e:
    err = f'ERROR - failed to store running deletion statistics in dynamodb.\n{e}'
    print(err)
    logging_deletions(err)


ddb response: {'Item': {'DOW': 'DEL-STATS', 'DNE': Decimal('15'), 'FOLLOWBACKS': Decimal('325'), 'PROCESSED': Decimal('2913'), 'NO-FOLLOWBACK': Decimal('2588')}, 'ResponseMetadata': {'RequestId': 'NU5ECUB8SB1MHALVB6UOKGO1ONVV4KQNSO5AEMVJF66Q9ASUAAJG', 'HTTPStatusCode': 200, 'HTTPHeaders': {'server': 'Server', 'date': 'Sun, 02 Mar 2025 11:28:05 GMT', 'content-type': 'application/x-amz-json-1.0', 'content-length': '131', 'connection': 'keep-alive', 'x-amzn-requestid': 'NU5ECUB8SB1MHALVB6UOKGO1ONVV4KQNSO5AEMVJF66Q9ASUAAJG', 'x-amz-crc32': '1050521271'}, 'RetryAttempts': 0}}
Logging file updated successfully! Here's what was added to the logs:
2025-03-02: Finished checking last week's follows from SUN for deletions. Object was successfully deleted from s3 bucket. TODAY'S STATS: 
  2915 total follows processed. 
  325 users followed back. 
  2590 did not follow back and were deleted. 
  15 accounts no longer exist. 
  11.15% Conversion Rate.


In [None]:
# for debugging, do not leave this in final code
# Print a one-line summary per item instead of the raw scan: the items hold large sets of
# user DIDs, which floods the output and gets saved into the notebook file.
# NOTE(review): a single scan() call may return only the first page of a large table.
debug_scan = table.scan()
for item in debug_scan.get('Items', []):
  print(item.get(PRIMARY_KEY), '-', len(item) - 1, 'other attribute(s)')

{'Items': [{'2025-03-01 20:00:43.351902-05:00': {'did:plc:hxzuvov7x46q4ddaiec647pm', 'did:plc:bx62gadkeqq54muvo7nnjo2o', 'did:plc:rcsx2dkuvnnmruujo6oycve7', 'did:plc:ly4m5neoo36rvjevyoofvdcp', 'did:plc:2rsgl3f6ntc5f45xbdsgleaf', 'did:plc:ki6diq64aapltjisp44ey24e', 'did:plc:gyfqroiizgeez2lr3osymuux', 'did:plc:4s5i2ipifwl52ldbk46hah5w', 'did:plc:ldvnqrqk3hghqcu5qj53uahf', 'did:plc:xvgdlmmxsfmzr7or2u4ahvdj', 'did:plc:46nd4e74wzt5n7r44q6a2x7h', 'did:plc:zdkahucnca5xswgtqjjoz2ai', 'did:plc:b65qaowcxn7aa7mwio4risn4', 'did:plc:imus4hhckn2pdmmzreql6ork', 'did:plc:27yc5jck4plpywafbdwqucwr', 'did:plc:xrajjdysxlrabk7wus75cbia', 'did:plc:idjdrvzocec3g322gftn5evy', 'did:plc:l7bejo72iyw3j3mzxnzaaxdn', 'did:plc:klcapang7x6tcjh5sz4pjdb2', 'did:plc:dxurmhn4xq5k2twsi2wz5egp', 'did:plc:7lu2rpfbzczsvuzzlcuvpliq', 'did:plc:ct7j32rioagu2y6qpwpl6k5n', 'did:plc:gc6ef5yzs47n7axyo33q5ztq', 'did:plc:x3gcmt3es5xrpu2kiq7ptgb2', 'did:plc:5rpl3gxu5d7tjqsgxnvguwd4', 'did:plc:iefm24ym3xd3igc2ll6drqkg', 'did:plc:n6yoyd