In [None]:
import mailbox
import re
import requests
from tqdm.notebook import tqdm
import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
import email.utils

### Please modify the settings below according to your needs

The folder containing this notebook should look like this once the Takeout archive has been unzipped:

```
❯ tree
.
├── README.md
├── Takeout
│   ├── Mail
│   │   └── Spam.mbox
│   └── archive_browser.html
├── takeout-20231206T082003Z-001.zip
└── unsub.ipynb
```

In [None]:
# Location of the exported mailbox file, relative to this notebook.
mbox_path = 'Takeout/Mail/Spam.mbox'
# Age limit in days: only emails newer than this are processed.
# Use None to disable the limit and process every email.
days = 10

In [1]:
def find_unsubscribe_links(email_body):
    """Return every URL in ``email_body`` that contains the word "unsubscribe".

    Both ``http(s)://`` URLs and bare ``www.`` links are recognised; the
    "unsubscribe" match is case-insensitive.

    Args:
        email_body: Decoded text of an email part (plain text or HTML).
            ``None`` or an empty string yields an empty list.

    Returns:
        A list of matching URL strings in order of appearance (duplicates kept).
        Bare ``www.`` links are returned without a scheme.
    """
    # Both quote characters are excluded (besides whitespace and angle brackets)
    # so a link taken from href='...' or href="..." does not swallow the closing quote.
    url_pattern = r'''https?://[^\s<>"']+|www\.[^\s<>"']+'''
    unsubscribe_pattern = re.compile(r'unsubscribe', re.IGNORECASE)
    trailing_punctuation = '.,;:!?'

    unsubscribe_links = []
    for url in re.findall(url_pattern, email_body or ''):
        # HTML bodies escape "&" inside attribute values; undo that so the
        # query string reaches the server intact.
        url = url.replace('&amp;', '&')
        # Sentence punctuation glued to the end of a link is not part of it.
        url = url.rstrip(trailing_punctuation)
        # Drop a closing bracket only when it is unbalanced, e.g. "(see http://x/y)".
        for opener, closer in (('(', ')'), ('[', ']')):
            while url.endswith(closer) and url.count(opener) < url.count(closer):
                url = url[:-1].rstrip(trailing_punctuation)
        if unsubscribe_pattern.search(url):
            unsubscribe_links.append(url)
    return unsubscribe_links

In [None]:
def send_unsubscribe_request(url):
    """Issue a GET request to an unsubscribe URL and print the outcome.

    Args:
        url: The unsubscribe link. A link without an ``http://``/``https://``
            prefix (e.g. a bare ``www.example.com/...``) gets ``http://``
            prepended before the request is made.

    Returns:
        None. The result is reported on stdout; network errors raised by
        ``requests`` are caught and printed rather than propagated.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:94.0) Gecko/20100101 Firefox/94.0'
    }
    # requests refuses scheme-less URLs (MissingSchema), so bare "www." links
    # would otherwise always fail.
    if not url.lower().startswith(('http://', 'https://')):
        url = 'http://' + url
    try:
        response = requests.get(url, headers=headers, timeout=10)
        # Any 2xx reply (e.g. 204 No Content) means the server accepted the request.
        if 200 <= response.status_code < 300:
            print(f"Unsubscribe request sent successfully to {url}")
        else:
            print(f"Failed to send unsubscribe request to {url}, status code: {response.status_code}")
    except requests.RequestException as e:
        print(f"Error sending unsubscribe request to {url}: {e}")

In [None]:
def is_recent(email_date):
    """Tell whether an email timestamp falls within the last ``days`` days.

    Args:
        email_date: The email's date as seconds since the Unix epoch (UTC).

    Returns:
        True when the notebook-level ``days`` setting is None (no age limit)
        or when the email is less than ``days`` days old; False otherwise.
    """
    if days is None:
        return True
    # Compare plain epoch seconds instead of building a datetime from the
    # timestamp: this cannot raise on out-of-range dates and is not skewed by
    # daylight-saving changes the way naive local datetimes are.
    age_seconds = datetime.datetime.now().timestamp() - email_date
    return age_seconds < datetime.timedelta(days=days).total_seconds()

In [None]:
# create=False: a missing or mistyped path raises mailbox.NoSuchMailboxError
# instead of silently creating an empty mailbox file at that location.
mbox = mailbox.mbox(mbox_path, create=False)
# Unsubscribe URLs gathered from all messages; a set drops duplicates.
unique_links = set()

In [None]:
def _message_is_recent(message):
    """Tell whether a message passes the `days` age filter, judged by its Date header."""
    if days is None:
        # No age limit configured: keep every message, even one without a usable Date.
        return True
    date_header = message.get('Date')
    if not date_header:
        return False
    try:
        # str(): the header may come back as a Header object rather than a plain string.
        date_tuple = email.utils.parsedate_tz(str(date_header))
        return bool(date_tuple) and is_recent(email.utils.mktime_tz(date_tuple))
    except (TypeError, ValueError, OverflowError, OSError):
        # A nonsensical date should exclude this one message, not abort the whole scan.
        return False


def _text_bodies(message):
    """Yield the decoded text of each body part that should be scanned for links."""
    if message.is_multipart():
        parts = [
            part for part in message.walk()
            if part.get_content_type() in ('text/plain', 'text/html')
        ]
    else:
        # A single-part message is scanned whatever its content type.
        parts = [message]
    for part in parts:
        raw_body = part.get_payload(decode=True)
        if raw_body is None:
            # Nothing decodable in this part.
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            text = raw_body.decode(charset, errors='ignore')
        except (LookupError, UnicodeError):
            # Unknown or unusable charset label: fall back to UTF-8.
            text = raw_body.decode('utf-8', errors='ignore')
        yield text


# Scan every sufficiently recent message and accumulate its unsubscribe links.
for message in tqdm(mbox):
    if not _message_is_recent(message):
        continue
    for email_body in _text_bodies(message):
        unique_links.update(find_unsubscribe_links(email_body))

# Contact every collected link, up to 10 requests in flight at a time.
with ThreadPoolExecutor(max_workers=10) as executor:
    # Map each future back to its link so a failure can be attributed.
    futures = {executor.submit(send_unsubscribe_request, link): link for link in unique_links}
    for future in tqdm(as_completed(futures), total=len(futures)):
        try:
            future.result()
        except Exception as e:
            # send_unsubscribe_request only handles requests' own errors; report
            # anything else here so one bad link does not stop the remaining reports.
            print(f"Unexpected error while processing {futures[future]}: {e}")