In [None]:
import hashlib
import json
import time
from pathlib import Path

import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [None]:
# Configuration: the post to scrape and where downloaded media is written.
POST_URL = ""  # PASTE YOUR INSTAGRAM POST URL HERE
DOWNLOAD_DIR = Path("instagram_downloads")
# parents=True keeps this working if DOWNLOAD_DIR is changed to a nested path;
# exist_ok=True makes re-running the cell harmless.
DOWNLOAD_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
# Launch Chrome emulating a phone, with DevTools auto-opened and the
# performance log enabled (a later cell reads it back via driver.get_log).
mobile_emulation = {"deviceName": "iPhone 12 Pro"}
logging_prefs = {"performance": "ALL"}

options = webdriver.ChromeOptions()
options.add_argument("--auto-open-devtools-for-tabs")
options.add_experimental_option("mobileEmulation", mobile_emulation)
options.set_capability("goog:loggingPrefs", logging_prefs)

driver = webdriver.Chrome(options=options)
print("Browser opened with DevTools enabled")

In [None]:
# Navigate to the post, then reload it once.
# Fail early with a readable message: an empty URL would otherwise surface as
# an opaque WebDriver error from driver.get().
if not POST_URL.strip():
    raise ValueError("POST_URL is empty - paste the Instagram post URL into the configuration cell first")

driver.get(POST_URL)
time.sleep(3)  # fixed pause after the initial load
# NOTE(review): the reload presumably exists so the page's requests are issued
# again while performance logging is active - confirm before removing.
driver.refresh()
time.sleep(2)  # fixed pause after the reload
print("Page loaded and refreshed")

In [None]:
# Extract likes, comments, and paid partnership info.
# Each lookup is best-effort: on failure the default below is kept and a
# message is printed, so the rest of the notebook can still run.
likes = "0"
comments = "0"
is_paid = False

try:
    # Likes: first span with role="button" carrying these specific classes.
    like_spans = driver.find_elements(By.CSS_SELECTOR, 'span.x1ypdohk.x1s688f.x2fvf9.xe9ewy2[role="button"]')
    if like_spans:
        likes = like_spans[0].text
        print(f"Likes: {likes}")
    else:
        # find_elements returns an empty list instead of raising, so a selector
        # miss has to be reported here rather than in the except branch.
        print("Could not extract likes")
except Exception as exc:  # not a bare except: Ctrl-C / SystemExit still propagate
    print(f"Could not extract likes ({type(exc).__name__})")

try:
    # Comments: first span with these specific classes whose text is all digits.
    # str.isdigit() rejects formatted counts such as "1,234", so those are skipped.
    comment_spans = driver.find_elements(By.CSS_SELECTOR, 'span.xdj266r.x14z9mp.xat24cr.x1lziwak.xexx8yu.xyri2b.x18d9i69.x1c1uobl.x1hl2dhg.x16tdsg8.x1vvkbs')
    for span in comment_spans:
        text = span.text
        if text.isdigit():
            comments = text
            print(f"Comments: {comments}")
            break
    else:
        # Loop finished without a break: no purely numeric span was found.
        print("Could not extract comments")
except Exception as exc:
    print(f"Could not extract comments ({type(exc).__name__})")

try:
    # Paid partnership: plain substring search over the rendered page source.
    page_source = driver.page_source
    if "Paid partnership with " in page_source:
        is_paid = True
        print("✓ Paid partnership detected")
    else:
        print("Not a paid partnership")
except Exception as exc:
    print(f"Could not check paid partnership ({type(exc).__name__})")

print(f"\nPost Stats: Likes={likes}, Comments={comments}, Paid={is_paid}")

In [None]:
# Click the "Next" button until it is no longer offered.
# The loop is bounded so a page that always exposes a Next button, or a click
# that keeps failing, cannot hang the notebook.
MAX_NEXT_ATTEMPTS = 50
NEXT_BUTTON_LOCATOR = (By.CSS_SELECTOR, 'button[aria-label="Next"], button[aria-label="next"]')

click_count = 0
for _ in range(MAX_NEXT_ATTEMPTS):
    try:
        next_btn = WebDriverWait(driver, 2).until(
            EC.element_to_be_clickable(NEXT_BUTTON_LOCATOR)
        )
        next_btn.click()
    except (TimeoutException, NoSuchElementException):
        # No clickable Next button appeared within 2 seconds.
        print("No more next button found")
        break
    except WebDriverException as exc:
        # Any other WebDriver failure during the click (for example a stale or
        # covered element): locate the button again on the next attempt.
        print(f"Click failed ({type(exc).__name__}), retrying")
        time.sleep(0.5)
        continue

    click_count += 1
    print(f"Clicked next: {click_count}")
    time.sleep(0.5)  # short pause before looking for the button again
else:
    # The for loop ran out of attempts without hitting a break.
    print(f"Stopped after {MAX_NEXT_ATTEMPTS} attempts (safety cap)")

print(f"Total clicks: {click_count}")

In [None]:
# Extract media URLs from network logs, then close the browser.
# Keeps every Network.responseReceived entry whose MIME type is an image or a
# video and whose URL contains 'cdninstagram.com'.
media_urls = set()

try:
    logs = driver.get_log('performance')

    for log in logs:
        try:
            message = json.loads(log['message'])['message']
            if message['method'] != 'Network.responseReceived':
                continue

            response = message['params']['response']
            url = response['url']
            # "or ''" also covers a mimeType key that is present but null.
            mime_type = response.get('mimeType') or ''

            if ('image/' in mime_type or 'video/' in mime_type) and 'cdninstagram.com' in url:
                media_urls.add(url)
        except (KeyError, TypeError, ValueError, AttributeError):
            # Malformed or unexpected log entry (json.JSONDecodeError is a
            # ValueError): skip it and keep scanning.
            continue

    print(f"Found {len(media_urls)} potential media items")
finally:
    # Always release the browser, even if reading the log failed.
    driver.quit()

In [None]:
# Download all media - skip bad URLs and duplicates.
# Files are written as item_<n><ext> inside DOWNLOAD_DIR, where <n> counts
# successful unique downloads.

# Extension chosen from the response's MIME type, so non-JPEG images and
# non-MP4 videos are no longer mislabeled.
EXTENSION_BY_MIME = {
    'image/jpeg': '.jpg',
    'image/png': '.png',
    'image/webp': '.webp',
    'image/gif': '.gif',
    'image/heic': '.heic',
    'video/mp4': '.mp4',
    'video/webm': '.webm',
}

downloaded_hashes = set()
success_count = 0
total_urls = len(media_urls)

for idx, url in enumerate(media_urls, 1):
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # Check for duplicates by content hash (the same media can be served
        # under several URLs).
        content_hash = hashlib.md5(response.content).hexdigest()
        if content_hash in downloaded_hashes:
            print(f"Skipped {idx}/{total_urls}: Duplicate")
            continue

        content_type = response.headers.get('content-type', '')
        # Drop parameters such as "; charset=..." before the lookup.
        mime = content_type.split(';', 1)[0].strip().lower()
        # Unknown types fall back to the previous rule: images -> .jpg, rest -> .mp4.
        fallback_ext = '.jpg' if 'image/' in content_type else '.mp4'
        ext = EXTENSION_BY_MIME.get(mime, fallback_ext)

        filename = DOWNLOAD_DIR / f"item_{success_count + 1}{ext}"
        filename.write_bytes(response.content)

        # Record the item only after the write succeeded, so a failed write
        # neither inflates the count nor marks the content as already saved.
        downloaded_hashes.add(content_hash)
        success_count += 1
        print(f"Downloaded {success_count}: {filename.name}")
    except Exception as e:
        # Best-effort: report the failure (truncated) and move on to the next URL.
        print(f"Skipped {idx}/{total_urls}: {str(e)[:50]}")

print(f"\n✓ Downloaded {success_count} unique items to '{DOWNLOAD_DIR}'")
print(f"Post: Likes={likes}, Comments={comments}, Paid={is_paid}")