In [1]:
!pip install beautifulsoup4 requests pandas


Defaulting to user installation because normal site-packages is not writeable


In [2]:
import requests
import pandas as pd

In [4]:
def scrape_reddit(keyword="AI takeover", subreddit="artificial", max_posts=100):
    import requests, pandas as pd
    posts = []
    after = None

    while len(posts) < max_posts:
        url = f"https://www.reddit.com/r/{subreddit}/search.json?q={keyword}&restrict_sr=1&sort=new&limit=100"
        if after:
            url += f"&after={after}"
        headers = {"User-Agent": "Mozilla/5.0"}
        res = requests.get(url, headers=headers)
        data = res.json()
        children = data["data"]["children"]
        if not children:
            break
        for child in children:
            post = child["data"]
            title = post.get("title", "")
            selftext = post.get("selftext", "")
            url = "https://reddit.com" + post.get("permalink", "")
            content = selftext if selftext else title
            posts.append({"title": title, "url": url, "content": content})
        after = data["data"].get("after")
        if not after:
            break

    return pd.DataFrame(posts[:max_posts])


In [8]:
# Scrape 3 AI-future related keywords
df_takeover = scrape_reddit("AI takeover", max_posts=50)
df_future   = scrape_reddit("AI future", max_posts=50)
df_agi      = scrape_reddit("AGI", max_posts=50)

# Clean + Filter each
df_takeover.drop_duplicates(subset="url", inplace=True)
df_future.drop_duplicates(subset="url", inplace=True)
df_agi.drop_duplicates(subset="url", inplace=True)

df_takeover = df_takeover[df_takeover["content"].str.len() > 30]
df_future = df_future[df_future["content"].str.len() > 30]
df_agi = df_agi[df_agi["content"].str.len() > 30]

# Combine all
df_combined = pd.concat([df_takeover, df_future, df_agi]).drop_duplicates(subset="url").reset_index(drop=True)

print(f" Total articles after combining and filtering: {len(df_combined)}")
df_combined.head(103)


 Total articles after combining and filtering: 104


Unnamed: 0,title,url,content
0,The AI Revolution Isn’t Coming—It’s Already He...,https://reddit.com/r/artificial/comments/1j1br...,Have you ever stopped to think about how AI is...
1,One-Minute Daily AI News 9/24/2024,https://reddit.com/r/artificial/comments/1fow5...,1. ‘Tit**A**n**I**c’ director **James Cameron*...
2,Big tech companies form new consortium to alla...,https://reddit.com/r/artificial/comments/1bvlu...,Big tech companies form new consortium to alla...
3,Is Devin AI Really Going To Takeover Software ...,https://reddit.com/r/artificial/comments/1bh4j...,"I've been reading about Devin AI, and it seems..."
4,Musk Demands Bigger Stake in Tesla as Price fo...,https://reddit.com/r/artificial/comments/198b1...,"- Elon Musk, CEO of Tesla, has demanded that t..."
...,...,...,...
98,"Yann LeCun: ""Some people are making us believe...",https://reddit.com/r/artificial/comments/1hkvr...,"Yann LeCun: ""Some people are making us believe..."
99,When AI Beats Us In Every Test We Can Create: ...,https://reddit.com/r/artificial/comments/1hkb6...,When AI Beats Us In Every Test We Can Create: ...
100,From o1 to o3 was just 3 months,https://reddit.com/r/artificial/comments/1hjd7...,From o1 to o3 was just 3 months
101,"ARC-AGI has fallen to OpenAI's new model, o3",https://reddit.com/r/artificial/comments/1hiq1...,"ARC-AGI has fallen to OpenAI's new model, o3"


In [11]:
import requests

def query_ollama(prompt, model="mistral"):
    url = "http://localhost:11434/api/generate"
    payload = {
        "model": model,
        "prompt": prompt.strip(),
        "stream": False
    }

    try:
        response = requests.post(url, json=payload)
        response.raise_for_status()
        return response.json()["response"]
    except Exception as e:
        print(f"❌ Error during Ollama call: {e}")
        return "Summary: N/A\nScore: 0\nTone: Unknown"

def analyze_article(text):
    truncated_text = text.strip().replace("\n", " ")[:3000]  # Limit length + clean line breaks

    prompt = f"""
You are an AI analyst. Analyze the following Reddit post about the future of AI and humanity:

POST:
{truncated_text}

Tasks:
1. Give a 2–3 sentence summary.
2. Assign a score from -10 (very dangerous) to +10 (very beneficial) for the future of humanity.
3. Classify tone as Hopeful or Fearful.

Return in this exact format:
Summary: ...
Score: ...
Tone: ...
"""
    return query_ollama(prompt)


In [13]:


results = []

for i in range(0, len(df_combined), 10):
    batch = df_combined.iloc[i:i+10].copy()
    start_idx = i + 1
    end_idx = i + len(batch)
    print(f"\n⚙️ Running batch {start_idx} to {end_idx}")

    batch_responses = []

    for j, row in batch.iterrows():
        post_num = row.name + 1
        print(f"🔍 Analyzing post {post_num}/{len(df_combined)}...")
        start = time.time()

        try:
            response = analyze_article(row["content"])
        except Exception as e:
            print(f"❌ Error: {e}")
            response = "Summary: N/A\nScore: 0\nTone: Unknown"

        duration = round(time.time() - start, 2)
        print(f"✅ Done in {duration} sec")
        batch_responses.append(response)

    batch["analysis"] = batch_responses
    results.append(batch)

    # Auto-save progress after every batch
    pd.concat(results).to_csv("ai_future_analysis_progress.csv", index=False)



⚙️ Running batch 1 to 10
🔍 Analyzing post 1/104...
✅ Done in 51.15 sec
🔍 Analyzing post 2/104...
✅ Done in 51.31 sec
🔍 Analyzing post 3/104...
✅ Done in 18.32 sec
🔍 Analyzing post 4/104...
✅ Done in 39.88 sec
🔍 Analyzing post 5/104...
✅ Done in 55.78 sec
🔍 Analyzing post 6/104...
✅ Done in 30.18 sec
🔍 Analyzing post 7/104...
✅ Done in 39.7 sec
🔍 Analyzing post 8/104...
✅ Done in 17.63 sec
🔍 Analyzing post 9/104...
✅ Done in 16.46 sec
🔍 Analyzing post 10/104...
✅ Done in 31.65 sec

⚙️ Running batch 11 to 20
🔍 Analyzing post 11/104...
✅ Done in 34.32 sec
🔍 Analyzing post 12/104...
✅ Done in 30.04 sec
🔍 Analyzing post 13/104...
✅ Done in 33.42 sec
🔍 Analyzing post 14/104...
✅ Done in 23.53 sec
🔍 Analyzing post 15/104...
✅ Done in 51.24 sec
🔍 Analyzing post 16/104...
✅ Done in 54.95 sec
🔍 Analyzing post 17/104...
✅ Done in 58.72 sec
🔍 Analyzing post 18/104...
✅ Done in 22.64 sec
🔍 Analyzing post 19/104...
✅ Done in 23.72 sec
🔍 Analyzing post 20/104...
✅ Done in 18.85 sec

⚙️ Running batch

In [14]:
def parse_analysis(response):
    try:
        lines = response.strip().split("\n")
        summary = lines[0].replace("Summary: ", "").strip()
        score = lines[1].replace("Score: ", "").strip()
        tone = lines[2].replace("Tone: ", "").strip()
        return pd.Series([summary, score, tone])
    except:
        return pd.Series(["", "", ""])

final_df = pd.concat(results).reset_index(drop=True)
final_df[["summary", "score", "tone"]] = final_df["analysis"].apply(parse_analysis)

# Export final CSV
final_df.to_csv("final_ai_future_analysis.csv", index=False)
final_df[["title", "summary", "score", "tone", "url"]].head()


Unnamed: 0,title,summary,score,tone,url
0,The AI Revolution Isn’t Coming—It’s Already He...,The post discusses a hypothetical scenario whe...,,Score: 0 (Neutral) - This scenario presents bo...,https://reddit.com/r/artificial/comments/1j1br...
1,One-Minute Daily AI News 9/24/2024,"The post highlights recent developments in AI,...",+6 (The advancements in AI are beneficial but ...,Neutral (The post presents information without...,https://reddit.com/r/artificial/comments/1fow5...
2,Big tech companies form new consortium to alla...,Major tech companies have formed a consortium ...,,Score: +4,https://reddit.com/r/artificial/comments/1bvlu...
3,Is Devin AI Really Going To Takeover Software ...,"The post discusses Devin AI, a new Large Langu...",,Score: 0 (Neutral) - While the technology is i...,https://reddit.com/r/artificial/comments/1bh4j...
4,Musk Demands Bigger Stake in Tesla as Price fo...,"Elon Musk, CEO of Tesla, has demanded addition...",,Score: +6 (Musk's focus on AI development coul...,https://reddit.com/r/artificial/comments/198b1...
