In [2]:
pip install praw pandas tqdm nltk spacy gensim vaderSentiment matplotlib tqdm

Collecting praw
  Downloading praw-7.8.1-py3-none-any.whl.metadata (9.4 kB)
Collecting gensim
  Downloading gensim-4.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Collecting prawcore<3,>=2.4 (from praw)
  Downloading prawcore-2.4.0-py3-none-any.whl.metadata (5.0 kB)
Collecting update_checker>=0.18 (from praw)
  Downloading update_checker-0.18.0-py3-none-any.whl.metadata (2.3 kB)
Downloading praw-7.8.1-py3-none-any.whl (189 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.3/189.3 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gensim-4.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (27.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.9/27.9 MB[0m [31m94.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m━━━━━━━━━


# Data Gathering Strategy
-   **Arctic Shift**: Employed to gather historical data (bypassing recent API limits).
-   **Method**: `ArcticShiftClient` implemented to fetch posts/comments by subreddit and keyword.


In [3]:

import os
import sys
import json
import time
import logging
import traceback
import signal
from pathlib import Path
from datetime import datetime, timedelta

import praw
import pandas as pd
import requests
from tqdm import tqdm

# NLP / modeling libraries
try:
    import spacy
    SPACY_AVAILABLE = True
except Exception:
    SPACY_AVAILABLE = False

import gensim
from gensim import corpora, models
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Fallback NLTK
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer

import matplotlib.pyplot as plt

# ---------------------------
# Configuration (edit here)
# ---------------------------
DEFAULT_SUBREDDITS = ["electricvehicles", "cars", "TeslaMotors"]
DEFAULT_KEYWORDS = ["ev", "electric vehicle", "charging", "battery", "range anxiety", "range-anxiety"]
CHECKPOINT_META = "data/raw/checkpoint.json"
POSTS_CHECKPOINT = "data/raw/posts_checkpoint.csv"
COMMENTS_CHECKPOINT = "data/raw/comments_checkpoint.csv"
OUTPUT_DIR = Path("outputs")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
LOGS_DIR = Path("logs"); LOGS_DIR.mkdir(parents=True, exist_ok=True)
DATA_DIR = Path("data/raw"); DATA_DIR.mkdir(parents=True, exist_ok=True)

# ---------------------------
# Utilities
# ---------------------------
def setup_logger(name="ev_collection", log_file=None, level=logging.INFO):
    logger = logging.getLogger(name)
    logger.setLevel(level)
    if not logger.handlers:
        fh = logging.FileHandler(log_file or LOGS_DIR / f"{name}_{datetime.now():%Y%m%d_%H%M%S}.log")
        fmt = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
        fh.setFormatter(fmt)
        logger.addHandler(fh)
        sh = logging.StreamHandler(sys.stdout)
        sh.setFormatter(fmt)
        logger.addHandler(sh)
    return logger

def save_dataframe(df, path, format="csv"):
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    if format == "csv":
        df.to_csv(path, index=False)
    else:
        df.to_parquet(path, index=False)

def save_json(obj, path):
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w") as f:
        json.dump(obj, f, indent=2)

# ---------------------------
# Scraper & Pipeline
# ---------------------------

# ---------------------------
# Arctic Shift Client
# ---------------------------
class ArcticShiftClient:
    def __init__(self, logger=None):
        self.base_url = "https://api.pullpush.io/reddit"
        self.logger = logger or logging.getLogger("ArcticShift")

    def search_submissions(self, subreddit, q=None, after=None, before=None, limit=100):
        url = f"{self.base_url}/search/submission"
        params = {"subreddit": subreddit, "limit": 100}
        if q: params["q"] = q
        if after: params["after"] = after
        if before: params["before"] = before
        try:
            resp = requests.get(url, params=params, timeout=10)
            resp.raise_for_status()
            return resp.json().get("data", [])
        except Exception as e:
            if self.logger: self.logger.error(f"Arctic request failed: {e}")
            return []

    def search_comments(self, subreddit, q=None, after=None, before=None, limit=100):
        url = f"{self.base_url}/search/comment"
        params = {"subreddit": subreddit, "limit": 100}
        if q: params["q"] = q
        if after: params["after"] = after
        if before: params["before"] = before
        try:
            resp = requests.get(url, params=params, timeout=10)
            resp.raise_for_status()
            return resp.json().get("data", [])
        except Exception as e:
            if self.logger: self.logger.error(f"Arctic comment request failed: {e}")
            return []


class EVRedditPipeline:
    def __init__(self,
                 client_id=None,
                 client_secret=None,
                 user_agent="ev-sentiment-collector",
                 subreddits=None,
                 keywords=None,
                 checkpoint_file=CHECKPOINT_META,
                 logger=None):
        self.client_id = client_id
        self.client_secret = client_secret
        self.user_agent = user_agent
        self.subreddits = subreddits or DEFAULT_SUBREDDITS
        self.keywords = keywords or DEFAULT_KEYWORDS
        self.checkpoint_file = checkpoint_file

        self.logger = logger or setup_logger("ev_pipeline", log_file=str(LOGS_DIR / "ev_pipeline.log"))

        # runtime state
        self.reddit = None
        self.posts = []       # list of dicts
        self.comments = []    # list of dicts
        self.collected_post_ids = set()

        # checkpoint / rate limit
        self.should_stop = False
        self.start_time = datetime.now()
        self.last_save = datetime.now()
        self.save_interval_minutes = 5

        # graceful shutdown
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

        # NLP tools
        self.analyzer = SentimentIntensityAnalyzer()
        self.arctic = ArcticShiftClient(self.logger)
        self._prepare_nlp()

        # initialize reddit and load checkpoint
        self._initialize_reddit()
        self._load_checkpoint()

    def _signal_handler(self, signum, frame):
        self.logger.warning(f"Received signal {signum}; will stop after current iteration and save checkpoint.")
        self.should_stop = True

    def _initialize_reddit(self):
        if not self.client_id:
            self.logger.info("No Reddit credentials provided. PRAW features will be disabled.")
            return

        try:
            self.reddit = praw.Reddit(
                client_id=self.client_id,
                client_secret=self.client_secret,
                user_agent=self.user_agent,
                timeout=30
            )
            # simple connectivity check
            _ = self.reddit.read_only
            self.logger.info(f"Connected to Reddit read_only={self.reddit.read_only}")
        except Exception as e:
            self.logger.error(f"Failed to initialize PRAW: {e}")
            # Do not raise

    def _prepare_nlp(self):
        # spaCy preferred
        if SPACY_AVAILABLE:
            try:
                self.nlp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
                self.logger.info("spaCy found and loaded.")
                self.use_spacy = True
            except Exception:
                self.logger.warning("spaCy installed but model not found; falling back to NLTK.")
                self.use_spacy = False
        else:
            self.logger.info("spaCy not available; using NLTK.")
            self.use_spacy = False

        # NLTK setup
        try:
            nltk.data.find("tokenizers/punkt")
        except Exception:
            nltk.download("punkt")
        try:
            nltk.data.find("corpora/stopwords")
        except Exception:
            nltk.download("stopwords")
        self.stopwords = set(stopwords.words("english"))
        self.stemmer = PorterStemmer()

    def _load_checkpoint(self):
        checkpoint = Path(self.checkpoint_file)
        if checkpoint.exists():
            try:
                with open(checkpoint, "r") as f:
                    meta = json.load(f)
                self.logger.info(f"Loading checkpoint from {checkpoint}: {meta.get('posts_count',0)} posts previously collected.")
                if Path(POSTS_CHECKPOINT).exists():
                    dfp = pd.read_csv(POSTS_CHECKPOINT)
                    self.posts = dfp.to_dict("records")
                    self.collected_post_ids = set(dfp["post_id"].astype(str).tolist())
                    self.logger.info(f"Loaded {len(self.posts)} posts from disk.")
                if Path(COMMENTS_CHECKPOINT).exists():
                    dfc = pd.read_csv(COMMENTS_CHECKPOINT)
                    self.comments = dfc.to_dict("records")
                    self.logger.info(f"Loaded {len(self.comments)} comments from disk.")
            except Exception as e:
                self.logger.warning(f"Failed to load checkpoint: {e}. Starting fresh.")
                self.posts = []
                self.comments = []
                self.collected_post_ids = set()
        else:
            self.logger.info("No checkpoint found; starting fresh.")

    def _save_checkpoint(self):
        try:
            meta = {
                "timestamp": datetime.now().isoformat(),
                "posts_count": len(self.posts),
                "comments_count": len(self.comments),
            }
            save_json(meta, self.checkpoint_file)

            if self.posts:
                save_dataframe(pd.DataFrame(self.posts), POSTS_CHECKPOINT)
            if self.comments:
                save_dataframe(pd.DataFrame(self.comments), COMMENTS_CHECKPOINT)

            self.last_save = datetime.now()
            self.logger.info(f"Checkpoint saved: {len(self.posts)} posts, {len(self.comments)} comments.")
        except Exception as e:
            self.logger.error(f"Failed to save checkpoint: {e}")

    def _clean_text(self, text):
        if not isinstance(text, str):
            return ""
        text = text.replace("\n", " ").strip()
        return text

    def _preprocess(self, text):
        text = self._clean_text(text).lower()
        if self.use_spacy:
            doc = self.nlp(text)
            tokens = [tok.lemma_ for tok in doc if tok.is_alpha and not tok.is_stop and len(tok) > 2]
            return tokens
        else:
            tokens = word_tokenize(text)
            tokens = [t for t in tokens if t.isalpha()]
            tokens = [t for t in tokens if t not in self.stopwords and len(t) > 2]
            tokens = [self.stemmer.stem(t) for t in tokens]
            return tokens

    def get_submission_data(self, submission):
        try:
            return {
                "post_id": submission.id,
                "author": str(submission.author) if submission.author else "[deleted]",
                "title": submission.title,
                "selftext": submission.selftext or "",
                "score": submission.score,
                "num_comments": submission.num_comments,
                "created_utc": datetime.fromtimestamp(submission.created_utc).isoformat(),
                "subreddit": str(submission.subreddit),
                "url": submission.url,
                "is_self": bool(submission.is_self),
                "permalink": submission.permalink,
                "collected_at": datetime.now().isoformat()
            }
        except Exception as e:
            self.logger.warning(f"Failed to extract submission {getattr(submission,'id',None)}: {e}")
            return None

    def get_comments_for_submission(self, submission, max_comments=200):
        comments_out = []
        try:
            submission.comments.replace_more(limit=2)
            all_comments = submission.comments.list()
            for cm in all_comments[:max_comments]:
                try:
                    if isinstance(cm, praw.models.Comment):
                        comments_out.append({
                            "comment_id": cm.id,
                            "post_id": submission.id,
                            "author": str(cm.author) if cm.author else "[deleted]",
                            "body": cm.body,
                            "score": cm.score,
                            "created_utc": datetime.fromtimestamp(cm.created_utc).isoformat(),
                            "parent_id": cm.parent_id,
                            "collected_at": datetime.now().isoformat()
                        })
                except Exception:
                    continue
        except Exception as e:
            self.logger.warning(f"Error fetching comments for {submission.id}: {e}")
        return comments_out

    def _normalize_arctic(self, data):
        return {
            "post_id": data.get("id"),
            "author": data.get("author", "[deleted]"),
            "title": data.get("title", ""),
            "selftext": data.get("selftext", "") or "",
            "score": data.get("score", 0),
            "num_comments": data.get("num_comments", 0),
            "created_utc": datetime.fromtimestamp(data.get("created_utc", 0)).isoformat(),
            "subreddit": data.get("subreddit", ""),
            "url": data.get("url", ""),
            "is_self": data.get("is_self", False),
            "permalink": data.get("permalink", ""),
            "collected_at": datetime.now().isoformat(),
            "source": "arctic_shift"
        }

    def search_and_collect(self, limit_per_subreddit=500, methods=("arctic", "search","hot","top","new")):
        """
        Iterate subreddits and collect posts + comments filtered by keywords where applicable.
        """
        for method in methods:
            if self.should_stop:
                break
            for subreddit in self.subreddits:
                if self.should_stop:
                    break

                self.logger.info(f"Collecting from r/{subreddit} using `{method}` (limit {limit_per_subreddit})")
                
                if method == "arctic":
                    query = " ".join(self.keywords)
                    results = self.arctic.search_submissions(subreddit, limit=limit_per_subreddit, q=None)
                    self.logger.info(f"Arctic Shift returned {len(results)} results")
                    
                    for item in results:
                        if self.should_stop: break
                        pid = item.get("id")
                        if pid in self.collected_post_ids: continue
                        
                        post_data = self._normalize_arctic(item)
                        # Filter keywords again to be safe
                        text_blob = (post_data["title"] + " " + post_data["selftext"]).lower()
                        if not any(k.lower() in text_blob for k in self.keywords):
                            continue
                            
                        self.posts.append(post_data)
                        self.collected_post_ids.add(pid)
                    
                    self._save_checkpoint()
                    continue

                if not self.reddit:
                    continue

                try:
                    sub = self.reddit.subreddit(subreddit)
                    if method == "search":
                        query = " OR ".join([f'"{k}"' for k in self.keywords])
                        submissions = sub.search(query, time_filter="all", limit=limit_per_subreddit)
                    elif method == "hot":
                        submissions = sub.hot(limit=limit_per_subreddit)
                    elif method == "top":
                        submissions = sub.top(time_filter="all", limit=limit_per_subreddit)
                    elif method == "new":
                        submissions = sub.new(limit=limit_per_subreddit)
                    else:
                        submissions = sub.hot(limit=limit_per_subreddit)

                    submissions = list(submissions)
                    self.logger.info(f"Found {len(submissions)} submissions in r/{subreddit}.")

                    for submission in tqdm(submissions, desc=f"r/{subreddit}", leave=False):
                        if self.should_stop:
                            break
                        if submission.id in self.collected_post_ids:
                            continue

                        post_data = self.get_submission_data(submission)
                        if post_data:
                            if method != "search":
                                text_blob = (post_data["title"] + " " + post_data["selftext"]).lower()
                                if not any(k.lower() in text_blob for k in self.keywords):
                                    continue

                            self.posts.append(post_data)
                            self.collected_post_ids.add(submission.id)

                            cms = self.get_comments_for_submission(submission, max_comments=200)
                            if cms:
                                self.comments.extend(cms)

                        if (datetime.now() - self.last_save).total_seconds() > self.save_interval_minutes * 60:
                            self._save_checkpoint()

                    time.sleep(2)

                except Exception as e:
                    self.logger.error(f"Error collecting from r/{subreddit}: {e}\n{traceback.format_exc()}")

        self._save_checkpoint()

    def prepare_corpus(self, documents, min_df=5, no_below=5, no_above=0.5):
        tokens_list = [self._preprocess(d) for d in documents]
        tokens_list = [t for t in tokens_list if len(t) >= 2]
        dictionary = corpora.Dictionary(tokens_list)
        dictionary.filter_extremes(no_below=no_below, no_above=no_above)
        corpus = [dictionary.doc2bow(text) for text in tokens_list]
        return tokens_list, dictionary, corpus

    def train_lda(self, tokens_list, dictionary, corpus, num_topics=6, passes=10, random_state=42):
        lda = models.LdaModel(corpus=corpus, id2word=dictionary, num_topics=num_topics, passes=passes, random_state=random_state)
        return lda

    def assign_topics(self, lda_model, dictionary, texts):
        docs_tokens = [self._preprocess(t) for t in texts]
        corpus = [dictionary.doc2bow(tokens) for tokens in docs_tokens]
        dominant_topics = []
        distributions = []
        for dist in lda_model[corpus]:
            if isinstance(dist, list) and dist:
                sorted_topics = sorted(dist, key=lambda x: x[1], reverse=True)
                dominant_topics.append(sorted_topics[0][0])
                distributions.append({int(k): float(v) for k, v in sorted_topics})
            else:
                dominant_topics.append(None)
                distributions.append({})
        return dominant_topics, distributions

    def compute_sentiment(self, text):
        text = self._clean_text(text)
        if not text:
            return {"neg": 0.0, "neu": 1.0, "pos": 0.0, "compound": 0.0}
        vs = self.analyzer.polarity_scores(text)
        return vs

    def run_pipeline(self, num_topics=6):
        self.logger.info("Preparing documents for topic modeling...")
        post_texts = [(p["title"] + " " + p["selftext"]).strip() for p in self.posts]
        comment_texts = [c["body"] for c in self.comments]
        all_docs = post_texts + comment_texts

        if not all_docs:
            self.logger.error("No documents collected. Run search_and_collect first.")
            return

        tokens_list, dictionary, corpus = self.prepare_corpus(all_docs)
        self.logger.info(f"Prepared corpus: {len(tokens_list)} documents, dictionary size {len(dictionary)}")

        self.logger.info(f"Training LDA with {num_topics} topics...")
        lda_model = self.train_lda(tokens_list, dictionary, corpus, num_topics=num_topics)

        topics = {}
        for t in range(num_topics):
            topics[t] = [word for word, prob in lda_model.show_topic(t, topn=8)]
        save_json(topics, OUTPUT_DIR / "topics_keywords.json")
        self.logger.info("Saved topic keywords")

        dominant_topics, dists = self.assign_topics(lda_model, dictionary, all_docs)

        doc_ids = []
        doc_texts = []
        doc_kind = []
        doc_meta = []

        for i, txt in enumerate(all_docs):
            doc_ids.append(i)
            doc_texts.append(txt)
            doc_kind.append("post" if i < len(post_texts) else "comment")
            doc_meta.append({
                "topic": dominant_topics[i],
                "topic_dist": dists[i]
            })

        df = pd.DataFrame({
            "doc_id": doc_ids,
            "kind": doc_kind,
            "text": doc_texts,
            "dominant_topic": [m["topic"] for m in doc_meta],
            "topic_dist": [m["topic_dist"] for m in doc_meta]
        })

        self.logger.info("Computing sentiment for each document...")
        sentiments = [self.compute_sentiment(t) for t in df["text"].tolist()]
        df_sent = pd.DataFrame(sentiments)
        df = pd.concat([df, df_sent], axis=1)

        df.to_csv(OUTPUT_DIR / "doc_level_sent_topics.csv", index=False)
        self.logger.info(f"Saved document-level CSV to {OUTPUT_DIR / 'doc_level_sent_topics.csv'}")

        agg = df.dropna(subset=["dominant_topic"]).groupby("dominant_topic").agg(
            count=("doc_id","count"),
            avg_compound=("compound","mean"),
            avg_pos=("pos","mean"),
            avg_neg=("neg","mean"),
            avg_neu=("neu","mean")
        ).reset_index()
        agg = agg.sort_values("avg_compound", ascending=False)
        agg.to_csv(OUTPUT_DIR / "topic_sentiment_summary.csv", index=False)
        save_json(agg.to_dict(orient="records"), OUTPUT_DIR / "topic_sentiment_summary.json")
        self.logger.info("Saved topic-level sentiment summary")

        plt.figure(figsize=(8,5))
        plt.bar(agg["dominant_topic"].astype(str), agg["avg_compound"])
        plt.xlabel("Topic")
        plt.ylabel("Average compound sentiment")
        plt.title("Avg VADER compound sentiment by LDA topic")
        plt.tight_layout()
        plt.savefig(OUTPUT_DIR / "topic_avg_compound.png")
        plt.close()
        self.logger.info("Saved sentiment bar chart")

        topics_df = pd.DataFrame([{"topic": t, "keywords": ", ".join(topics[t])} for t in topics])
        topics_df.to_csv(OUTPUT_DIR / "topics_keywords.csv", index=False)

        self.logger.info("Pipeline finished. Outputs placed in `outputs/` directory.")

    def save_final(self):
        if self.posts:
            save_dataframe(pd.DataFrame(self.posts), OUTPUT_DIR / "posts_final.csv")
        if self.comments:
            save_dataframe(pd.DataFrame(self.comments), OUTPUT_DIR / "comments_final.csv")
        self.logger.info("Saved final posts/comments to outputs/")


def main():
    logger = setup_logger("ev_pipeline_runner", log_file=str(LOGS_DIR / "ev_run.log"))
    logger.info("EV Reddit Sentiment pipeline starting.")

    client_id = None
    client_secret = None

    try:
        pipeline = EVRedditPipeline(
            client_id=client_id,
            client_secret=client_secret,
            user_agent="ev-sentiment-collector-1.0",
            subreddits=DEFAULT_SUBREDDITS,
            keywords=DEFAULT_KEYWORDS
        )

        # Collect data: adjust limits as needed
        pipeline.search_and_collect(limit_per_subreddit=300, methods=("arctic", "search","hot","top","new"))

        # Run modeling and sentiment
        pipeline.run_pipeline(num_topics=6)

        # Save final dumps
        pipeline.save_final()

        logger.info("All done.")

    except KeyboardInterrupt:
        logger.warning("Interrupted by user. Saving checkpoint and finishing...")
        try:
            pipeline._save_checkpoint()
            pipeline.save_final()
        except Exception:
            pass

    except Exception as e:
        logger.error(f"Fatal error: {e}\n{traceback.format_exc()}")
        try:
            pipeline._save_checkpoint()
            pipeline.save_final()
        except Exception:
            pass

if __name__ == "__main__":
    main()


2025-11-20 17:30:24,817 INFO EV Reddit Sentiment pipeline starting.


INFO:ev_pipeline_runner:EV Reddit Sentiment pipeline starting.


2025-11-20 17:30:25,527 INFO spaCy found and loaded.


INFO:ev_pipeline:spaCy found and loaded.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Enter Reddit client_id (or paste into script to avoid prompt): Xz27atpYoei-SfMBQfKs8g
Enter Reddit client_secret (or paste into script to avoid prompt): B-mCbJvQDN3XUP1dtLai0Cn52kwwXg
2025-11-20 17:31:01,742 INFO Connected to Reddit read_only=True


INFO:ev_pipeline:Connected to Reddit read_only=True


2025-11-20 17:31:01,743 INFO No checkpoint found; starting fresh.


INFO:ev_pipeline:No checkpoint found; starting fresh.


2025-11-20 17:31:01,745 INFO Collecting from r/electricvehicles using `search` (limit 300)


INFO:ev_pipeline:Collecting from r/electricvehicles using `search` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 17:31:05,546 INFO Found 247 submissions in r/electricvehicles.


INFO:ev_pipeline:Found 247 submissions in r/electricvehicles.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://

2025-11-20 17:35:26,305 INFO Checkpoint saved: 104 posts, 19325 comments.


INFO:ev_pipeline:Checkpoint saved: 104 posts, 19325 comments.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://

2025-11-20 17:40:28,066 INFO Checkpoint saved: 237 posts, 44806 comments.


INFO:ev_pipeline:Checkpoint saved: 237 posts, 44806 comments.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://

2025-11-20 17:40:49,083 INFO Collecting from r/cars using `search` (limit 300)


INFO:ev_pipeline:Collecting from r/cars using `search` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 17:40:51,733 INFO Found 235 submissions in r/cars.


INFO:ev_pipeline:Found 235 submissions in r/cars.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.re

2025-11-20 17:45:29,860 INFO Checkpoint saved: 347 posts, 66402 comments.


INFO:ev_pipeline:Checkpoint saved: 347 posts, 66402 comments.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://

2025-11-20 17:50:09,504 INFO Collecting from r/TeslaMotors using `search` (limit 300)


INFO:ev_pipeline:Collecting from r/TeslaMotors using `search` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 17:50:12,955 INFO Found 232 submissions in r/TeslaMotors.


INFO:ev_pipeline:Found 232 submissions in r/TeslaMotors.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://async

2025-11-20 17:50:33,321 INFO Checkpoint saved: 487 posts, 92597 comments.


INFO:ev_pipeline:Checkpoint saved: 487 posts, 92597 comments.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://

2025-11-20 17:55:36,535 INFO Checkpoint saved: 609 posts, 116382 comments.


INFO:ev_pipeline:Checkpoint saved: 609 posts, 116382 comments.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https:/

2025-11-20 17:58:23,191 INFO Collecting from r/electricvehicles using `hot` (limit 300)


INFO:ev_pipeline:Collecting from r/electricvehicles using `hot` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 17:58:26,845 INFO Found 300 submissions in r/electricvehicles.


INFO:ev_pipeline:Found 300 submissions in r/electricvehicles.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://

2025-11-20 17:59:59,957 INFO Collecting from r/cars using `hot` (limit 300)


INFO:ev_pipeline:Collecting from r/cars using `hot` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 18:00:03,456 INFO Found 300 submissions in r/cars.


INFO:ev_pipeline:Found 300 submissions in r/cars.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.re

2025-11-20 18:00:38,442 INFO Checkpoint saved: 944 posts, 148045 comments.


INFO:ev_pipeline:Checkpoint saved: 944 posts, 148045 comments.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https:/

2025-11-20 18:01:46,756 INFO Collecting from r/TeslaMotors using `hot` (limit 300)


INFO:ev_pipeline:Collecting from r/TeslaMotors using `hot` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 18:01:51,580 INFO Found 300 submissions in r/TeslaMotors.


INFO:ev_pipeline:Found 300 submissions in r/TeslaMotors.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://async

2025-11-20 18:02:34,723 INFO Collecting from r/electricvehicles using `top` (limit 300)


INFO:ev_pipeline:Collecting from r/electricvehicles using `top` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 18:02:38,427 INFO Found 250 submissions in r/electricvehicles.


INFO:ev_pipeline:Found 250 submissions in r/electricvehicles.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://

2025-11-20 18:05:03,282 INFO Collecting from r/cars using `top` (limit 300)


INFO:ev_pipeline:Collecting from r/cars using `top` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 18:05:06,343 INFO Found 250 submissions in r/cars.


INFO:ev_pipeline:Found 250 submissions in r/cars.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.re

2025-11-20 18:05:41,714 INFO Checkpoint saved: 1163 posts, 172023 comments.


INFO:ev_pipeline:Checkpoint saved: 1163 posts, 172023 comments.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https:

2025-11-20 18:09:48,239 INFO Collecting from r/TeslaMotors using `top` (limit 300)


INFO:ev_pipeline:Collecting from r/TeslaMotors using `top` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 18:09:51,649 INFO Found 250 submissions in r/TeslaMotors.


INFO:ev_pipeline:Found 250 submissions in r/TeslaMotors.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://async

2025-11-20 18:10:44,561 INFO Checkpoint saved: 1257 posts, 190729 comments.


INFO:ev_pipeline:Checkpoint saved: 1257 posts, 190729 comments.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https:

2025-11-20 18:11:33,432 INFO Collecting from r/electricvehicles using `new` (limit 300)


INFO:ev_pipeline:Collecting from r/electricvehicles using `new` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 18:11:37,682 INFO Found 300 submissions in r/electricvehicles.


INFO:ev_pipeline:Found 300 submissions in r/electricvehicles.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://

2025-11-20 18:11:41,358 INFO Collecting from r/cars using `new` (limit 300)


INFO:ev_pipeline:Collecting from r/cars using `new` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 18:11:44,404 INFO Found 300 submissions in r/cars.


INFO:ev_pipeline:Found 300 submissions in r/cars.
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 18:11:47,014 INFO Collecting from r/TeslaMotors using `new` (limit 300)


INFO:ev_pipeline:Collecting from r/TeslaMotors using `new` (limit 300)
It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.

It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.
See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.



2025-11-20 18:11:51,493 INFO Found 300 submissions in r/TeslaMotors.


INFO:ev_pipeline:Found 300 submissions in r/TeslaMotors.


2025-11-20 18:11:56,915 INFO Checkpoint saved: 1281 posts, 194152 comments.


INFO:ev_pipeline:Checkpoint saved: 1281 posts, 194152 comments.


2025-11-20 18:11:56,921 INFO Preparing documents for topic modeling...


INFO:ev_pipeline:Preparing documents for topic modeling...


2025-11-20 18:27:09,326 INFO Prepared corpus: 179227 documents, dictionary size 14587


INFO:ev_pipeline:Prepared corpus: 179227 documents, dictionary size 14587


2025-11-20 18:27:09,328 INFO Training LDA with 6 topics...


INFO:ev_pipeline:Training LDA with 6 topics...


2025-11-20 18:33:43,300 INFO Saved topic keywords


INFO:ev_pipeline:Saved topic keywords


2025-11-20 18:48:40,826 INFO Computing sentiment for each document...


INFO:ev_pipeline:Computing sentiment for each document...


2025-11-20 18:49:21,247 INFO Saved document-level CSV to outputs/doc_level_sent_topics.csv


INFO:ev_pipeline:Saved document-level CSV to outputs/doc_level_sent_topics.csv


2025-11-20 18:49:21,304 INFO Saved topic-level sentiment summary


INFO:ev_pipeline:Saved topic-level sentiment summary


2025-11-20 18:49:21,509 INFO Saved sentiment bar chart


INFO:ev_pipeline:Saved sentiment bar chart


2025-11-20 18:49:21,512 INFO Pipeline finished. Outputs placed in `outputs/` directory.


INFO:ev_pipeline:Pipeline finished. Outputs placed in `outputs/` directory.


2025-11-20 18:49:24,260 INFO Saved final posts/comments to outputs/


INFO:ev_pipeline:Saved final posts/comments to outputs/


2025-11-20 18:49:24,261 INFO All done.


INFO:ev_pipeline_runner:All done.
