# In [2]:  (Jupyter cell prompt left over from a notebook export)
#!/usr/bin/env python3
"""
social-media-sentiment-analysis/show_tables.py

Load CSVs (raw + processed), save CSV/HTML snapshots to results/,
and optionally print to console.

Usage (from repo root, terminal):
  python social-media-sentiment-analysis/show_tables.py --head 200 --print

Notes for Jupyter/IPython:
- When imported in a notebook the CLI entrypoint will not auto-run.
- In a notebook import the functions and call them directly, e.g.:
    from social_media_sentiment_analysis.show_tables import generate_snapshots
    generate_snapshots(head=200, do_print=True)
"""
import argparse
from pathlib import Path
from datetime import datetime
import pandas as pd
import sys
from typing import Tuple

# All locations are derived from the current working directory *at import
# time*, so the defaults only point at the real project files when the process
# is started from the repository root (as the usage text in the module
# docstring shows).
ROOT = Path.cwd()
# Project folder (hyphenated directory name on disk).
SM_DIR = ROOT / "social-media-sentiment-analysis"
# Input CSV read as the "raw" table.
RAW_PATH = SM_DIR / "data" / "raw" / "tweets_scraped.csv"
# Input CSV read as the "processed" table.
PROC_PATH = SM_DIR / "data" / "processed" / "tweets_clean.csv"
# Default folder that receives the CSV/HTML snapshots.
OUT_DIR = SM_DIR / "results"


def load_or_message(path: Path, msg: str) -> pd.DataFrame:
    """Read ``path`` as a CSV, falling back to a one-row placeholder frame.

    Args:
        path: Location of the CSV file to load.
        msg: Text to place in the placeholder when ``path`` does not exist.

    Returns:
        The parsed CSV when reading succeeds. Otherwise a single-row frame:
        column ``"info"`` holding ``msg`` when the path is missing, or column
        ``"error"`` describing the failure when reading raised.
    """
    # Missing input is reported as data rather than raised, so callers can
    # still snapshot/print "something" for every table.
    if not path.exists():
        return pd.DataFrame({"info": [msg]})

    try:
        frame = pd.read_csv(path)
    except Exception as exc:
        # Best-effort by design: any read/parse problem becomes a visible row.
        failure = f"Failed to read {path.name}: {exc}"
        return pd.DataFrame({"error": [failure]})
    return frame


def save_snapshots(df: pd.DataFrame, name_prefix: str, head: int, out_dir: Path) -> Tuple[Path, Path]:
    """Write the first ``head`` rows of ``df`` as CSV and HTML snapshot files.

    Both files share the stem ``{name_prefix}_head{head}_{UTC timestamp}`` and
    are written without the index column.

    Args:
        df: Table to snapshot.
        name_prefix: Leading part of the output file names.
        head: Row count passed to ``DataFrame.head``; also embedded in the
            file names.
        out_dir: Destination folder; created (with parents) if absent.

    Returns:
        ``(csv_path, html_path)`` of the files that were written.
    """
    # Function-scope import: the module's top-level imports only bring in
    # ``datetime`` itself, not ``timezone``.
    from datetime import timezone

    out_dir.mkdir(parents=True, exist_ok=True)

    # ``datetime.utcnow()`` is deprecated since Python 3.12; an aware UTC "now"
    # formats to exactly the same ``YYYYmmddTHHMMSSZ`` stamp.
    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    stem = f"{name_prefix}_head{head}_{ts}"
    csv_path = out_dir / f"{stem}.csv"
    html_path = out_dir / f"{stem}.html"

    # Slice once and reuse the same preview for both output formats.
    preview = df.head(head)
    preview.to_csv(csv_path, index=False)
    preview.to_html(html_path, index=False)
    return csv_path, html_path


def generate_snapshots(head: int = 200, do_print: bool = False, out_dir: Path = OUT_DIR) -> int:
    """Load the raw and processed tweet tables, snapshot them, and report.

    Args:
        head: Number of leading rows written to each snapshot and, when
            printing, shown on the console.
        do_print: When true, also print both table previews as plain text.
        out_dir: Folder that receives the snapshot files.

    Returns:
        Always ``0`` (used by ``main`` as the process exit status).
    """
    # Note: these change pandas display options for the whole process.
    for option, value in (("display.max_columns", 50), ("display.max_colwidth", 200)):
        pd.set_option(option, value)

    # A missing/unreadable file yields a one-row placeholder frame, so the
    # "rows" figures below describe whatever frame was obtained.
    raw_frame = load_or_message(RAW_PATH, "tweets_scraped.csv not found. Run create_sample_data.py")
    clean_frame = load_or_message(PROC_PATH, "tweets_clean.csv not found. Run preprocess.py / train_quick.py")

    # Write snapshots: raw first, then processed (CSV then HTML for each).
    out_dir.mkdir(parents=True, exist_ok=True)
    written = [
        *save_snapshots(raw_frame, "tweets_scraped", head, out_dir),
        *save_snapshots(clean_frame, "tweets_clean", head, out_dir),
    ]

    # Short summary, one console line per entry (empty string -> blank line).
    summary = [
        f"Raw file:  {RAW_PATH} -> rows: {len(raw_frame)}",
        f"Proc file: {PROC_PATH} -> rows: {len(clean_frame)}",
        "",
        "Saved snapshots:",
        *(f" - {snapshot}" for snapshot in written),
        "",
    ]
    for line in summary:
        print(line)

    if not do_print:
        return 0

    sections = (
        ("=== Raw scraped tweets (head) ===", raw_frame),
        ("=== Processed / cleaned tweets (head) ===", clean_frame),
    )
    for title, frame in sections:
        print(title)
        try:
            print(frame.head(head).to_string(index=False))
        except Exception:
            # Fall back to the default rendering (index included).
            print(frame.head(head).to_string())
        print()

    return 0


def main(argv=None):
    """Command-line entry point: parse options and run ``generate_snapshots``.

    Args:
        argv: Argument list to parse; ``None`` means the process arguments
            (``sys.argv[1:]``), which is argparse's own default.

    Returns:
        The value returned by ``generate_snapshots``.
    """
    cli = argparse.ArgumentParser(description="Load and save tables for social-media-sentiment-analysis CSVs")
    cli.add_argument("--head", type=int, default=200, help="number of rows to include in snapshots / display")
    cli.add_argument("--print", action="store_true", dest="do_print", help="print tables to console (text)")
    cli.add_argument("--out-dir", default=str(OUT_DIR), help="output folder for snapshots")

    # parse_known_args (rather than parse_args) so extra arguments injected by
    # IPython are tolerated instead of aborting; the leftovers are discarded.
    options, _ignored = cli.parse_known_args(argv)

    return generate_snapshots(
        head=options.head,
        do_print=options.do_print,
        out_dir=Path(options.out_dir),
    )


if __name__ == "__main__":
    # Work out whether we are inside an interactive IPython/Jupyter session;
    # in that case the CLI is not run and usage guidance is printed instead.
    try:
        from IPython import get_ipython  # type: ignore
        in_ipy = get_ipython() is not None
    except Exception:
        # IPython missing (or misbehaving): treat as a plain interpreter.
        in_ipy = False

    if not in_ipy:
        sys.exit(main())

    msg = (
        "Detected IPython/Jupyter environment. The script will not run its CLI entrypoint here.\n"
        "To use from a notebook, import and call generate_snapshots directly, e.g.:\n"
        "  from social_media_sentiment_analysis.show_tables import generate_snapshots\n"
        "  generate_snapshots(head=200, do_print=True)\n\n"
        "To run from a terminal:\n"
        "  python social-media-sentiment-analysis/show_tables.py --head 200 --print\n"
    )
    print(msg)

# Notebook cell output (captured when the cell above was run in Jupyter):
# Detected IPython/Jupyter environment. The script will not run its CLI entrypoint here.
# To use from a notebook, import and call generate_snapshots directly, e.g.:
#   from social_media_sentiment_analysis.show_tables import generate_snapshots
#   generate_snapshots(head=200, do_print=True)
#
# To run from a terminal:
#   python social-media-sentiment-analysis/show_tables.py --head 200 --print

