In [2]:
#!/usr/bin/env python3
"""
visualize.py — lightweight helpers for plotting

Usage (terminal):
  python visualize.py social-media-sentiment-analysis/data/processed/tweets_clean.csv

Notes for Jupyter/IPython:
- When run inside Jupyter, the script will not try to parse kernel args.
- Import and call `plot_label_distribution(df, ...)` from a notebook instead.
"""
import os
import sys
from typing import Optional
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import argparse


def plot_label_distribution(df: pd.DataFrame, label_col: str = "label",
                            out: str = "social-media-sentiment-analysis/figures/label_dist.png"):
    """
    Plot and save a horizontal countplot of labels in `label_col`.

    Bars follow the order returned by ``df[label_col].value_counts()``.

    Args:
        df: DataFrame that contains the label column.
        label_col: Name of the column whose values are counted.
        out: Path of the image file to write. A missing parent directory is
            created; a bare filename is written to the current directory.

    Raises:
        KeyError: If `label_col` is not a column of `df`.
    """
    if label_col not in df.columns:
        raise KeyError(f"Column '{label_col}' not found in dataframe. Available columns: {list(df.columns)}")

    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when `out` actually has a directory component.
    out_dir = os.path.dirname(out)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    fig = plt.figure(figsize=(6, 4))
    try:
        sns.countplot(y=df[label_col], order=df[label_col].value_counts().index)
        plt.title("Label distribution")
        plt.tight_layout()
        plt.savefig(out)
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # calls do not accumulate open figures.
        plt.close(fig)
    print("Saved label distribution to", out)


def main(argv: Optional[list] = None) -> int:
    """
    Command-line entry point: read a processed CSV and save its label plot.

    Args:
        argv: Argument list to parse. When None, argparse reads
            ``sys.argv[1:]``.

    Returns:
        0 after the figure has been written.

    Raises:
        SystemExit: If no input path was given, the path does not exist, or
            the path is not a regular file.
    """
    parser = argparse.ArgumentParser(prog="visualize.py",
                                     description="Plot label distribution from processed CSV")
    parser.add_argument("input_csv", nargs="?", help="path to processed CSV (with 'label' column)")
    parser.add_argument("--label-col", default="label", help="name of label column")
    parser.add_argument("--out", default="social-media-sentiment-analysis/figures/label_dist.png",
                        help="output path for saved figure")
    # tolerate extra args injected by IPython (e.g. -f <kernel.json>) when accidentally run in notebooks;
    # parse_known_args(None) falls back to sys.argv[1:], so no separate branch is needed.
    args, _extra = parser.parse_known_args(argv)

    if not args.input_csv:
        parser.print_help()
        raise SystemExit("Missing required input CSV path.")

    input_path = args.input_csv
    if not os.path.exists(input_path):
        raise SystemExit(f"Input CSV not found: {input_path}")
    if not os.path.isfile(input_path):
        # A directory (or other non-file) would otherwise fail inside
        # pd.read_csv with a traceback instead of a clean CLI error.
        raise SystemExit(f"Input path is not a file: {input_path}")

    df = pd.read_csv(input_path, encoding="utf-8")
    plot_label_distribution(df, label_col=args.label_col, out=args.out)
    return 0


if __name__ == "__main__":
    # Kernel argv (e.g. -f <kernel.json>) is not meant for this CLI, so an
    # interactive IPython/Jupyter session gets usage guidance instead of
    # having its arguments parsed.
    try:
        from IPython import get_ipython  # type: ignore
        interactive = get_ipython() is not None
    except Exception:
        # IPython is missing or unusable: treat this as a plain terminal run.
        interactive = False

    if not interactive:
        sys.exit(main())
    else:
        print(
            "Detected IPython/Jupyter environment. To use plotting here:\n"
            "  - Import the function and call it with a DataFrame, e.g.:\n"
            "      from visualize import plot_label_distribution\n"
            "      import pandas as pd\n"
            "      df = pd.read_csv('social-media-sentiment-analysis/data/processed/tweets_clean.csv')\n"
            "      plot_label_distribution(df)\n\n"
            "Or run from a terminal:\n"
            "  python visualize.py social-media-sentiment-analysis/data/processed/tweets_clean.csv\n"
        )

Detected IPython/Jupyter environment. To use plotting here:
  - Import the function and call it with a DataFrame, e.g.:
      from visualize import plot_label_distribution
      import pandas as pd
      df = pd.read_csv('social-media-sentiment-analysis/data/processed/tweets_clean.csv')
      plot_label_distribution(df)

Or run from a terminal:
  python visualize.py social-media-sentiment-analysis/data/processed/tweets_clean.csv

