

from __future__ import annotations

from pathlib import Path

import pandas as pd

from .framing import classify_headline, explain_frame
from ..scraping.utils import get_project_root


def load_headlines() -> pd.DataFrame:
    """Load the raw UK budget headlines CSV into a DataFrame.

    The file is read from ``data/raw/uk_budget_headlines.csv`` beneath the
    directory returned by ``get_project_root()``, using ``pd.read_csv`` with
    its default options.

    Returns:
        The parsed CSV contents.

    Raises:
        FileNotFoundError: If nothing exists at the expected CSV path.
    """
    csv_path = get_project_root() / "data" / "raw" / "uk_budget_headlines.csv"

    if csv_path.exists():
        return pd.read_csv(csv_path)

    # Fail with the resolved path in the message so a misplaced data file is
    # easy to diagnose.
    raise FileNotFoundError(f"CSV file not found: {csv_path}")


def add_framing_column(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with a ``frame`` column added.

    Each value in the ``headline`` column is converted with ``astype(str)``
    and passed to ``classify_headline``; the results are stored under
    ``frame``. The input DataFrame itself is not modified.

    Args:
        df: Table that must contain a ``headline`` column.

    Returns:
        A new DataFrame holding the original columns plus ``frame``.

    Raises:
        KeyError: If *df* has no ``headline`` column.
    """
    frames = df["headline"].astype(str).apply(classify_headline)
    # ``assign`` works on a copy, so the caller's DataFrame is left untouched.
    return df.assign(frame=frames)


def summarise_by_source_and_frame(df: pd.DataFrame) -> pd.DataFrame:
    """Count headlines for every (source, frame) combination.

    Args:
        df: Table containing ``source`` and ``frame`` columns.

    Returns:
        A DataFrame with columns ``source``, ``frame`` and ``count``, ordered
        by ``source`` ascending and then by ``count`` descending. The index
        is not reset after sorting.
    """
    pair_sizes = df.groupby(["source", "frame"]).size()
    counts = pair_sizes.reset_index(name="count")
    # Within each source, list the most frequent frames first.
    return counts.sort_values(by=["source", "count"], ascending=[True, False])


def main() -> None:
    """Run the framing analysis end to end.

    Loads the headlines, adds the ``frame`` column, prints the per-source
    frame counts (each with the text returned by ``explain_frame``), and
    writes two CSV files into ``data/results`` under the project root:
    ``framing_summary.csv`` and ``headlines_with_frames.csv``.
    """
    raw = load_headlines()
    print(f"[Analyse] Loaded {len(raw)} headlines.")

    framed = add_framing_column(raw)
    summary = summarise_by_source_and_frame(framed)

    # Console report: one section per source, in the order sources appear
    # in the summary table.
    print("\n=== Framing counts by source ===")
    source_col = summary["source"]
    for source in source_col.unique():
        rows = summary.loc[source_col == source]
        print(f"\nSource: {source}")
        for frame, raw_count in zip(rows["frame"], rows["count"]):
            count = int(raw_count)
            explanation = explain_frame(frame)
            print(f"  {frame:18s} ({count:2d}) - {explanation}")

    # Persist both the aggregated counts and the row-level classifications.
    out_dir = get_project_root() / "data" / "results"
    out_dir.mkdir(parents=True, exist_ok=True)

    summary_path = out_dir / "framing_summary.csv"
    framed_path = out_dir / "headlines_with_frames.csv"

    for table, target in ((summary, summary_path), (framed, framed_path)):
        table.to_csv(target, index=False)

    print(f"\n[Analyse] Saved framing summary to: {summary_path}")
    print(f"[Analyse] Saved full headlines with frames to: {framed_path}")


# Run the analysis only when this module is executed as the entry point, not
# when it is imported. This module uses relative imports, so it needs to be
# started as part of its package (e.g. via ``python -m``) rather than as a
# standalone file.
if __name__ == "__main__":
    main()
