

import re
from pathlib import Path
from typing import List

import matplotlib.pyplot as plt
import pandas as pd

from ..scraping.utils import get_project_root



# Multi-word phrases that each add +1 to a headline's sentiment score when
# found in the lower-cased headline (see compute_sentiment_score). Note that
# "cut" and "funding"/"boost" also appear in the single-word lists below.
POSITIVE_PHRASES: List[str] = [
    "cut taxes",
    "record high",
    "funding boost",
]

# Lower-case terms that each add +1 to a headline's sentiment score when
# present in the headline (see compute_sentiment_score). Singular and plural
# forms are listed as separate entries.
POSITIVE_WORDS: List[str] = [
    "boost",
    "rise",
    "rises",
    "growth",
    "benefit",
    "benefits",
    "support",
    "surplus",
    "jobs",
    "investment",
    "funding",
    "relief",
    "help",
]

# Lower-case terms that each subtract 1 from a headline's sentiment score when
# present in the headline (see compute_sentiment_score). Despite the name, the
# list contains one multi-word entry ("cost of living").
NEGATIVE_WORDS: List[str] = [
    "cut",
    "cuts",
    "fall",
    "falls",
    "burden",
    "crisis",
    "shortfall",
    "pressure",
    "risk",
    "warn",
    "warning",
    "strike",
    "debt",
    "cost of living",
    "inflation",
]


def load_headlines() -> pd.DataFrame:
    """Read the framed-headlines CSV from the project's results directory.

    The file is ``data/results/headlines_with_frames.csv`` under the path
    returned by ``get_project_root()``. Progress is reported on stdout.

    Returns:
        The CSV contents as a DataFrame.
    """
    source_csv: Path = get_project_root().joinpath(
        "data", "results", "headlines_with_frames.csv"
    )
    print(f"[Sentiment] Loading headlines from: {source_csv}")

    headlines = pd.read_csv(source_csv)
    print(f"[Sentiment] Loaded {len(headlines)} rows.")
    return headlines


def _term_pattern(term: str) -> "re.Pattern":
    """Return a compiled regex matching *term* only as a whole word or phrase."""
    # re.escape keeps multi-word terms such as "cost of living" literal; the
    # \b anchors stop "rise" matching inside "enterprise" or "cut" inside "cuts".
    return re.compile(rf"\b{re.escape(term)}\b")


def compute_sentiment_score(text: str) -> int:
    """Score a headline with the module's positive/negative lexicons.

    Matching is case-insensitive and on whole words, so a term embedded in a
    longer word does not count and a plural does not also trigger its
    singular. Each lexicon entry contributes at most once, however many times
    it occurs in the headline.

    Positive phrases are scored first and then removed from the text before
    single-word matching, so the words inside a phrase are not scored a second
    time (e.g. the negative word "cut" inside the positive phrase "cut taxes").

    Args:
        text: The headline. Any non-string value (such as the NaN pandas uses
            for missing cells) is scored as 0.

    Returns:
        Matched positive phrases plus matched positive words, minus matched
        negative words.
    """
    if not isinstance(text, str):
        return 0

    lowered = text.lower()

    # Count every phrase against the full headline before masking any of
    # them, so overlapping phrases cannot hide one another.
    phrase_patterns = [_term_pattern(phrase) for phrase in POSITIVE_PHRASES]
    pos_phrase = sum(1 for pattern in phrase_patterns if pattern.search(lowered))

    # Replace matched phrases with a space (not an empty string) so the words
    # on either side of a phrase are not fused into a new token.
    remainder = lowered
    for pattern in phrase_patterns:
        remainder = pattern.sub(" ", remainder)

    pos_words = sum(
        1 for word in POSITIVE_WORDS if _term_pattern(word).search(remainder)
    )
    neg_words = sum(
        1 for word in NEGATIVE_WORDS if _term_pattern(word).search(remainder)
    )

    return pos_phrase + pos_words - neg_words


def label_sentiment(score: int) -> str:
    """Map a numeric sentiment score to a categorical label.

    Returns:
        ``"positive"`` for scores above zero, ``"negative"`` for scores below
        zero, and ``"neutral"`` otherwise.
    """
    if score > 0:
        label = "positive"
    elif score < 0:
        label = "negative"
    else:
        label = "neutral"
    return label


def analyse_sentiment(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* with sentiment score and label columns added.

    The ``headline`` column is scored with ``compute_sentiment_score`` and the
    scores are labelled with ``label_sentiment``. The input frame is not
    modified.

    Args:
        df: Headlines table; must contain a ``headline`` column.

    Returns:
        A new DataFrame with ``sentiment_score`` and ``sentiment_label``
        columns.
    """
    print("[Sentiment] Computing sentiment scores...")

    scores = df["headline"].apply(compute_sentiment_score)
    labels = scores.apply(label_sentiment)

    scored = df.copy()
    scored["sentiment_score"] = scores
    scored["sentiment_label"] = labels
    return scored


def save_headlines_with_sentiment(df: pd.DataFrame) -> Path:
    """Write the per-headline sentiment table to CSV.

    The file is ``data/results/headlines_with_sentiment.csv`` under the path
    returned by ``get_project_root()``; the DataFrame index is not written.

    Returns:
        The path of the file that was written.
    """
    destination: Path = get_project_root().joinpath(
        "data", "results", "headlines_with_sentiment.csv"
    )
    df.to_csv(destination, index=False)
    print(f"[Sentiment] Saved per-headline sentiment -> {destination}")
    return destination


def summarise_sentiment(df: pd.DataFrame) -> pd.DataFrame:
    """Count headlines per (source, sentiment label) pair.

    Args:
        df: Table with ``source`` and ``sentiment_label`` columns.

    Returns:
        A DataFrame with columns ``source``, ``sentiment`` and ``count``,
        sorted by source and then sentiment. The table is also printed.
    """
    group_sizes = df.groupby(["source", "sentiment_label"]).size()

    table = group_sizes.reset_index(name="count")
    table = table.rename(columns={"sentiment_label": "sentiment"})
    counts = table.sort_values(["source", "sentiment"])

    print("[Sentiment] Summary counts:\n", counts)
    return counts


def save_sentiment_summary(counts_df: pd.DataFrame) -> Path:
    """Write the sentiment summary table to CSV.

    The file is ``data/results/sentiment_summary.csv`` under the path returned
    by ``get_project_root()``; the DataFrame index is not written.

    Returns:
        The path of the file that was written.
    """
    destination: Path = get_project_root().joinpath(
        "data", "results", "sentiment_summary.csv"
    )
    counts_df.to_csv(destination, index=False)
    print(f"[Sentiment] Saved sentiment summary -> {destination}")
    return destination


def plot_sentiment_summary(counts_df: pd.DataFrame) -> None:
    """Save one sentiment bar chart per source as a PNG file.

    Charts are written to ``data/results/plots`` under the path returned by
    ``get_project_root()``; the directory is created if it does not exist.
    Each file is named ``<source>_sentiment_bar.png``.

    Args:
        counts_df: Summary table with ``source``, ``sentiment`` and ``count``
            columns.
    """
    plots_dir: Path = get_project_root() / "data" / "results" / "plots"
    plots_dir.mkdir(parents=True, exist_ok=True)

    source_column = counts_df["source"]
    for source_name in source_column.unique():
        rows = counts_df[source_column == source_name]

        # Explicit Figure/Axes handles instead of pyplot's implicit
        # "current figure" state.
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.bar(rows["sentiment"], rows["count"])
        ax.set_title(f"Sentiment distribution — {source_name}")
        ax.set_ylabel("Count")
        fig.tight_layout()

        png_path = plots_dir / f"{source_name}_sentiment_bar.png"
        fig.savefig(png_path)
        # Close each figure so they do not accumulate across sources.
        plt.close(fig)

        print(f"[Sentiment] Saved sentiment plot for {source_name} -> {png_path}")


def main() -> None:
    """Run the sentiment pipeline end to end.

    Loads the headlines, scores and labels them, saves the per-headline
    results, then builds, saves and plots the per-source summary.
    """
    scored = analyse_sentiment(load_headlines())
    save_headlines_with_sentiment(scored)

    summary = summarise_sentiment(scored)
    save_sentiment_summary(summary)
    plot_sentiment_summary(summary)

    print("[Sentiment] Done.")


# Run the pipeline only when this file is executed as the entry module,
# not when it is imported by other code.
if __name__ == "__main__":
    main()
