import io
import json
import logging
import os
import sys
import threading
import time

import pandas as pd
import streamlit as st
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed
from dotenv import load_dotenv
from openai import OpenAI
from playwright.sync_api import Page, sync_playwright

from app_screenshot_basic import login_to_portal

LOGGER_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
formatter = logging.Formatter(LOGGER_FORMAT)
logger = logging.getLogger("seo_excerpt_from_csv")
logger.setLevel(logging.INFO)
# Streamlit re-executes this script on every interaction, so only attach the
# file handler once to avoid duplicated log lines.
_file_handler_exists = any(
    isinstance(h, logging.FileHandler) for h in logger.handlers
)
if not _file_handler_exists:
    file_handler = logging.FileHandler("seo_excerpt_from_csv.log", mode="a")
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

# Console handler attached/removed at runtime by configure_debug_logging().
CONSOLE_HANDLER: logging.StreamHandler | None = None


def configure_debug_logging(enabled: bool):
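    """Toggle DEBUG-level logging and a stdout console handler at runtime."""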
    global CONSOLE_HANDLER
    if enabled:
        logger.setLevel(logging.DEBUG)
        if CONSOLE_HANDLER is None:
            CONSOLE_HANDLER = logging.StreamHandler(sys.stdout)
            CONSOLE_HANDLER.setFormatter(formatter)
            CONSOLE_HANDLER.setLevel(logging.DEBUG)
            logger.addHandler(CONSOLE_HANDLER)
        logger.debug("Debug logging enabled")
    else:
        logger.setLevel(logging.INFO)
        if CONSOLE_HANDLER is not None:
            logger.removeHandler(CONSOLE_HANDLER)
            CONSOLE_HANDLER = None


# Portal and scraping configuration.
LOGIN_URL = "https://profesionales.msd.es/login"
DEFAULT_VIEWPORT_WIDTH = 1920
DEFAULT_VIEWPORT_HEIGHT = 1080
MAX_WORKERS = 5  # concurrent scraping threads
RATE_LIMIT_SECONDS = 5.0  # minimum spacing between outgoing OpenAI requests
NAVIGATION_TIMEOUT_MS = 90000
MAX_NAVIGATION_RETRIES = 3
NAVIGATION_RETRY_DELAY = 3.0  # seconds to wait between navigation retries


class RateLimiter:
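    """Thread-safe limiter that enforces a minimum interval between calls.

    wait() holds a lock while sleeping, so concurrent callers are serialized
    and released at least `min_interval_sec` seconds apart.
    """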
    def __init__(self, min_interval_sec: float):
        self.min_interval = float(min_interval_sec)
        self._lock = threading.Lock()
        self._last = 0.0

    def wait(self):
        with self._lock:
            now = time.time()
            to_wait = (self._last + self.min_interval) - now
            if to_wait > 0:
                time.sleep(to_wait)
            self._last = time.time()


def load_storage_state(storage_state_path: str) -> dict:
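    """Load the Playwright storage state (cookies/local storage) saved during portal login."""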
    with open(storage_state_path, "r") as f:
        return json.load(f)


def expand_content(page: Page):
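    """Reveal collapsed or lazily loaded article content before scraping.

    Clicks every visible "Leer más" button, opens closed accordion sections
    (up to three passes), then scrolls down in steps of ~80% of the viewport
    height so lazily loaded content is rendered. Failures are logged at
    DEBUG level and ignored.
    """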
    try:
        page.wait_for_timeout(1500)
        expand_buttons = page.locator("text=/Leer más/i")
        count = expand_buttons.count()
        for i in range(count):
            try:
                btn = expand_buttons.nth(i)
                if btn.is_visible():
                    btn.click(timeout=5000)
                    page.wait_for_timeout(600)
            except Exception as e:
                logger.debug(f"Could not click 'Leer más' {i}: {e}")

        try:
            try:
                page.wait_for_selector(
                    "span.mhh-mcn-v1-accordion-molecule-header__icon-dropdown",
                    timeout=4000,
                )
            except Exception:
                pass
            max_rounds = 3
            for _ in range(max_rounds):
                closed_selector = (
                    "button:has(span.mhh-mcn-v1-accordion-molecule-header__icon-dropdown)"
                    '[aria-expanded="false"]'
                )
                closed = page.locator(closed_selector)
                cc = closed.count()
                if cc == 0:
                    break
                for j in range(cc):
                    try:
                        btn = closed.nth(j)
                        if btn.is_visible():
                            btn.click(timeout=4000)
                            page.wait_for_timeout(400)
                    except Exception as e2:
                        logger.debug(f"Accordion click failed: {e2}")
                page.wait_for_timeout(600)
        except Exception as e:
            logger.debug(f"Accordion expansion error: {e}")

        total_js = "() => document.documentElement.scrollHeight"
        total = page.evaluate(total_js)
        viewport_h = page.viewport_size["height"] if page.viewport_size else 768
        sc = 0
        inc = int(viewport_h * 0.8)
        iters = 0
        while sc < total and iters < 40:
            page.mouse.wheel(0, inc)
            page.wait_for_timeout(400)
            sc += inc
            new_total = page.evaluate(total_js)
            if new_total > total:
                total = new_total
            iters += 1
    except Exception as e:
        logger.debug(f"expand_content error: {e}")


def extract_main_text(html: str) -> str:
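    """Extract the main article text from raw HTML.

    Strips script/style/noscript tags, collects text from common content
    containers (article, main, known content divs), falls back to the
    largest <div> (or the whole page) when none match, and returns at most
    8000 characters of the longest candidate.
    """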
    soup = BeautifulSoup(html, "lxml")
    for tag in soup(["script", "style", "noscript"]):
        tag.decompose()

    candidates = []
    for sel in [
        "article",
        "main",
        "div.entry-content",
        "div.post-content",
        "div.article-content",
        "div.article-body",
        "main#mhh_mcn_main",
    ]:
        for el in soup.select(sel):
            text = el.get_text(" ", strip=True)
            if text:
                candidates.append(text)

    if not candidates:
        largest = ""
        for el in soup.find_all("div"):
            text = el.get_text(" ", strip=True)
            if text and len(text) > len(largest):
                largest = text
        if largest:
            candidates.append(largest)

    if not candidates:
        return soup.get_text(" ", strip=True)[:8000]

    best = max(candidates, key=len)
    return best[:8000]


def build_openai_client(api_key: str | None) -> OpenAI | None:
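    """Return an OpenAI client for the given API key, or None if the key is missing or init fails."""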
    if not api_key:
        return None
    try:
        return OpenAI(api_key=api_key)
    except Exception as e:
        logger.error(f"Failed to init OpenAI client: {e}")
        return None


def extract_text_from_responses(resp) -> str:
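    """Best-effort extraction of plain text from an OpenAI response object.

    Tries, in order: the Responses API `output_text` convenience property,
    the first item of `resp.output`, and a chat-completions-style
    `resp.choices[0].message`. Returns "" when nothing usable is found.
    """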
    try:
        if hasattr(resp, "output_text") and resp.output_text:
            return resp.output_text.strip()
    except Exception:
        pass
    try:
        output = getattr(resp, "output", None)
        if output:
            first = output[0]
            content = getattr(first, "content", None)
            if content and len(content) > 0 and hasattr(content[0], "text"):
                return content[0].text.strip()
    except Exception:
        pass
    try:
        choice = resp.choices[0]
        msg = choice.message
        if isinstance(msg, str):
            return msg.strip()
        if hasattr(msg, "content"):
            return msg.content.strip()
    except Exception:
        pass
    return ""


def generate_excerpt_with_openai(
    client: OpenAI | None,
    rate_limiter: RateLimiter,
    model: str,
    article_text: str,
    reasoning_effort: str | None = None,
):
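    """Ask the OpenAI Responses API for a Spanish SEO excerpt of at most 200 characters.

    Waits on the shared rate limiter before each request, optionally passes a
    reasoning effort, and returns "" when there is no client or the call fails.
    """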
    if not client:
        return ""
    prompt = (
        "Eres un editor SEO. Escribe un Summary Excerpt en español, atractivo y factual, "
        "de máximo 200 caracteres para invitar a leer un artículo. Debe describir brevemente "
        "el contenido, sin promesas médicas, sin emojis, sin comillas y sin hashtags. "
        "Devuelve solo el texto final.\n\nContenido:\n"
        + article_text[:4000]
    )
    try:
        rate_limiter.wait()
        request_kwargs = {"model": model, "input": prompt}
        if reasoning_effort:
            request_kwargs["reasoning"] = {"effort": reasoning_effort}
        resp = client.responses.create(**request_kwargs)
        text = extract_text_from_responses(resp)
        return text[:200].strip()
    except Exception as e:
        logger.warning(f"OpenAI excerpt generation failed: {e}")
        return ""


# Shared across all worker threads so outgoing OpenAI requests stay spaced out.
GLOBAL_RATE_LIMITER = RateLimiter(RATE_LIMIT_SECONDS)


def process_article_excerpt(
    storage_state_path: str,
    row: pd.Series,
    url_col_name: str,
    model_name: str,
    openai_key: str | None,
    debug_mode: bool = False,
    reasoning_effort: str | None = None,
) -> dict:
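    """Scrape one article URL and generate its SEO excerpt.

    Runs in a worker thread: launches a fresh Playwright browser with the
    saved login storage state, loads the URL (retrying up to
    MAX_NAVIGATION_RETRIES times), expands the page content, extracts the
    main text, and asks OpenAI for an excerpt. Returns a dict with
    "Status" and "Excerpt" keys.
    """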
    article_title = str(row.iloc[0]) if len(row) > 0 else "Articulo"
    article_url = str(row.get(url_col_name, "")).strip() if url_col_name in row else ""

    if debug_mode:
        logger.debug(
            "Processing row: title='%s', url='%s', url_col='%s'",
            article_title,
            article_url,
            url_col_name,
        )

    if not article_url.lower().startswith(("http://", "https://")):
        if debug_mode:
            logger.warning(
                "Skipping row '%s': invalid URL '%s'",
                article_title,
                article_url,
            )
        return {"Status": "Skipped (Invalid URL)", "Excerpt": ""}

    client = build_openai_client(openai_key)
    if debug_mode:
        logger.debug("OpenAI client initialized: %s", bool(client))
    last_error: Exception | None = None

    for attempt in range(1, MAX_NAVIGATION_RETRIES + 1):
        playwright = None
        browser = None
        context = None
        page = None

        if debug_mode:
            logger.debug(
                "Attempt %s/%s for URL '%s'",
                attempt,
                MAX_NAVIGATION_RETRIES,
                article_url,
            )

        try:
            logger.debug("Launching Playwright for '%s' (attempt %s)", article_url, attempt)
            playwright = sync_playwright().start()
            browser = playwright.chromium.launch(headless=True)
            storage_state = load_storage_state(storage_state_path)
            context = browser.new_context(
                storage_state=storage_state,
                viewport={"width": DEFAULT_VIEWPORT_WIDTH, "height": DEFAULT_VIEWPORT_HEIGHT},
                device_scale_factor=1,
                is_mobile=False,
                user_agent=(
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
                ),
            )
            page = context.new_page()

            page.goto(article_url, wait_until="load", timeout=NAVIGATION_TIMEOUT_MS)
            logger.debug("Page loaded for '%s'", article_url)
            expand_content(page)
            page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            page.wait_for_timeout(1200)
            try:
                page.wait_for_load_state("networkidle", timeout=8000)
            except Exception:
                pass

            html = page.content()
            main_text = extract_main_text(html)
            if debug_mode:
                logger.debug(
                    "Extracted main text for '%s' (length=%s)",
                    article_title,
                    len(main_text),
                )
            excerpt = generate_excerpt_with_openai(
                client,
                GLOBAL_RATE_LIMITER,
                model_name,
                main_text,
                reasoning_effort,
            )
            if not excerpt:
                logger.warning(
                    "No excerpt generated for '%s' (URL: %s)",
                    article_title,
                    article_url,
                )
                if debug_mode:
                    print(
                        f"No excerpt generated for '{article_title}' (URL: {article_url})",
                        file=sys.stderr,
                    )
                return {"Status": "Failed (Empty excerpt)", "Excerpt": ""}
            logger.debug(
                "Generated excerpt for '%s' (length=%s)",
                article_title,
                len(excerpt),
            )
            return {"Status": "Success", "Excerpt": excerpt}
        except Exception as e:
            last_error = e
            logger.warning(
                "Attempt %s/%s failed for '%s': %s",
                attempt,
                MAX_NAVIGATION_RETRIES,
                article_url,
                e,
            )
            if attempt < MAX_NAVIGATION_RETRIES:
                time.sleep(NAVIGATION_RETRY_DELAY)
        finally:
            try:
                if page:
                    page.close()
            except Exception:
                pass
            try:
                if context:
                    context.close()
            except Exception:
                pass
            try:
                if browser:
                    browser.close()
            except Exception:
                pass
            try:
                if playwright:
                    playwright.stop()
            except Exception:
                pass

    error_message = (
        f"Error after {MAX_NAVIGATION_RETRIES} attempts: {last_error}" if last_error else "Unknown error"
    )
    logger.error("%s for URL '%s'", error_message, article_url)
    return {"Status": error_message, "Excerpt": ""}


def main():
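    """Streamlit UI: upload a CSV, log in to the MSD portal, and generate excerpts for each URL in parallel."""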
    load_dotenv()
    st.title("SEO Excerpt Generator desde CSV")
    st.write(
        "Autentica en el portal MSD y genera excerpts para cada URL listada en el CSV "
        "utilizando modelos de OpenAI."
    )

    uploaded_file = st.file_uploader("Sube el CSV con los artículos", type=["csv"])

    col1, col2 = st.columns(2)
    with col1:
        username = st.text_input("Usuario (MSD)")
    with col2:
        password = st.text_input("Contraseña (MSD)", type="password")

    model_name = st.selectbox(
        "Modelo OpenAI",
        options=["gpt-5", "gpt-5-mini", "gpt-4.1", "gpt-4o-mini"],
        index=0,
    )
    debug_mode = st.checkbox(
        "Modo debug (muestra más info en consola y logs)", value=False
    )
    supports_reasoning = model_name in {"gpt-5", "gpt-5-mini"}
    if supports_reasoning:
        reasoning_enabled = st.checkbox(
            "Activar reasoning (pensamiento extendido)", value=False
        )
        reasoning_effort = st.selectbox(
            "Reasoning effort",
            options=["minimal", "low", "medium", "high"],
            index=1,
            disabled=not reasoning_enabled,
        )
        if not reasoning_enabled:
            reasoning_effort = None
    else:
        reasoning_enabled = False
        reasoning_effort = None
    test_mode = st.checkbox(
        "Modo prueba (solo procesa las primeras 10 filas)", value=False
    )

    env_openai = os.getenv("OPENAI_API_KEY", "")
    st.write(
        f"OpenAI API Key en entorno: {'✅' if bool(env_openai) else '⚠️ no encontrada'}"
    )
    st.write(
        f"Concurrencia: {MAX_WORKERS} hilos · Separación mínima entre peticiones externas: {int(RATE_LIMIT_SECONDS)}s"
    )

    start_btn = st.button("Generar excerpts")

    if start_btn:
        if not uploaded_file:
            st.error("Por favor sube un CSV primero.")
            return
        if not username or not password:
            st.error("Por favor ingresa usuario y contraseña del portal MSD.")
            return
        if not env_openai:
            st.error("No se encontró OPENAI_API_KEY en el entorno.")
            return

        try:
            df = pd.read_csv(uploaded_file)
        except Exception as e:
            st.error(f"Error leyendo CSV: {e}")
            return

        if df.empty:
            st.error("El CSV está vacío.")
            return

        if len(df.columns) < 2:
            st.error("El CSV debe tener al menos dos columnas (la URL en la segunda).")
            return

        url_col = df.columns[1]
        if debug_mode:
            st.write(f"Columnas encontradas en CSV: {list(df.columns)}")
            st.write("Primeras filas del CSV:")
            st.write(df.head(3))
            st.write(f"Columna URL detectada: {url_col}")

        st.info("Autenticando en el portal...")
        storage_state_path = login_to_portal(username, password, LOGIN_URL)
        if not storage_state_path:
            st.error("No fue posible iniciar sesión. Revisa credenciales.")
            return
        st.success("Login correcto. Iniciando procesamiento de artículos...")

        configure_debug_logging(debug_mode)

        df["Excerpt"] = ""
        df["Processing Status"] = "Pending"

        indices_to_process = df.index[:10] if test_mode else df.index
        total = len(indices_to_process)
        progress_bar = st.progress(0.0)
        status_text = st.empty()

        processed = 0
        success = 0

        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            future_to_index = {}
            for idx in indices_to_process:
                row = df.loc[idx]
                if debug_mode:
                    logger.debug("Queueing row index %s for processing", idx)
                future = executor.submit(
                    process_article_excerpt,
                    storage_state_path,
                    row,
                    url_col,
                    model_name,
                    env_openai,
                    debug_mode,
                    reasoning_effort,
                )
                future_to_index[future] = idx

            for fut in as_completed(future_to_index):
                idx = future_to_index[fut]
                try:
                    res = fut.result()
                except Exception as e:
                    res = {"Status": f"Error: {e}", "Excerpt": ""}

                df.at[idx, "Excerpt"] = res.get("Excerpt", "")
                df.at[idx, "Processing Status"] = res.get("Status", "Unknown")
                if debug_mode:
                    logger.debug(
                        "Row %s completed with status '%s' (excerpt length=%s)",
                        idx,
                        df.at[idx, "Processing Status"],
                        len(df.at[idx, "Excerpt"] or ""),
                    )

                processed += 1
                if res.get("Status") == "Success":
                    success += 1
                progress_bar.progress(processed / total)
                status_text.text(
                    f"Procesados {processed}/{total} · Éxitos: {success}"
                )

        if debug_mode:
            status_counts = df.loc[indices_to_process, "Processing Status"].value_counts()
            logger.debug("Resumen de estados:")
            for status, count in status_counts.items():
                logger.debug("  %s: %s", status, count)

        st.success(f"¡Completado! Éxitos: {success} de {total}")

        csv_buffer = io.StringIO()
        df.to_csv(csv_buffer, index=False)
        csv_bytes = csv_buffer.getvalue().encode("utf-8")
        st.download_button(
            label="Descargar CSV con excerpts",
            data=csv_bytes,
            file_name="seo_excerpts.csv",
            mime="text/csv",
        )


if __name__ == "__main__":
    main()
