import streamlit as st
import pandas as pd
import time
import logging
import os
import io
import re
import json
import shutil
import threading
import base64
from urllib.parse import urljoin, urlparse
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright, Page

from openai import OpenAI
import tinify
from dotenv import load_dotenv

# Reuse login from the existing capture app (same folder)
from app_screenshot_basic import login_to_portal

# ----------------------------------------------------------------------------
# Logging setup
# ----------------------------------------------------------------------------
logging.basicConfig(
    filename='screenshot_capture.log',
    filemode='a',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO
)

# ----------------------------------------------------------------------------
# Constants and Globals
# ----------------------------------------------------------------------------
LOGIN_URL = "https://profesionales.msd.es/login"
DEFAULT_VIEWPORT_WIDTH = 1920
DEFAULT_VIEWPORT_HEIGHT = 1080

# Concurrency and rate-limiting
MAX_WORKERS = 5  # Fixed size of the article-processing thread pool
RATE_LIMIT_SECONDS = 5.0  # Minimum spacing between consecutive external API requests

# Output structure
BASE_OUTPUT_DIR = "processed_images"

# ----------------------------------------------------------------------------
# Utilities
# ----------------------------------------------------------------------------
class RateLimiter:
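    """Thread-safe gate that spaces consecutive calls to external services
    (OpenAI, Tinify) at least min_interval_sec apart across all workers."""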
    def __init__(self, min_interval_sec: float):
        self.min_interval = float(min_interval_sec)
        self._lock = threading.Lock()
        self._last = 0.0

    def wait(self):
        with self._lock:
            now = time.time()
            to_wait = (self._last + self.min_interval) - now
            if to_wait > 0:
                time.sleep(to_wait)
            self._last = time.time()

GLOBAL_RATE_LIMITER = RateLimiter(RATE_LIMIT_SECONDS)


def slugify(value: str) -> str:
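    """Reduce a title to a filesystem/URL-safe slug: map common Spanish
    diacritics to ASCII, drop other symbols, and collapse whitespace to dashes."""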
    value = str(value or "").strip()
    # Remove accents/diacritics by simple replacement for common chars
    replacements = (
        ("á", "a"), ("é", "e"), ("í", "i"), ("ó", "o"), ("ú", "u"),
        ("Á", "a"), ("É", "e"), ("Í", "i"), ("Ó", "o"), ("Ú", "u"),
        ("ñ", "n"), ("Ñ", "n")
    )
    for a, b in replacements:
        value = value.replace(a, b)
    value = re.sub(r"[^a-zA-Z0-9\s-]", "", value)
    value = re.sub(r"\s+", "-", value)
    value = re.sub(r"-+", "-", value)
    return value.lower().strip("-") or "articulo"


def load_storage_state(storage_state_path: str) -> dict:
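    """Load a Playwright storage-state JSON file (cookies and origins)."""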
    with open(storage_state_path, 'r') as f:
        return json.load(f)


def requests_session_from_storage_state(storage_state_path: str) -> requests.Session:
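    """Build a requests.Session carrying the Playwright session cookies so that
    image downloads reuse the authenticated portal login."""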
    storage_state = load_storage_state(storage_state_path)
    sess = requests.Session()
    # Mimic a realistic browser UA
    sess.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    })
    for c in storage_state.get('cookies', []):
        try:
            sess.cookies.set(
                name=c.get('name'),
                value=c.get('value'),
                domain=c.get('domain'),
                path=c.get('path', '/'),
            )
        except Exception as e:
            logging.debug(f"Failed setting cookie {c}: {e}")
    return sess


def expand_content(page: Page):
    """Expand 'Leer más' and accordion elements similarly to the screenshot app."""
    try:
        page.wait_for_timeout(1500)
        # Expand 'Leer más'
        expand_buttons = page.locator('text=/Leer más/i')
        count = expand_buttons.count()
        for i in range(count):
            try:
                btn = expand_buttons.nth(i)
                if btn.is_visible():
                    btn.click(timeout=5000)
                    page.wait_for_timeout(600)
            except Exception as e:
                logging.debug(f"Could not click 'Leer más' {i}: {e}")

        # Expand accordions
        try:
            try:
                page.wait_for_selector('span.mhh-mcn-v1-accordion-molecule-header__icon-dropdown', timeout=4000)
            except Exception:
                pass
            max_rounds = 3
            for _ in range(max_rounds):
                closed_selector = 'button:has(span.mhh-mcn-v1-accordion-molecule-header__icon-dropdown)[aria-expanded="false"]'
                closed = page.locator(closed_selector)
                cc = closed.count()
                if cc == 0:
                    break
                for j in range(cc):
                    try:
                        btn = closed.nth(j)
                        if btn.is_visible():
                            btn.click(timeout=4000)
                            page.wait_for_timeout(400)
                    except Exception as e2:
                        logging.debug(f"Accordion click failed: {e2}")
                page.wait_for_timeout(600)
        except Exception as e:
            logging.debug(f"Accordion expansion error: {e}")

        # Gentle scroll to trigger lazy loading
        total_js = "() => document.documentElement.scrollHeight"
        total = page.evaluate(total_js)
        viewport_h = page.viewport_size['height'] if page.viewport_size else 768
        sc = 0
        inc = int(viewport_h * 0.8)
        iters = 0
        while sc < total and iters < 40:
            page.mouse.wheel(0, inc)
            page.wait_for_timeout(400)
            sc += inc
            new_total = page.evaluate(total_js)
            if new_total > total:
                total = new_total
            iters += 1
    except Exception as e:
        logging.debug(f"expand_content error: {e}")


def extract_main_text(html: str) -> str:
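    """Extract the readable article body from rendered HTML (uses the lxml parser).
    Falls back to the largest <div>; output is capped at 8000 chars to bound prompt size."""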
    soup = BeautifulSoup(html, 'lxml')
    # Remove script/style/nav/footer/header
    for tag in soup(["script", "style", "noscript"]):
        tag.decompose()

    candidates = []
    # Priority candidates
    for sel in [
        'article',
        'main',
        'div.entry-content',
        'div.post-content',
        'div.article-content',
        'div.article-body',
        'main#mhh_mcn_main',
    ]:
        for el in soup.select(sel):
            text = el.get_text(" ", strip=True)
            if text:
                candidates.append(text)
    # Fallback: largest texty div
    if not candidates:
        largest = ""
        for el in soup.find_all('div'):
            text = el.get_text(" ", strip=True)
            if text and len(text) > len(largest):
                largest = text
        if largest:
            candidates.append(largest)

    if not candidates:
        return soup.get_text(" ", strip=True)[:8000]

    best = max(candidates, key=len)
    return best[:8000]  # cap


def get_meta_og_image(soup: BeautifulSoup) -> str | None:
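    """Return the og:image URL from the page's meta tags, if present."""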
    for attr in ("property", "name"):
        el = soup.find('meta', attrs={attr: 'og:image'})
        if el and el.get('content'):
            return el['content']
    return None


def parse_srcset(srcset: str) -> str | None:
    """Pick the URL of the largest-width candidate from a srcset string."""
    try:
        parts = [p.strip() for p in srcset.split(',') if p.strip()]
        best_url = None
        best_w = -1
        for p in parts:
            segs = p.split()
            url = segs[0]
            if len(segs) == 1:
                w = 999999  # no width descriptor: assume it is the full-size asset
            else:
                w = 0
                for s in segs[1:]:
                    m = re.match(r"(\d+)w", s)
                    if m:
                        w = int(m.group(1))
                        break
            if w > best_w:
                best_w = w
                best_url = url
        return best_url
    except Exception:
        return None


def get_best_img_url(tag) -> str | None:
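    """Resolve the best source URL for an <img> tag: the largest srcset candidate
    first, then common lazy-loading attributes, then the plain src."""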
    # Prefer srcset (largest)
    for attr in ('data-srcset', 'srcset'):
        srcset = tag.get(attr)
        if srcset:
            u = parse_srcset(srcset)
            if u:
                return u
    # Then data-src or src
    for attr in ('data-src', 'data-original', 'src'):
        u = tag.get(attr)
        if u:
            return u
    return None


def discover_image_urls(html: str, base_url: str) -> list[str]:
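    """Collect absolute image URLs for the article: the og:image hero first, then
    deduplicated <img> sources scoped to the main content container when found."""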
    soup = BeautifulSoup(html, 'lxml')
    urls = []
    # First try OG image as hero
    og = get_meta_og_image(soup)
    if og:
        urls.append(urljoin(base_url, og))

    # Find main container
    container = None
    for sel in [
        'article',
        'main',
        'div.entry-content',
        'div.post-content',
        'div.article-content',
        'div.article-body',
        'main#mhh_mcn_main',
    ]:
        c = soup.select_one(sel)
        if c:
            container = c
            break

    scope = container if container else soup
    seen = set()
    for img in scope.find_all('img'):
        u = get_best_img_url(img)
        if not u:
            continue
        if u.startswith('data:image'):
            # keep data URIs as-is
            key = u[:50]
            if key not in seen:
                seen.add(key)
                urls.append(u)
            continue
        absu = urljoin(base_url, u)
        if absu not in seen:
            seen.add(absu)
            urls.append(absu)

    return urls


def ensure_dir(path: str):
    os.makedirs(path, exist_ok=True)


def infer_ext_from_url_or_data(url: str) -> str:
    """Guess a file extension from a URL path or data URI; defaults to .jpg."""
    if url.startswith('data:image/'):
        m = re.match(r"data:image/([a-zA-Z0-9+]+);base64,", url)
        if m:
            fmt = m.group(1).lower()
            if fmt == 'jpeg':
                return '.jpg'
            if fmt == 'svg+xml':
                return '.svg'
            return f'.{fmt}'
        return '.jpg'
    parsed = urlparse(url)
    ext = os.path.splitext(parsed.path)[1].lower()
    if ext in ('.jpg', '.jpeg', '.png', '.webp', '.gif', '.svg'):
        return ext
    return '.jpg'


def save_data_uri_image(data_url: str, out_path: str):
    """Decode a base64 data URI and write the raw image bytes to out_path."""
    _, b64data = data_url.split(',', 1)
    with open(out_path, 'wb') as f:
        f.write(base64.b64decode(b64data))


def download_image(session: requests.Session, url: str, out_path: str) -> bool:
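    """Fetch an image into out_path (data URIs are decoded locally).
    Returns True on success, False otherwise."""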
    try:
        if url.startswith('data:image/'):
            save_data_uri_image(url, out_path)
            return True
        resp = session.get(url, timeout=30)
        if resp.status_code == 200:
            with open(out_path, 'wb') as f:
                f.write(resp.content)
            return True
        logging.warning(f"Image download failed {url}: HTTP {resp.status_code}")
        return False
    except Exception as e:
        logging.warning(f"Image download error {url}: {e}")
        return False


def tinify_optimize(input_path: str, output_path: str, tinify_key: str | None) -> bool:
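    """Compress input_path into output_path via the Tinify API; without an API key
    the file is copied unchanged. API calls pass through the global rate limiter."""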
    if not tinify_key:
        # No key, skip optimization but still copy file
        try:
            shutil.copyfile(input_path, output_path)
            return True
        except Exception as e:
            logging.warning(f"Copy (no tinify key) failed {input_path} -> {output_path}: {e}")
            return False
    try:
        GLOBAL_RATE_LIMITER.wait()
        tinify.key = tinify_key
        source = tinify.from_file(input_path)
        source.to_file(output_path)
        return True
    except Exception as e:
        logging.warning(f"Tinify failed for {input_path}: {e}")
        return False


def build_openai_client(api_key: str | None) -> OpenAI | None:
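    """Create an OpenAI client, or return None when no API key is configured."""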
    if not api_key:
        return None
    try:
        client = OpenAI(api_key=api_key)
        return client
    except Exception as e:
        logging.error(f"Failed to init OpenAI client: {e}")
        return None


def extract_text_from_responses(resp) -> str:
    """Best-effort text extraction across OpenAI SDK response shapes:
    Responses API `output_text`, structured `output` items, or Chat Completions."""
    try:
        if hasattr(resp, 'output_text') and resp.output_text:
            return resp.output_text.strip()
    except Exception:
        pass
    try:
        # responses API often: resp.output[0].content[0].text
        output = getattr(resp, 'output', None)
        if output:
            first = output[0]
            content = getattr(first, 'content', None)
            if content and len(content) > 0 and hasattr(content[0], 'text'):
                return content[0].text.strip()
    except Exception:
        pass
    try:
        choice = resp.choices[0]
        msg = choice.message
        if isinstance(msg, str):
            return msg.strip()
        if hasattr(msg, 'content'):
            return msg.content.strip()
    except Exception:
        pass
    return ""


def generate_excerpt_with_openai(client: OpenAI | None, model: str, article_text: str) -> str:
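    """Ask the model for a Spanish summary excerpt (hard-capped at 200 chars).
    Returns an empty string when no client is available or the call fails."""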
    if not client:
        return ""
    prompt = (
        "Eres un editor SEO. Escribe un Summary Excerpt en español, atractivo y factual, de máximo 200 caracteres "
        "para invitar a leer un artículo. Debe describir brevemente el contenido, sin promesas médicas, sin emojis, "
        "sin comillas y sin hashtags. Devuelve solo el texto final.\n\nContenido:\n" + article_text[:4000]
    )
    try:
        GLOBAL_RATE_LIMITER.wait()
        resp = client.responses.create(
            model=model,
            input=prompt
        )
        text = extract_text_from_responses(resp)
        # enforce ~200 chars hard cap
        return text[:200].strip()
    except Exception as e:
        logging.warning(f"OpenAI excerpt generation failed: {e}")
        return ""


def image_to_data_url(path: str) -> str:
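    """Encode a local image file as a base64 data URL for multimodal prompts."""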
    ext = os.path.splitext(path)[1].lower()
    mime = 'image/jpeg'
    if ext == '.png':
        mime = 'image/png'
    elif ext == '.webp':
        mime = 'image/webp'
    elif ext == '.gif':
        mime = 'image/gif'
    with open(path, 'rb') as f:
        b64 = base64.b64encode(f.read()).decode('utf-8')
    return f"data:{mime};base64,{b64}"


def generate_image_seo_metadata(client: OpenAI | None, model: str, image_path: str, article_title: str, article_context: str) -> tuple[str, str]:
    """Return (seo_filename_without_ext, alt_text)."""
    if not client:
        # fallback heuristics
        return slugify(f"{article_title}-img"), f"Imagen relacionada con {article_title}"[:120]

    data_url = image_to_data_url(image_path)
    prompt = (
        "Eres un experto SEO y editor de imágenes. \n"
        "Tarea: analiza la imagen y el contexto del artículo y devuelve un JSON con estas claves: \n"
        "- filename: nombre de archivo en español en kebab-case (4-6 palabras), sin tildes ni stopwords innecesarias, sin extensión.\n"
        "- alt_text: texto alternativo claro (10-16 palabras), descriptivo y útil para accesibilidad, sin emojis ni hashtags.\n"
        "Responde SOLO con JSON válido y nada más.\n\n"
        f"Título del artículo: {article_title}\n\nContexto (resumen):\n{article_context[:800]}\n"
    )

    try:
        GLOBAL_RATE_LIMITER.wait()
        resp = client.responses.create(
            model=model,
            input=[{
                "role": "user",
                "content": [
                    {"type": "input_text", "text": prompt},
                    {"type": "input_image", "image_url": data_url}
                ]
            }]
        )
        text = extract_text_from_responses(resp)
        # parse JSON
        try:
            data = json.loads(text)
            filename = slugify(str(data.get('filename') or article_title))
            alt_text = str(data.get('alt_text') or f"Imagen relacionada con {article_title}")
            # constrain alt length a bit
            if len(alt_text) > 200:
                alt_text = alt_text[:200]
            return filename, alt_text
        except Exception:
            # fallback
            return slugify(f"{article_title}-img"), f"Imagen relacionada con {article_title}"[:120]
    except Exception as e:
        logging.warning(f"OpenAI image SEO failed: {e}")
        return slugify(f"{article_title}-img"), f"Imagen relacionada con {article_title}"[:120]


# ----------------------------------------------------------------------------
# Article processing worker
# ----------------------------------------------------------------------------

def process_article(storage_state_path: str,
                    row: pd.Series,
                    url_col_name: str,
                    name_col_name: str,
                    model_name: str,
                    openai_key: str | None,
                    tinify_key: str | None,
                    debug_mode: bool = False) -> dict:
    """Return a dict with status info for reporting."""
    client = build_openai_client(openai_key)

    raw_name = row.get(name_col_name)
    article_name = str(raw_name).strip() if pd.notna(raw_name) else "Articulo"
    article_url = None
    if url_col_name in row and pd.notna(row[url_col_name]):
        article_url = str(row[url_col_name]).strip()
    # Fallback by index 12 if named col missing
    if (not article_url) and len(row) > 12 and pd.notna(row.iloc[12]):
        article_url = str(row.iloc[12]).strip()

    if debug_mode:
        logging.info(f"Processing row: name='{article_name}', url='{article_url}', row_len={len(row)}, url_col='{url_col_name}'")
        if len(row) > 12:
            logging.info(f"row.iloc[12] = '{row.iloc[12]}'")
        else:
            logging.info("row has fewer than 13 columns")

    if not article_url or not article_url.lower().startswith(("http://", "https://")):
        return {
            "Status": "Skipped (Invalid URL)",
            "Processed Folder Path": "",
            "Images Found": 0,
            "Excerpt Length": 0,
        }

    folder_slug = slugify(article_name)
    article_dir = os.path.join(BASE_OUTPUT_DIR, folder_slug)
    ensure_dir(article_dir)

    # Prepare alttext file collector
    alt_lines = []

    # Open Playwright in this thread
    playwright = None
    browser = None
    context = None
    page = None

    try:
        playwright = sync_playwright().start()
        browser = playwright.chromium.launch(headless=True)

        storage_state = load_storage_state(storage_state_path)
        context = browser.new_context(
            storage_state=storage_state,
            viewport={"width": DEFAULT_VIEWPORT_WIDTH, "height": DEFAULT_VIEWPORT_HEIGHT},
            device_scale_factor=1,
            is_mobile=False,
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
        )
        page = context.new_page()

        page.goto(article_url, wait_until='load', timeout=60000)
        expand_content(page)
        # final settle
        page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
        page.wait_for_timeout(1200)
        try:
            page.wait_for_load_state('networkidle', timeout=8000)
        except Exception:
            pass

        html = page.content()
        main_text = extract_main_text(html)

        # Excerpt
        excerpt = generate_excerpt_with_openai(client, model_name, main_text)
        if excerpt:
            with open(os.path.join(article_dir, 'excerpt.txt'), 'w', encoding='utf-8') as f:
                f.write(excerpt)

        # Images discovery
        images = discover_image_urls(html, page.url)

        # Download + optimize + SEO naming + alt text
        sess = requests_session_from_storage_state(storage_state_path)
        optimized_count = 0
        for idx, img_url in enumerate(images, start=1):
            ext = infer_ext_from_url_or_data(img_url)
            tmp_name = f"downloaded_img_{idx}{ext}"
            tmp_path = os.path.join(article_dir, tmp_name)
            ok = download_image(sess, img_url, tmp_path)
            if not ok:
                continue

            # Optimize with Tinify
            optimized_tmp = os.path.join(article_dir, f"optimized_img_{idx}{ext}")
            ok2 = tinify_optimize(tmp_path, optimized_tmp, tinify_key)
            if not ok2:
                # fallback: keep original
                optimized_tmp = tmp_path

            # Generate SEO filename + alt text
            seo_name_noext, alt_text = generate_image_seo_metadata(
                client, model_name, optimized_tmp, article_name, main_text[:600]
            )
            final_name = f"{seo_name_noext}{ext}"
            final_path = os.path.join(article_dir, final_name)

            # Rename/move optimized_tmp to final name
            try:
                if os.path.abspath(optimized_tmp) != os.path.abspath(final_path):
                    if os.path.exists(final_path):
                        os.remove(final_path)
                    os.replace(optimized_tmp, final_path)
            except Exception as e:
                logging.debug(f"Rename optimized file failed: {e}")
                final_path = optimized_tmp  # keep previous
                final_name = os.path.basename(final_path)

            # Remove original temp if exists and different
            try:
                if os.path.exists(tmp_path) and os.path.abspath(tmp_path) != os.path.abspath(final_path):
                    os.remove(tmp_path)
            except Exception:
                pass

            alt_lines.append(f"{final_name}\t{alt_text}")
            optimized_count += 1

        if alt_lines:
            with open(os.path.join(article_dir, 'alttext.txt'), 'w', encoding='utf-8') as f:
                f.write("\n".join(alt_lines))

        return {
            "Status": "Success",
            "Processed Folder Path": os.path.abspath(article_dir),
            "Images Found": optimized_count,
            "Excerpt Length": len(excerpt or ""),
        }

    except Exception as e:
        logging.error(f"Error processing article {article_url}: {e}", exc_info=True)
        return {
            "Status": f"Error: {e}",
            "Processed Folder Path": os.path.abspath(article_dir),
            "Images Found": 0,
            "Excerpt Length": 0,
        }
    finally:
        try:
            if page:
                page.close()
        except Exception:
            pass
        try:
            if context:
                context.close()
        except Exception:
            pass
        try:
            if browser:
                browser.close()
        except Exception:
            pass
        try:
            if playwright:
                playwright.stop()
        except Exception:
            pass


# ----------------------------------------------------------------------------
# Streamlit App
# ----------------------------------------------------------------------------

def main():
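    """Streamlit entry point: upload the Excel, log in once, process articles in a
    thread pool, and offer the per-row results as a downloadable report."""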
    load_dotenv()
    st.title("SEO Sunday Digest – Preparación para Publicación Pública")
    st.write("Este flujo autentica, extrae contenido de artículos privados y prepara recursos para publicar con buen SEO.")

    # Inputs
    uploaded_file = st.file_uploader("Sube el Excel con los artículos", type=["xlsx", "xls"])

    col1, col2 = st.columns(2)
    with col1:
        username = st.text_input("Usuario (MSD)")
    with col2:
        password = st.text_input("Contraseña (MSD)", type="password")

    # Model selection (default gpt-5)
    model_name = st.selectbox("Modelo OpenAI", options=["gpt-5", "gpt-4.1", "gpt-4o-mini"], index=0)

    # Debug mode
    debug_mode = st.checkbox("Modo debug (muestra más info en consola y logs)", value=False)

    # API keys from env (with visual hints)
    env_openai = os.getenv("OPENAI_API_KEY", "")
    env_tinify = os.getenv("TINIFY_API_KEY", "")

    st.write(f"OpenAI API Key en entorno: {'✅' if bool(env_openai) else '⚠️ no encontrada'}")
    st.write(f"Tinify API Key en entorno: {'✅' if bool(env_tinify) else '⚠️ no encontrada'}")

    # Fixed concurrency and rate limit
    st.write(f"Concurrencia: {MAX_WORKERS} hilos · Separación entre peticiones externas: {int(RATE_LIMIT_SECONDS)}s")

    start_btn = st.button("Iniciar procesamiento")

    if start_btn:
        if not uploaded_file:
            st.error("Por favor sube un Excel primero.")
            return
        if not username or not password:
            st.error("Por favor ingresa usuario y contraseña del portal MSD.")
            return

        # Read Excel
        try:
            df = pd.read_excel(uploaded_file, header=0)
        except Exception as e:
            st.error(f"Error leyendo Excel: {e}")
            return

        if df.empty:
            st.error("El Excel está vacío.")
            return

        # Validate columns
        name_col = "Name"
        # Prefer URL.1 (full URL) over URL (partial)
        url_col = "URL.1" if "URL.1" in df.columns else "URL"
        if debug_mode:
            st.write(f"Columnas encontradas en Excel: {list(df.columns)}")
            st.write("Primeras filas del Excel:")
            st.write(df.head(3))
        if name_col not in df.columns:
            st.warning(f"No se encontró columna '{name_col}'. Se usará 'Articulo' como nombre por defecto.")
        if url_col not in df.columns:
            st.warning(f"No se encontró columna '{url_col}'. Se tomará la columna M (índice 12) por posición si existe.")

        # Do login once and obtain storage state path
        st.info("Autenticando en el portal...")
        storage_state_path = login_to_portal(username, password, LOGIN_URL)
        if not storage_state_path:
            st.error("No fue posible iniciar sesión. Revisa credenciales.")
            return
        st.success("Login correcto. Iniciando procesamiento de artículos...")

        # Ensure base output directory
        ensure_dir(BASE_OUTPUT_DIR)

        total = len(df)
        progress_bar = st.progress(0.0)
        status_text = st.empty()

        # Prepare output columns
        df['Processed Folder Path'] = ""
        df['Images Found'] = 0
        df['Excerpt Length'] = 0
        df['Processing Status'] = "Pending"

        processed = 0
        success = 0

        # Thread pool
        results = {}
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
            future_to_index = {}
            for idx, row in df.iterrows():
                future = executor.submit(
                    process_article,
                    storage_state_path,
                    row,
                    url_col,
                    name_col,
                    model_name,
                    env_openai,
                    env_tinify,
                    debug_mode
                )
                future_to_index[future] = idx

            for fut in as_completed(future_to_index):
                idx = future_to_index[fut]
                try:
                    res = fut.result()
                except Exception as e:
                    res = {
                        "Status": f"Error: {e}",
                        "Processed Folder Path": "",
                        "Images Found": 0,
                        "Excerpt Length": 0,
                    }
                # Update DF
                df.at[idx, 'Processed Folder Path'] = res.get('Processed Folder Path', '')
                df.at[idx, 'Images Found'] = res.get('Images Found', 0)
                df.at[idx, 'Excerpt Length'] = res.get('Excerpt Length', 0)
                df.at[idx, 'Processing Status'] = res.get('Status', 'Unknown')

                processed += 1
                if res.get('Status') == 'Success':
                    success += 1
                progress_bar.progress(processed / total)
                status_text.text(f"Procesados {processed}/{total} · Éxitos: {success}")

        st.success(f"¡Completado! Éxitos: {success} de {total}")

        # Download report
        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            df.to_excel(writer, index=False, sheet_name='Report')
            ws = writer.sheets['Report']
            for i, col_name in enumerate(df.columns):
                max_len = df[col_name].astype(str).map(len).max() if not df[col_name].empty else 0
                ws.set_column(i, i, max(len(str(col_name)), int(max_len)) + 2)
        excel_bytes = output.getvalue()
        st.download_button(
            label="Descargar reporte",
            data=excel_bytes,
            file_name="seo_sundaydigest_report.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )


if __name__ == "__main__":
    main()