import os
import base64
import mimetypes
from pathlib import Path
from urllib.parse import urljoin, quote
from bs4 import BeautifulSoup
import requests

# === Configuration ===
# Directory that holds this script; the other paths below are derived from it.
SCRIPT_DIR = Path(__file__).resolve().parent

# The downloaded site files are read from the script's own directory.
INPUT_DIR = SCRIPT_DIR

# Recompiled pages are written here; the directory is created at import time
# if it does not exist yet.
OUTPUT_DIR = SCRIPT_DIR.joinpath("recompiled-pages")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Absolute URL prefix of the live site, used when building fallback image URLs
# and when recognising absolute links that should be rewritten.
BASE_URL = "https://josefkulovany.com/"

# === Helpers ===

def sanitize_filename(fn):
    """Turn *fn* into a string free of percent-escapes.

    The name is fully percent-encoded (no character is treated as safe
    beyond the ones ``quote`` never escapes), encoded spaces become a single
    underscore, and every remaining ``%`` is replaced by an underscore, so
    e.g. ``/`` ends up as ``_2F``.
    """
    encoded = quote(fn, safe='')
    spaces_collapsed = encoded.replace('%20', '_')
    return spaces_collapsed.replace('%', '_')

def encode_image(path: Path):
    """Return the file at *path* as a base64 ``data:`` URI, or None on failure.

    The MIME type is guessed from the file name and falls back to
    ``application/octet-stream`` when it cannot be determined.

    Args:
        path: Location of the file to embed (a ``Path`` or path string).

    Returns:
        A ``data:<mime>;base64,<payload>`` string, or ``None`` when the file
        cannot be read (missing, a directory, permission denied, ...).
    """
    path = Path(path)
    mime, _ = mimetypes.guess_type(path.name)
    mime = mime or "application/octet-stream"
    try:
        payload = path.read_bytes()
    except OSError:
        # Best-effort: an unreadable file just means "no data URI"; only
        # I/O failures are swallowed so real bugs and Ctrl-C still surface.
        return None
    return f"data:{mime};base64,{base64.b64encode(payload).decode()}"

def fetch_text(url: str) -> "str | None":
    """Download *url* and return the response body as text.

    Args:
        url: Absolute URL to fetch with a 10-second timeout.

    Returns:
        The decoded response text, or ``None`` when the request fails
        (connection problem, timeout, invalid URL, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        return r.text
    except requests.RequestException:
        # Best-effort download: only request-level failures are swallowed so
        # that KeyboardInterrupt and genuine programming errors still surface.
        return None

# === Core Logic ===

def _find_local_file(base_path, ref):
    """Return the resolved path of *ref* under *base_path* if it is a regular file, else None."""
    try:
        candidate = (base_path / ref).resolve()
        return candidate if candidate.is_file() else None
    except (OSError, ValueError):
        # References that are not usable paths (e.g. a long data: URI or one
        # containing a NUL byte) can make the filesystem probe raise; treat
        # them as "no local file" instead of aborting the page.
        return None


def _load_text_asset(base_path, ref):
    """Return the text behind a stylesheet/script reference, or None if it is unavailable."""
    if ref.startswith("http"):
        return fetch_text(ref)
    local = _find_local_file(base_path, ref)
    if local is None:
        return None
    try:
        return local.read_text(encoding='utf-8', errors='ignore')
    except OSError:
        # Unreadable local asset: leave the original tag untouched.
        return None


def process_html(file_path: Path) -> str:
    """Build a self-contained version of one HTML page and return its markup.

    Steps, in order:

    1. Insert ``<base href="./">`` into ``<head>`` when the page has a head
       without a ``<base>`` tag.
    2. Replace each ``<link rel="stylesheet" href=...>`` with a ``<style>``
       tag holding the stylesheet text (fetched for ``http...`` references,
       read from disk relative to the page otherwise). Links whose content
       cannot be obtained are left as they are.
    3. Do the same for ``<script src=...>`` tags.
    4. Embed each ``<img>`` whose file exists next to the page as a base64
       data URI (dropping ``srcset``); otherwise point ``src`` at the
       absolute URL formed from ``BASE_URL``.
    5. Rewrite anchors whose ``href`` starts with ``BASE_URL`` to sanitized
       relative names.
    6. Wrap every ``<img>`` that has a ``src`` and is not already inside an
       ``<a>`` in an ``<a class="zoomable">`` linking to the image source,
       and append a small lightbox script.

    Step 5 deliberately runs before step 6: the zoom anchors may point at
    ``BASE_URL`` image URLs, and rewriting those to sanitized page names
    would leave the lightbox with a non-existent image address.

    Args:
        file_path: Path of the HTML file to process. It is decoded as UTF-8,
            ignoring undecodable bytes.

    Returns:
        The processed document serialized as a string.
    """
    soup = BeautifulSoup(file_path.read_text(encoding='utf-8', errors='ignore'), "html.parser")
    base_path = file_path.parent

    # Ensure base href
    if soup.head and not soup.head.find("base"):
        soup.head.insert(0, soup.new_tag("base", href="./"))

    # Inline all CSS
    for link in soup.find_all("link", rel="stylesheet", href=True):
        content = _load_text_asset(base_path, link['href'])
        if content:
            style = soup.new_tag("style")
            style.string = content
            link.replace_with(style)

    # Inline JS
    for script in soup.find_all("script", src=True):
        content = _load_text_asset(base_path, script['src'])
        if content:
            new = soup.new_tag("script")
            new.string = content
            script.replace_with(new)

    # Fix images: embed base64 or fallback
    for img in soup.find_all("img", src=True):
        src = img['src']
        local = _find_local_file(base_path, src)
        data_uri = encode_image(local) if local is not None else None
        if data_uri:
            img['src'] = data_uri
            img.attrs.pop("srcset", None)
        else:
            img['src'] = urljoin(BASE_URL, src)

    # Rewrite masthead links to local compiled files. This must happen before
    # the zoom anchors are created so that their image hrefs are not mangled.
    for a in soup.find_all("a", href=True):
        href = a['href']
        if href.startswith(BASE_URL):
            target = href[len(BASE_URL):].rstrip('/')
            target = target.replace('%20', '_')
            a['href'] = sanitize_filename(target)

    # Fix zoom: wrap each <img> in clickable <a> to its src, then inject lightbox script
    for img in soup.find_all("img"):
        if "src" not in img.attrs:
            continue  # skip images without src attribute
        if img.parent.name != "a":
            a = soup.new_tag("a", href=img['src'])
            img.wrap(a)
            a['class'] = 'zoomable'

    # Inject minimal lightbox script and CSS
    script_tag = soup.new_tag("script")
    script_tag.string = '''
    document.addEventListener("click", function(e){
      if (e.target.closest("a.zoomable")) {
        e.preventDefault();
        const src = e.target.closest("a.zoomable").href;
        const overlay = document.createElement("div");
        overlay.style = "position:fixed;top:0;left:0;width:100%;height:100%;background:#0008;display:flex;align-items:center;justify-content:center;z-index:10000;";
        const img = document.createElement("img");
        img.src = src;
        img.style = "max-width:90%;max-height:90%";
        overlay.appendChild(img);
        overlay.addEventListener("click", ()=>overlay.remove());
        document.body.appendChild(overlay);
      }
    });
    '''
    # Fragments without a <body> get the script appended at document level
    # instead of failing on a missing body.
    container = soup.body if soup.body is not None else soup
    container.append(script_tag)

    return str(soup)

def recompile_all():
    """Process every ``*.html`` file under ``INPUT_DIR`` into ``OUTPUT_DIR``.

    Files located inside ``OUTPUT_DIR`` are skipped. Each remaining page is
    run through ``process_html`` and written, as UTF-8, to the same relative
    location under ``OUTPUT_DIR``; a check mark and the relative path are
    printed for each page written.
    """
    for page in INPUT_DIR.rglob("*.html"):
        already_output = OUTPUT_DIR in page.parents
        if not already_output:
            relative = page.relative_to(INPUT_DIR)
            destination = OUTPUT_DIR / relative
            # Mirror the source directory layout before producing the page.
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_text(process_html(page), encoding='utf-8')
            print("✓", relative)

# Recompile all pages only when this file is executed as a script.
if __name__ == "__main__":
    recompile_all()
