import base64
import mimetypes
import os
from pathlib import Path
from urllib.parse import quote, unquote, urljoin, urlsplit

from bs4 import BeautifulSoup

# Filesystem layout: pages are read from the directory containing this script
# and the rewritten copies are written to its "recompiled-pages" subdirectory.
SCRIPT_DIR = Path(__file__).resolve().parent
INPUT_DIR = SCRIPT_DIR
OUTPUT_DIR = SCRIPT_DIR.joinpath("recompiled-pages")
# Created at import time; an already-existing directory is not an error.
OUTPUT_DIR.mkdir(exist_ok=True, parents=True)

# Root of the live site that relative asset URLs are resolved against.
BASE_URL = "https://josefkulovany.com/"

def sanitize_filename(filename):
    """Return *filename* percent-encoded, with every escape marker turned into ``_``.

    All characters outside the URL-unreserved set are percent-encoded (no
    character is treated as safe), an encoded space becomes a single
    underscore, and each remaining ``%`` is replaced by an underscore.
    """
    encoded = quote(filename, safe="")
    # Order matters: collapse "%20" first so a space yields "_" rather than "_20".
    without_spaces = encoded.replace("%20", "_")
    return without_spaces.replace("%", "_")

def encode_image_to_base64(local_path):
    """Return the file at *local_path* as a ``data:`` URI, or ``None`` on failure.

    The MIME type is guessed from the file name and defaults to
    ``application/octet-stream`` when it cannot be determined. Any exception
    raised while guessing, reading or encoding is swallowed and reported as
    ``None`` so the caller can fall back to another source.
    """
    try:
        guessed_type = mimetypes.guess_type(str(local_path))[0]
        content_type = "application/octet-stream" if guessed_type is None else guessed_type
        with open(local_path, "rb") as handle:
            raw_bytes = handle.read()
        payload = base64.b64encode(raw_bytes).decode("utf-8")
        return "data:" + content_type + ";base64," + payload
    except Exception:
        # Best effort by design: the caller treats None as "could not inline".
        return None

def _find_local_image(base_path, src):
    """Return the existing local file that image URL *src* refers to, or ``None``.

    Only references without a scheme and without a host are looked up on
    disk, so ``data:`` URIs and absolute or protocol-relative URLs are never
    turned into filesystem paths. The literal *src* is tried first (so a
    mirrored file whose name itself contains ``?`` or ``%`` still matches),
    then its percent-decoded path with query string and fragment removed.
    """
    try:
        parts = urlsplit(src)
    except ValueError:
        return None
    if parts.scheme or parts.netloc:
        return None

    candidates = [src]
    decoded_path = unquote(parts.path)
    if decoded_path and decoded_path != src:
        candidates.append(decoded_path)

    # NOTE: pathlib treats a root-relative src ("/img/a.png") as an absolute
    # filesystem path, so such references are looked up from the filesystem
    # root rather than from base_path.
    for relative in candidates:
        try:
            candidate = (base_path / relative).resolve()
            if candidate.is_file():
                return candidate
        except (OSError, ValueError, RuntimeError):
            # Unusable path (name too long, embedded NUL, symlink loop, ...):
            # treat it as "no local copy" instead of failing the whole page.
            continue
    return None


def process_html(file_path):
    """Return the HTML of *file_path* rewritten for standalone viewing.

    The document is parsed with BeautifulSoup's ``html.parser`` and then:

    * a ``<base href="./">`` is added when the page has no ``<base>`` tag
      (a ``<head>`` is created first if the page lacks one);
    * every ``<link href>`` and ``<script src>`` is made absolute against
      ``BASE_URL``;
    * every ``<img src>`` is replaced by a base64 ``data:`` URI when a local
      copy of the image can be found and encoded (its ``srcset`` is dropped
      in that case), otherwise by its absolute URL against ``BASE_URL``.

    Args:
        file_path: Path (``str`` or ``pathlib.Path``) of the HTML file,
            read as UTF-8.

    Returns:
        The modified document serialized as a string.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f, "html.parser")

    # Ensure <base href="./"> for relative links
    if not soup.find("base"):
        head = soup.head
        if head is None:
            # Fragment or head-less page: create a <head> rather than crash.
            head = soup.new_tag("head")
            if soup.html is not None:
                soup.html.insert(0, head)
            else:
                # Insert before the first element so a leading doctype stays first.
                first_tag = soup.find(True)
                if first_tag is not None:
                    first_tag.insert_before(head)
                else:
                    soup.append(head)
        head.insert(0, soup.new_tag("base", href="./"))

    base_path = Path(file_path).parent

    # For link and script tags: rewrite URLs to live absolute URLs (preserve site structure and CSS/JS loading)
    for tag, attr in [("link", "href"), ("script", "src")]:
        for el in soup.find_all(tag):
            if el.has_attr(attr):
                el[attr] = urljoin(BASE_URL, el[attr])

    # For img tags: try local file, inline base64 if possible, else fallback to live absolute URL
    for img in soup.find_all("img", src=True):
        orig_src = img["src"]
        local_img_path = _find_local_image(base_path, orig_src)
        data_uri = None
        if local_img_path is not None:
            data_uri = encode_image_to_base64(local_img_path)

        if data_uri:
            img["src"] = data_uri
            # Remove srcset if present because it conflicts with data URI
            img.attrs.pop("srcset", None)
        else:
            # No usable local copy: point at the live site. urljoin leaves
            # URLs that already carry a scheme (including data:) unchanged.
            img["src"] = urljoin(BASE_URL, orig_src)

    return str(soup)

def recompile_all():
    """Rewrite every ``*.html`` file found directly in ``INPUT_DIR``.

    Each page is processed with ``process_html`` and written as UTF-8 to
    ``OUTPUT_DIR`` under its sanitized file name. A failure on one page is
    printed and does not stop the remaining pages from being processed.
    """
    for page in INPUT_DIR.glob("*.html"):
        target = OUTPUT_DIR / sanitize_filename(page.name)
        try:
            # process_html runs before the target is opened, so a failing
            # page never leaves an empty output file behind.
            target.write_text(process_html(page), encoding="utf-8")
            print(f"✔️ Recompiled: {page.name} -> {target.name}")
        except Exception as err:
            print(f"❌ Error processing {page.name}: {err}")

# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    recompile_all()
