import base64
import filecmp
import mimetypes
import os
import re
import shutil
from pathlib import Path
from urllib.parse import urljoin, quote

import requests
from bs4 import BeautifulSoup
from csscompressor import compress
from htmlmin import minify
from jsmin import jsmin

# Root of the live site; references that cannot be resolved locally are
# rewritten to absolute URLs under this address.
BASE_URL = "https://josefkulovany.com/"
# Directory containing this script; it doubles as the source tree that is
# scanned for HTML pages.
SCRIPT_DIR = Path(__file__).resolve().parent
SRC_DIR = SCRIPT_DIR
# Output tree for the recompiled pages and the folder receiving copied assets.
# Note that OUT_DIR is located inside SRC_DIR.
OUT_DIR = SCRIPT_DIR / "recompiled-pages"
ASSETS_DIR = OUT_DIR / "assets"
# NOTE: both directories are created as soon as this module is imported, not
# only when it is run as a script.
OUT_DIR.mkdir(parents=True, exist_ok=True)
ASSETS_DIR.mkdir(parents=True, exist_ok=True)

# Extensions of href-linked resources that are copied into ASSETS_DIR; any
# other local href is pointed at BASE_URL instead.
COPY_EXTENSIONS = {".woff", ".woff2", ".ttf", ".eot", ".otf", ".mp4", ".mp3", ".ico", ".svg", ".webp", ".bmp", ".gif", ".png", ".jpg", ".jpeg"}

def read_file(path):
    """Return the contents of *path* decoded as UTF-8.

    Bytes that are not valid UTF-8 are silently dropped rather than raising.
    """
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        return handle.read()

def write_file(path, content):
    """Write *content* to *path* as UTF-8, creating parent directories first."""
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(content)

def sanitize_filename(filename):
    """Return *filename* percent-encoded, with escapes flattened to underscores.

    Every character outside the unreserved URL set is percent-encoded first;
    an encoded space (``%20``) then becomes a single ``_`` and every remaining
    ``%`` becomes ``_`` (its two hex digits are kept).
    """
    encoded = quote(filename, safe="")
    # Order matters: "%20" must be collapsed before the lone "%" replacement.
    for token in ("%20", "%"):
        encoded = encoded.replace(token, "_")
    return encoded

def copy_asset(src_path):
    """Copy a local file into ASSETS_DIR and return its output-relative path.

    If a file with the same name already exists in ASSETS_DIR and has
    identical contents, that copy is reused instead of creating another
    duplicate. This keeps assets shared by several pages, and repeated runs,
    from piling up ``name_1``, ``name_2``, ... copies. A numeric suffix is
    only added when a *different* file already occupies the name.

    Args:
        src_path: ``Path`` of the file to copy.

    Returns:
        ``Path("assets") / <name>`` for the copied (or reused) file, or
        ``None`` when *src_path* is not an existing regular file.
    """
    src_path = src_path.resolve()
    if not src_path.is_file():
        return None

    stem, suffix = src_path.stem, src_path.suffix
    dest_name = src_path.name
    counter = 1
    while True:
        dest_path = ASSETS_DIR / dest_name
        if not dest_path.exists():
            shutil.copy2(src_path, dest_path)
            break
        # shallow=False forces a content comparison; copy2 preserves mtimes,
        # but equal stat signatures alone are not proof of equal contents.
        if filecmp.cmp(src_path, dest_path, shallow=False):
            break
        dest_name = f"{stem}_{counter}{suffix}"
        counter += 1
    return Path("assets") / dest_name

def encode_image_base64(local_path):
    """Return the file at *local_path* as a ``data:`` URI, or ``None`` on failure.

    The MIME type is guessed from the file name and defaults to
    ``application/octet-stream`` when it cannot be determined. Any error
    while guessing or reading yields ``None`` so callers can fall back.
    """
    try:
        guessed = mimetypes.guess_type(str(local_path))[0]
        mime = "application/octet-stream" if guessed is None else guessed
        raw_bytes = local_path.read_bytes()
        payload = base64.b64encode(raw_bytes).decode('utf-8')
        return f"data:{mime};base64,{payload}"
    except Exception:
        return None

def inline_css(css_path, base_path, processed=None):
    """Return the minified text of a stylesheet with local imports inlined.

    Local ``@import`` rules are replaced recursively by the imported
    stylesheet's (already compressed) text, and local ``url(...)`` targets
    are copied with ``copy_asset`` and rewritten to the copied location.
    Remote (``http…``) and ``data:`` references are left exactly as written.

    Args:
        css_path: Stylesheet location, relative to *base_path* (or absolute).
        base_path: Directory against which *css_path* is resolved.
        processed: Set of already-inlined stylesheet paths, shared across the
            recursion so that circular or repeated imports are emitted once.

    Returns:
        The compressed CSS, or ``""`` when the path is not a regular file or
        was already processed.
    """
    if processed is None:
        processed = set()
    css_path = (base_path / css_path).resolve()
    # is_file() rather than exists(): an empty href/import resolves to the
    # directory itself, which must be skipped instead of failing on read.
    if not css_path.is_file() or css_path in processed:
        return ""
    processed.add(css_path)

    css_text = read_file(css_path)

    def clean(token):
        # CSS allows whitespace inside url( ... ) around the optional quotes.
        return token.strip().strip("'\"").strip()

    # Recursive @import
    def import_replacer(match):
        import_url = clean(match.group(1))
        if import_url.startswith(("http", "data:")):
            return match.group(0)
        return inline_css(import_url, css_path.parent, processed)

    css_text = re.sub(r'@import\s+url\(([^)]+)\);?', import_replacer, css_text)
    css_text = re.sub(r'@import\s+["\']([^"\']+)["\'];?', import_replacer, css_text)

    def url_replacer(match):
        orig_url = clean(match.group(1))
        # Untouched references keep their original text (including quotes);
        # dropping the quotes would break URLs that contain spaces.
        if orig_url.startswith(("data:", "http")):
            return match.group(0)
        asset_path = (css_path.parent / orig_url).resolve()
        copied = copy_asset(asset_path)
        if copied is None:
            return match.group(0)
        # Quoted so that copied file names containing spaces stay valid CSS.
        return f'url("{copied.as_posix()}")'

    css_text = re.sub(r'url\(([^)]+)\)', url_replacer, css_text)
    return compress(css_text)

def process_html(html_path):
    """Build a self-contained, minified version of one HTML page.

    Local stylesheets and scripts are inlined, local images become base64
    data URIs, other local ``src``/``href`` resources are copied with
    ``copy_asset`` where possible, and anything that cannot be resolved
    locally is rewritten to an absolute URL under ``BASE_URL``. Remote
    (``http…``) stylesheets and scripts are removed from the page.

    Args:
        html_path: ``Path`` of the source HTML file.

    Returns:
        The minified HTML document as a string.
    """
    soup = BeautifulSoup(read_file(html_path), "html.parser")
    base_path = html_path.parent

    # "./" matches what a browser assumes when no <base> is present, so a
    # document without a <head> can simply go without one instead of failing
    # on soup.head being None.
    if not soup.find("base") and soup.head is not None:
        soup.head.insert(0, soup.new_tag("base", href="./"))

    # Inline CSS
    for link in soup.find_all("link", rel="stylesheet"):
        href = link.get("href", "")
        if href.startswith("http"):
            link.decompose()
            continue
        try:
            css_code = inline_css(href, base_path)
            style_tag = soup.new_tag("style")
            style_tag.string = css_code
            link.replace_with(style_tag)
        except Exception:
            # Best effort: an unreadable or unparsable stylesheet is dropped.
            # (Exception, not a bare except, so Ctrl-C still interrupts.)
            link.decompose()

    # Inline JS
    for script in soup.find_all("script", src=True):
        src = script["src"]
        if src.startswith("http"):
            script.decompose()
            continue
        js_path = (base_path / src).resolve()
        try:
            js_code = jsmin(read_file(js_path))
            new_script = soup.new_tag("script")
            new_script.string = js_code
            script.replace_with(new_script)
        except Exception:
            # Best effort: a missing or unreadable script is dropped.
            script.decompose()

    # Inline images or fallback to BASE_URL
    for img in soup.find_all("img", src=True):
        src = img["src"]
        # Already-absolute or already-inlined sources need no work. Probing
        # the filesystem with a long data URI can raise OSError.
        if src.startswith(("http://", "https://", "data:")):
            continue
        img_path = (base_path / src).resolve()
        if img_path.exists():
            data_uri = encode_image_base64(img_path)
            if data_uri:
                img["src"] = data_uri
                # srcset would override the inlined src in the browser.
                img.attrs.pop("srcset", None)
                continue
        img["src"] = urljoin(BASE_URL, src)

    # Other assets (audio, video, source, iframe, etc.). Images and scripts
    # were handled above, so their src values are skipped or gone by now.
    # NOTE(review): copy_asset returns paths of the form "assets/<name>";
    # pages written into a subdirectory of the output tree presumably need a
    # "../" prefix for these to resolve - verify.
    for tag in soup.find_all(src=True):
        src = tag["src"]
        if src.startswith("http") or src.startswith("data:"):
            continue
        asset_path = (base_path / src).resolve()
        copied = copy_asset(asset_path)
        tag["src"] = copied.as_posix() if copied else urljoin(BASE_URL, src)

    # Fonts/icons/etc
    for tag in soup.find_all(href=True):
        # Leave <base> alone: rewriting it to BASE_URL would make every
        # copied "assets/..." reference resolve against the remote site.
        if tag.name == "base":
            continue
        href = tag["href"]
        if href.startswith(("http", "data:", "#")):
            continue
        asset_path = (base_path / href).resolve()
        ext = asset_path.suffix.lower()
        copied = copy_asset(asset_path) if ext in COPY_EXTENSIONS else None
        tag["href"] = copied.as_posix() if copied else urljoin(BASE_URL, href)

    return minify(str(soup), remove_empty_space=True, remove_comments=True)

def recompile_all():
    """Recompile every HTML page under SRC_DIR into the same layout in OUT_DIR.

    Pages are processed independently: a failure is reported on stdout and
    the remaining pages are still compiled.
    """
    # OUT_DIR is located inside SRC_DIR, so without this filter a later run
    # would recompile its own output and nest it one level deeper each time.
    # The list is built up front so that files written during the run cannot
    # be picked up by the still-running directory walk.
    html_paths = sorted(
        html_path
        for html_path in SRC_DIR.rglob("*.html")
        if OUT_DIR not in html_path.parents
    )
    for html_path in html_paths:
        rel_path = html_path.relative_to(SRC_DIR)
        out_path = OUT_DIR / rel_path
        try:
            compiled = process_html(html_path)
            write_file(out_path, compiled)
            print(f"✅ Compiled: {rel_path}")
        except Exception as e:
            # Top-level boundary: report and continue with the next page.
            print(f"❌ Error processing {rel_path}: {e}")

# Run the full recompilation only when executed as a script, not on import.
if __name__ == "__main__":
    recompile_all()
