import os
import shutil
import base64
import mimetypes
from pathlib import Path
from urllib.parse import quote

from bs4 import BeautifulSoup
from htmlmin import minify
from csscompressor import compress
from jsmin import jsmin

# === Paths ===
# All locations are derived from the directory that holds this script, so the
# result does not depend on the current working directory.
SCRIPT_DIR = Path(__file__).resolve().parent
SRC_DIR = SCRIPT_DIR  # pages are searched for next to the script itself
OUT_DIR = SCRIPT_DIR.joinpath("recompiled-pages")
ASSETS_DIR = OUT_DIR.joinpath("assets")

# Create the output tree at import time; re-running over an existing tree is
# not an error.
OUT_DIR.mkdir(parents=True, exist_ok=True)
ASSETS_DIR.mkdir(parents=True, exist_ok=True)

# === File types to copy ===
# Lower-case suffixes (leading dot included) of files that are copied into the
# assets folder when an href points at them.
COPY_EXTENSIONS = {
    # fonts
    ".woff",
    ".woff2",
    ".ttf",
    ".eot",
    ".otf",
    # audio / video
    ".mp4",
    ".mp3",
    # icons and images
    ".ico",
    ".svg",
    ".webp",
    ".bmp",
    ".gif",
}

# === Helpers ===
def read_file(path):
    """Return the text of *path* decoded as UTF-8.

    Bytes that are not valid UTF-8 are dropped instead of raising, so a page
    in a slightly different encoding can still be processed.
    """
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        return handle.read()

def write_file(path, content):
    """Write *content* to *path* as UTF-8, creating missing parent folders.

    An existing file at *path* is overwritten.
    """
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")

def sanitize_filename(filename):
    """Return *filename* reduced to characters that are safe in a file name.

    The name is percent-encoded with no character exempted; an encoded space
    then becomes a single underscore and every remaining ``%`` becomes an
    underscore as well (so ``/`` ends up as ``_2F``).
    """
    percent_encoded = quote(filename, safe="")
    spaces_collapsed = percent_encoded.replace("%20", "_")
    return spaces_collapsed.replace("%", "_")

def copy_asset(src_path):
    """Copy a local file into ASSETS_DIR and return its output-relative path.

    Args:
        src_path: ``Path`` of the file to copy.

    Returns:
        ``Path("assets") / <stored name>``, or ``None`` when *src_path* is not
        an existing regular file.

    The assets folder is flat, so files from different source folders can
    share a basename. An identical file that is already present is reused;
    a *different* file with the same basename is stored under
    ``<stem>-<content hash><suffix>`` instead of silently pointing the page
    at the earlier, unrelated file.
    """
    import filecmp
    import hashlib

    src_path = src_path.resolve()
    if not src_path.is_file():
        return None

    dest_path = ASSETS_DIR / src_path.name
    # filecmp.cmp first compares the os.stat signatures (which copy2
    # preserves), so re-referencing an already copied asset stays cheap.
    if dest_path.exists() and not filecmp.cmp(src_path, dest_path):
        digest = hashlib.sha256()
        with src_path.open("rb") as stream:
            # Hash in chunks so large media files are not loaded at once.
            for chunk in iter(lambda: stream.read(1 << 20), b""):
                digest.update(chunk)
        unique_name = f"{src_path.stem}-{digest.hexdigest()[:8]}{src_path.suffix}"
        dest_path = ASSETS_DIR / unique_name

    if not dest_path.exists():
        shutil.copy2(src_path, dest_path)
    return Path("assets") / dest_path.name

def inline_css(css_path, base_path):
    """Return the minified text of a local stylesheet, ready to be inlined.

    Args:
        css_path: Stylesheet reference as written in the page (typically the
            ``href`` of a ``<link>``). A ``?query`` or ``#fragment`` suffix is
            ignored when locating the file.
        base_path: Directory the reference is relative to.

    Returns:
        The compressed CSS, or ``""`` when the stylesheet is not a file.

    Every ``url(...)`` that points at an existing local file is copied with
    ``copy_asset`` and rewritten to the copied location (keeping a
    ``#fragment`` such as an SVG element id). All other ``url(...)`` tokens,
    including ``data:`` and remote URLs, are left exactly as written.
    """
    import re

    def split_ref(ref):
        # "font.woff2?v=4.7#iefix" -> ("font.woff2", "#iefix")
        before_hash, hash_sign, fragment = ref.partition("#")
        return before_hash.partition("?")[0], hash_sign + fragment

    css_path = (base_path / split_ref(str(css_path))[0]).resolve()
    if not css_path.is_file():
        return ""
    css_text = read_file(css_path)

    # Quoted and unquoted forms are matched separately so that a ")" inside a
    # quoted value (common in inline SVG data URIs) does not end the match.
    url_pattern = re.compile(
        r"""url\(\s*(?:"([^"]*)"|'([^']*)'|([^'")]*?))\s*\)""",
        re.IGNORECASE,
    )

    def replace_url(match):
        original = match.group(0)
        url = next(g for g in match.groups() if g is not None).strip()
        if url.lower().startswith(("data:", "http://", "https://", "//")):
            return original
        path_part, fragment = split_ref(url)
        if not path_part:
            # Fragment-only or empty reference: nothing to copy.
            return original
        copied = copy_asset((css_path.parent / path_part).resolve())
        if copied:
            # Quoted so that file names containing spaces stay valid CSS.
            return f'url("{copied.as_posix()}{fragment}")'
        return original

    css_text = url_pattern.sub(replace_url, css_text)
    return compress(css_text)

# === Main HTML Processing ===
def _is_remote(url):
    """Return True when *url* points at another host rather than a local file."""
    return url.lower().startswith(("http://", "https://", "//"))


def _split_ref(ref):
    """Split a URL reference into ``(path, fragment)``, dropping any query.

    ``"sprite.svg?v=2#icon"`` becomes ``("sprite.svg", "#icon")``.
    """
    before_hash, hash_sign, fragment = ref.partition("#")
    return before_hash.partition("?")[0], hash_sign + fragment


def _inline_stylesheets(soup, base_path):
    """Replace local ``<link rel="stylesheet">`` tags with ``<style>`` tags."""
    for link in soup.find_all("link", rel="stylesheet"):
        href = link.get("href", "")
        if _is_remote(href):
            link.decompose()
            continue
        try:
            css_code = inline_css(_split_ref(href)[0], base_path)
        except Exception as exc:
            # Best effort: a broken stylesheet must not fail the whole page.
            print(f"⚠️ Dropping stylesheet {href!r}: {exc}")
            link.decompose()
            continue
        style_tag = soup.new_tag("style")
        # Keep the media query, otherwise e.g. a print stylesheet would
        # suddenly apply on screen once inlined.
        if link.get("media"):
            style_tag["media"] = link["media"]
        style_tag.string = css_code
        link.replace_with(style_tag)


def _inline_scripts(soup, base_path):
    """Replace local ``<script src>`` tags with minified inline scripts."""
    for script in soup.find_all("script", src=True):
        src = script["src"]
        if _is_remote(src):
            script.decompose()
            continue
        js_path = (base_path / _split_ref(src)[0]).resolve()
        if not js_path.is_file():
            script.decompose()
            continue
        try:
            js_code = jsmin(read_file(js_path))
        except Exception as exc:
            print(f"⚠️ Dropping script {src!r}: {exc}")
            script.decompose()
            continue
        new_script = soup.new_tag("script")
        # The type decides how the browser interprets the code (e.g. "module").
        if script.get("type"):
            new_script["type"] = script["type"]
        new_script.string = js_code
        script.replace_with(new_script)


def _inline_images(soup, base_path):
    """Embed local ``<img>`` files as base64 data URIs."""
    for img in soup.find_all("img", src=True):
        src = img["src"]
        if src.startswith("data:"):
            # Already embedded; resolving it as a file path would delete it.
            continue
        if _is_remote(src):
            img.decompose()
            continue
        img_path = (base_path / _split_ref(src)[0]).resolve()
        if not img_path.is_file():
            img.decompose()
            continue
        try:
            mime, _ = mimetypes.guess_type(str(img_path))
            b64 = base64.b64encode(img_path.read_bytes()).decode()
        except Exception:
            # Embedding failed: fall back to a copied file.
            copied = copy_asset(img_path)
            if copied:
                img["src"] = copied.as_posix()
            else:
                img.decompose()
        else:
            img["src"] = f"data:{mime or 'application/octet-stream'};base64,{b64}"
            # srcset would still point at files that are not shipped.
            img.attrs.pop("srcset", None)


def _rewrite_src_assets(soup, base_path):
    """Copy the targets of the remaining ``src`` attributes (audio, video, ...)."""
    for tag in soup.find_all(src=True):
        # Descendants of an element decomposed earlier in this loop are still
        # in the result list but have been destroyed; skip them.
        if getattr(tag, "decomposed", False):
            continue
        if tag.name == "img":
            continue
        src = tag["src"]
        if _is_remote(src) or src.startswith("data:"):
            continue
        path_part, fragment = _split_ref(src)
        copied = copy_asset((base_path / path_part).resolve())
        if copied:
            tag["src"] = copied.as_posix() + fragment
        else:
            tag.decompose()


def _rewrite_href_assets(soup, base_path):
    """Copy ``href`` targets whose suffix is listed in COPY_EXTENSIONS."""
    for tag in soup.find_all(href=True):
        if getattr(tag, "decomposed", False):
            continue
        href = tag["href"]
        if _is_remote(href) or href.startswith(("data:", "#")):
            continue
        path_part, fragment = _split_ref(href)
        asset_path = (base_path / path_part).resolve()
        if asset_path.suffix.lower() not in COPY_EXTENSIONS:
            continue
        copied = copy_asset(asset_path)
        if copied:
            tag["href"] = copied.as_posix() + fragment
        else:
            tag.decompose()


# === Main HTML Processing ===
def inline_assets(html_path):
    """Return a minified, mostly self-contained version of one page.

    Args:
        html_path: ``Path`` of the HTML/PHP source file.

    Returns:
        The minified HTML as a string.

    Processing order:
      1. add ``<base href="./">`` when the page has a ``<head>`` but no base;
      2. inline local stylesheets, remove remote ones;
      3. inline local scripts, remove remote or missing ones;
      4. embed local images as data URIs, remove remote or missing ones;
      5. copy the targets of other ``src`` attributes into the assets folder;
      6. copy ``href`` targets with a suffix in COPY_EXTENSIONS.

    ``?query`` parts of local references are ignored when locating files.
    """
    html_path = html_path.resolve()
    base_path = html_path.parent
    soup = BeautifulSoup(read_file(html_path), "html.parser")

    # Fragments (common for .php partials) have no <head>; skip the <base>
    # tag there instead of failing the whole page.
    if soup.head is not None and soup.find("base") is None:
        soup.head.insert(0, soup.new_tag("base", href="./"))

    _inline_stylesheets(soup, base_path)
    _inline_scripts(soup, base_path)
    _inline_images(soup, base_path)
    _rewrite_src_assets(soup, base_path)
    _rewrite_href_assets(soup, base_path)

    # Minify HTML
    return minify(str(soup), remove_empty_space=True, remove_comments=True)

# === Batch Processor ===
def process_all_pages():
    """Compile every ``.html``/``.php`` file below SRC_DIR into OUT_DIR.

    The folder layout is mirrored under OUT_DIR and every result is written
    with an ``.html`` suffix. Folders inside OUT_DIR itself are skipped. A
    page that fails is reported on stdout and does not stop the batch.
    """
    page_suffixes = (".html", ".php")
    for current_dir, _subdirs, filenames in os.walk(SRC_DIR):
        directory = Path(current_dir)
        # Never recompile pages that this script produced earlier.
        if directory == OUT_DIR or directory.is_relative_to(OUT_DIR):
            continue
        for filename in filenames:
            if not filename.lower().endswith(page_suffixes):
                continue
            html_path = directory / filename
            mirrored = OUT_DIR / html_path.relative_to(SRC_DIR)
            try:
                compiled_html = inline_assets(html_path)
                target = mirrored.with_suffix(".html")
                write_file(target, compiled_html)
                print(f"✅ Compiled: {target}")
            except Exception as e:
                print(f"❌ Error processing {html_path}: {e}")

# === Run ===
# Start the batch only when this file is executed as a script; importing it
# as a module does not compile any page.
if __name__ == "__main__":
    process_all_pages()
