import base64
import filecmp
import mimetypes
import os
import re
import shutil
from pathlib import Path

from bs4 import BeautifulSoup
from csscompressor import compress
from htmlmin import minify
from jsmin import jsmin

# Root of the site sources; process_all_pages() walks everything below it.
SRC_DIR = Path("/home/josefkulovany").resolve()
# Compiled pages are written below this directory.
OUT_DIR = SRC_DIR / "recompiled-pages"
# Flat directory that receives copies of referenced assets.
ASSETS_DIR = OUT_DIR / "assets"

# Create both output directories up front (this runs at import time).
OUT_DIR.mkdir(parents=True, exist_ok=True)
ASSETS_DIR.mkdir(parents=True, exist_ok=True)

# Lower-case file suffixes of href targets that are copied into ASSETS_DIR.
COPY_EXTENSIONS = {
    ".woff", ".woff2", ".ttf", ".eot", ".otf",
    ".mp4", ".mp3", ".ico", ".svg", ".webp",
    ".bmp", ".gif", ".png", ".jpg", ".jpeg",
    ".css", ".js",
}

def read_file(path):
    """Return the text of *path* decoded as UTF-8.

    Bytes that are not valid UTF-8 are dropped instead of raising, so the
    result may be shorter than the raw file.

    Args:
        path: A ``pathlib.Path`` pointing at the file to read.

    Returns:
        The decoded file contents as a ``str``.
    """
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        return handle.read()

def write_file(path, content):
    """Write *content* to *path* as UTF-8, creating parent directories.

    Args:
        path: A ``pathlib.Path`` for the destination file; an existing file
            is overwritten.
        content: The text to write.
    """
    parent_dir = path.parent
    # Missing intermediate directories are created; existing ones are fine.
    parent_dir.mkdir(exist_ok=True, parents=True)
    path.write_text(content, encoding="utf-8")

def copy_asset(src_path):
    """Copy an asset into ``ASSETS_DIR`` and return its path relative to the output root.

    The destination keeps the source file name. If that name is already
    taken by a *different* file, ``_1``, ``_2``, ... is appended to the stem
    until a free name is found. If a byte-identical copy is already present
    under one of those names it is reused, so an asset referenced from many
    pages (or on a re-run) is stored only once instead of piling up as
    ``name_1``, ``name_2``, ...

    Args:
        src_path: ``pathlib.Path`` of the asset to copy.

    Returns:
        ``Path("assets") / <destination name>``, or ``None`` when *src_path*
        is not an existing regular file.
    """
    src_path = src_path.resolve()
    try:
        if not src_path.is_file():
            return None
    except OSError:
        # e.g. "file name too long" for junk URLs: treat as a missing asset.
        return None

    stem, suffix = src_path.stem, src_path.suffix
    dest_name = src_path.name
    counter = 1
    while True:
        dest_path = ASSETS_DIR / dest_name
        if not dest_path.exists():
            shutil.copy2(src_path, dest_path)
            break
        # shallow=False compares contents, not just size and mtime.
        if dest_path.is_file() and filecmp.cmp(src_path, dest_path, shallow=False):
            break  # identical copy already in place: reuse it
        dest_name = f"{stem}_{counter}{suffix}"
        counter += 1
    return Path("assets") / dest_name

def inline_css(css_path, base_path, processed_files=None):
    """Return the compressed text of a stylesheet with local ``@import``s inlined.

    Local ``@import`` targets are recursively replaced by their own
    (processed) contents. Local ``url(...)`` references are copied into the
    assets directory via ``copy_asset`` and rewritten to
    ``/recompiled-pages/assets/...``; a trailing ``?query`` / ``#fragment``
    is ignored when locating the file and re-attached to the rewritten URL.
    Remote (``http...``, ``//...``), ``data:`` and fragment-only URLs, and
    any URL whose file cannot be found, are left exactly as written.

    Args:
        css_path: Stylesheet location, relative to *base_path* (or absolute).
        base_path: Directory against which *css_path* is resolved.
        processed_files: Set of already-inlined stylesheet paths, shared
            along the recursion to break ``@import`` cycles. Callers
            normally omit it.

    Returns:
        The compressed CSS, or ``""`` when the file does not exist or was
        already inlined earlier in this call chain.
    """
    if processed_files is None:
        processed_files = set()
    css_path = (base_path / css_path).resolve()
    if css_path in processed_files:
        return ""
    processed_files.add(css_path)
    if not css_path.exists():
        return ""

    css_text = read_file(css_path)
    css_dir = css_path.parent
    external_prefixes = ("http", "data:", "//")

    def import_replacer(match):
        import_url = match.group(1).strip().strip('\'"')
        if import_url.startswith(external_prefixes):
            # Keep remote imports verbatim; resolving them as local paths
            # would silently delete them.
            return match.group(0)
        return inline_css(import_url, css_dir, processed_files)

    css_text = re.sub(r'@import\s+url\(([^)]+)\);?', import_replacer, css_text)
    css_text = re.sub(r'@import\s+["\']([^"\']+)["\'];?', import_replacer, css_text)

    def url_replacer(match):
        raw_url = match.group(1).strip().strip('\'"')
        if raw_url.startswith(external_prefixes) or raw_url.startswith("#"):
            # Return the original token untouched: re-emitting it without
            # its quotes corrupts quoted data URIs.
            return match.group(0)
        # "font.eot?#iefix" / "sprite.svg#icon": only the part before the
        # query or fragment names a file on disk.
        local_part = re.split(r"[?#]", raw_url, maxsplit=1)[0]
        if not local_part:
            return match.group(0)
        copied = copy_asset((css_dir / local_part).resolve())
        if not copied:
            return match.group(0)
        trailer = raw_url[len(local_part):]
        return f"url('/recompiled-pages/{copied.as_posix()}{trailer}')"

    css_text = re.sub(r'url\(([^)]+)\)', url_replacer, css_text)

    return compress(css_text)

def _in_head(tag):
    """Return True when *tag* is located inside the document's <head>."""
    return tag.find_parent("head") is not None


def _drop_unless_in_head(tag):
    """Remove an unresolvable body tag; leave head tags exactly as written."""
    if not _in_head(tag):
        tag.decompose()


def _ensure_head_and_base(soup):
    """Make sure the page has a <head> carrying a <base> for the output root."""
    head = soup.head
    if head is None:
        if soup.html is None:
            # Neither <html> nor <head>: not a full page (e.g. an include
            # fragment). Reject it explicitly rather than fail obscurely.
            raise ValueError("document has no <html> or <head> element")
        head = soup.new_tag("head")
        soup.html.insert(0, head)
    if not soup.find("base"):
        head.insert(0, soup.new_tag("base", href="/recompiled-pages/"))


def _inline_stylesheets(soup, base_path):
    """Replace local <link rel="stylesheet"> tags with inline <style> blocks."""
    for link in soup.find_all("link", rel="stylesheet"):
        href = link.get("href", "")
        if href.startswith("http"):
            continue
        try:
            css_code = inline_css(href, base_path)
        except Exception:
            # Best-effort: reading or compressing the CSS can fail in many ways.
            _drop_unless_in_head(link)
            continue
        if not css_code and _in_head(link):
            # Nothing could be inlined; keep the head reference intact.
            continue
        style_tag = soup.new_tag("style")
        style_tag.string = css_code
        media = link.get("media")
        if media:
            # Without this a print-only sheet would start applying on screen.
            style_tag["media"] = media
        link.replace_with(style_tag)


def _inline_scripts(soup, base_path):
    """Replace local <script src> tags with inline, minified scripts."""
    for script in soup.find_all("script", src=True):
        src = script["src"]
        if src.startswith("http"):
            # Remote scripts stay remote. Body scripts are reduced to a bare
            # <script src>; head scripts are left untouched.
            if not _in_head(script):
                script.replace_with(soup.new_tag("script", src=src))
            continue
        try:
            js_path = (base_path / src).resolve()
            js_code = jsmin(read_file(js_path)) if js_path.exists() else None
        except Exception:
            # Best-effort: unreadable or un-minifiable script.
            js_code = None
        if js_code is None:
            _drop_unless_in_head(script)
            continue
        inline_script = soup.new_tag("script")
        inline_script.string = js_code
        script.replace_with(inline_script)


def _inline_images(soup, base_path):
    """Embed local <img> sources as base64 data URIs."""
    for img in soup.find_all("img", src=True):
        src = img["src"]
        if src.startswith(("http", "data:")):
            continue
        try:
            img_path = (base_path / src).resolve()
            payload = img_path.read_bytes()
        except (OSError, ValueError):
            # Missing or unreadable image: leave the tag as it is.
            continue
        mime = mimetypes.guess_type(str(img_path))[0] or "application/octet-stream"
        encoded = base64.b64encode(payload).decode()
        img["src"] = f"data:{mime};base64,{encoded}"
        # srcset would still point at the external files, so drop it.
        img.attrs.pop("srcset", None)


def _relocate_remaining_assets(soup, base_path):
    """Copy remaining local src/href targets to the assets directory and relink them."""
    for tag in soup.find_all(src=True):
        src = tag["src"]
        if src.startswith(("http", "data:")):
            continue
        copied = copy_asset((base_path / src).resolve())
        if copied:
            tag["src"] = f"/recompiled-pages/{copied.as_posix()}"

    for tag in soup.find_all(href=True):
        href = tag["href"]
        if href.startswith(("http", "data:", "#")):
            continue
        asset_path = (base_path / href).resolve()
        # Only known asset types are copied; page links etc. are left alone.
        if asset_path.suffix.lower() not in COPY_EXTENSIONS:
            continue
        copied = copy_asset(asset_path)
        if copied:
            tag["href"] = f"/recompiled-pages/{copied.as_posix()}"


def inline_assets(html_path):
    """Return a minified, self-contained version of the page at *html_path*.

    Processing steps, in order:

    1. Ensure the page has a ``<head>`` with a ``<base href="/recompiled-pages/">``
       unless the page already declares a ``<base>``.
    2. Inline local stylesheets (``inline_css``) and local scripts (``jsmin``).
    3. Embed local ``<img>`` files as base64 data URIs.
    4. Copy any remaining local ``src`` / ``href`` assets with ``copy_asset``
       and point the attribute at ``/recompiled-pages/assets/...``.

    Tags inside ``<head>`` are processed in place. Extracting them first (as
    an earlier version did) hid them from every step, so head stylesheets
    and scripts were never inlined. A head tag is only replaced when its
    asset was actually resolved; otherwise it is left exactly as written.
    Body tags that cannot be resolved are removed, except images, which are
    left as they are.

    Args:
        html_path: ``pathlib.Path`` of the HTML (or PHP) page to compile.

    Returns:
        The minified HTML as a ``str``.

    Raises:
        ValueError: If the document has neither an ``<html>`` nor a
            ``<head>`` element.
    """
    html_path = html_path.resolve()
    soup = BeautifulSoup(read_file(html_path), "html.parser")
    base_path = html_path.parent

    _ensure_head_and_base(soup)
    _inline_stylesheets(soup, base_path)
    _inline_scripts(soup, base_path)
    _inline_images(soup, base_path)
    _relocate_remaining_assets(soup, base_path)

    return minify(str(soup), remove_empty_space=True, remove_comments=True)

def process_all_pages():
    """Compile every ``.html`` / ``.php`` file under ``SRC_DIR`` into ``OUT_DIR``.

    Each page is passed through ``inline_assets`` and written to the same
    relative location below ``OUT_DIR`` with an ``.html`` suffix. A failure
    on one page is reported on stdout and does not stop the run.

    NOTE: ``page.php`` and ``page.html`` in the same directory map to the
    same output file; whichever is processed last wins.
    """
    for root, dirs, files in os.walk(SRC_DIR):
        root_path = Path(root)
        # Prune the output tree in place so os.walk never descends into it
        # (it would otherwise traverse every compiled page and copied asset).
        dirs[:] = [name for name in dirs if root_path / name != OUT_DIR]
        for file in files:
            if not file.lower().endswith((".html", ".php")):
                continue
            html_path = root_path / file
            out_path = (OUT_DIR / html_path.relative_to(SRC_DIR)).with_suffix(".html")
            try:
                compiled_html = inline_assets(html_path)
                write_file(out_path, compiled_html)
                print(f"✅ Compiled: {out_path}")
            except Exception as e:
                # Top-level boundary: report the page and carry on with the rest.
                print(f"❌ Error processing {html_path}: {e}")

# Script entry point: compile all pages only when the file is run directly.
if __name__ == "__main__":
    process_all_pages()
