import base64
import filecmp
import mimetypes
import os
import re
import shutil
from pathlib import Path

from bs4 import BeautifulSoup
from csscompressor import compress
from htmlmin import minify
from jsmin import jsmin

# Root of the site sources that process_all_pages() walks (hard-coded
# absolute path, resolved once at import time).
SRC_DIR = Path("/home/josefkulovany").resolve()
# Compiled pages are written beneath the source root; process_all_pages()
# skips this directory while walking SRC_DIR.
OUT_DIR = SRC_DIR / "recompiled-pages"
# Shared folder that copy_asset() copies non-inlined files into.
ASSETS_DIR = OUT_DIR / "assets"
# NOTE: both directories are created as a side effect of importing this module.
OUT_DIR.mkdir(exist_ok=True, parents=True)
ASSETS_DIR.mkdir(exist_ok=True, parents=True)

# File suffixes (fonts, media, images) that inline_assets() copies into
# ASSETS_DIR when they appear as the target of an href attribute.
COPY_EXTENSIONS = {".woff", ".woff2", ".ttf", ".eot", ".otf", ".mp4", ".mp3", ".ico", ".svg", ".webp", ".bmp", ".gif", ".png", ".jpg", ".jpeg"}

def read_file(path):
    """Return the text content of *path*.

    The file is decoded as UTF-8; byte sequences that are not valid UTF-8
    are dropped rather than raising.

    Args:
        path: A ``pathlib.Path`` pointing at the file to read.

    Returns:
        The decoded file content as ``str``.
    """
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        return handle.read()

def write_file(path, content):
    """Write *content* to *path* as UTF-8, creating missing parent folders.

    Args:
        path: A ``pathlib.Path`` for the destination file; an existing file
            is overwritten.
        content: The text to write.
    """
    folder = path.parent
    folder.mkdir(exist_ok=True, parents=True)
    path.write_text(content, encoding="utf-8")

def copy_asset(src_path):
    """Copy *src_path* into ``ASSETS_DIR`` and return its page-relative path.

    The destination keeps the source file name. If that name is already
    taken in ``ASSETS_DIR`` by a file with *different* content, a numeric
    suffix (``name_1.ext``, ``name_2.ext``, ...) is appended until a free
    name is found. If a candidate name is already taken by a byte-identical
    file, that existing copy is reused instead of copying again, so an
    asset referenced from many pages (or on a re-run) is stored only once.

    Args:
        src_path: ``pathlib.Path`` of the file to copy.

    Returns:
        ``Path("assets") / <destination name>``, or ``None`` when
        *src_path* is not an existing regular file.
    """
    src_path = src_path.resolve()
    # is_file() is already False for paths that do not exist.
    if not src_path.is_file():
        return None

    stem, suffix = src_path.stem, src_path.suffix
    dest_name = src_path.name
    counter = 1
    while True:
        dest_path = ASSETS_DIR / dest_name
        if not dest_path.exists():
            shutil.copy2(src_path, dest_path)
            break
        try:
            # shallow=False forces a content comparison; size/mtime alone
            # could wrongly match two different files of the same name.
            already_copied = dest_path.is_file() and filecmp.cmp(
                src_path, dest_path, shallow=False
            )
        except OSError:
            # Unreadable candidate: treat it as a collision and move on.
            already_copied = False
        if already_copied:
            break
        dest_name = f"{stem}_{counter}{suffix}"
        counter += 1
    return Path("assets") / dest_name

def _split_url_suffix(url):
    """Split *url* into ``(path, tail)`` at the first ``?`` or ``#``.

    *tail* keeps the delimiter (``"?v=1"``, ``"#iefix"``) and is the empty
    string when *url* contains neither character.
    """
    hit = re.search(r"[?#]", url)
    if hit is None:
        return url, ""
    return url[:hit.start()], url[hit.start():]


def inline_css(css_path, base_path, processed_files=None):
    """Return the compressed text of a stylesheet with local imports inlined.

    Local ``@import`` rules are replaced recursively by the imported
    stylesheet's content. Local ``url(...)`` references are copied with
    ``copy_asset`` and rewritten to the copied location. References
    starting with ``http``, ``data:`` or ``//`` (protocol-relative) are
    left pointing where they were.

    A reference such as ``font.eot?#iefix`` or ``style.css?ver=3`` is first
    looked up literally; only if no such file exists is the ``?query`` /
    ``#fragment`` tail stripped for the lookup (and, for ``url(...)``,
    re-appended to the rewritten URL).

    Args:
        css_path: Stylesheet location (``str`` or ``Path``), resolved
            against *base_path*.
        base_path: ``Path`` of the directory *css_path* is relative to.
        processed_files: Set of already-inlined stylesheet paths, shared
            across the recursion so a file is inlined at most once
            (which also breaks circular imports). Created when ``None``.

    Returns:
        The compressed CSS, or ``""`` when the stylesheet is missing, is
        not a regular file, or was already processed.
    """
    if processed_files is None:
        processed_files = set()

    external_prefixes = ("http", "data:", "//")

    requested = str(css_path)
    css_path = (base_path / css_path).resolve()
    if not css_path.is_file():
        # Cache-busting tails ("style.css?ver=3") are not part of the file
        # name on disk; retry without them.
        bare, tail = _split_url_suffix(requested)
        if bare and tail:
            fallback = (base_path / bare).resolve()
            if fallback.is_file():
                css_path = fallback

    if css_path in processed_files:
        return ""  # Already inlined (or a circular import)
    processed_files.add(css_path)

    if not css_path.is_file():
        return ""

    css_text = read_file(css_path)
    css_dir = css_path.parent

    def import_replacer(match):
        """Replace one local @import rule by the imported stylesheet text."""
        import_url = match.group(1).strip().strip('\'"')
        if import_url.startswith(external_prefixes):
            return match.group(0)
        return inline_css(import_url, css_dir, processed_files)

    css_text = re.sub(r'@import\s+url\(([^)]+)\);?', import_replacer, css_text)
    css_text = re.sub(r'@import\s+["\']([^"\']+)["\'];?', import_replacer, css_text)

    def url_replacer(match):
        """Copy one local url(...) target and point the rule at the copy."""
        orig_url = match.group(1).strip().strip('\'"')
        if orig_url.startswith(external_prefixes):
            return f"url({orig_url})"

        copied = copy_asset((css_dir / orig_url).resolve())
        tail = ""
        if copied is None:
            bare, suffix = _split_url_suffix(orig_url)
            if bare and suffix:
                copied = copy_asset((css_dir / bare).resolve())
                tail = suffix  # keep e.g. "#iefix" on the rewritten URL
        if copied:
            return f"url({copied.as_posix()}{tail})"
        # Nothing to copy: keep the reference as written (minus quotes).
        return f"url({orig_url})"

    # NOTE: this pass also sees url(...) values coming from inlined imports,
    # which were already rewritten by the recursive call.
    css_text = re.sub(r'url\(([^)]+)\)', url_replacer, css_text)

    return compress(css_text)

def _image_data_uri(img_path):
    """Return a base64 ``data:`` URI embedding the image file *img_path*.

    The MIME type comes from ``mimetypes`` so that e.g. ``.svg`` becomes
    ``image/svg+xml`` and ``.jpg`` becomes ``image/jpeg``. When no image
    type is known for the suffix, ``image/<suffix>`` is used.
    """
    guessed, _ = mimetypes.guess_type(img_path.name)
    if guessed and guessed.startswith("image/"):
        mime = guessed
    else:
        mime = f"image/{img_path.suffix.lstrip('.')}"
    payload = base64.b64encode(img_path.read_bytes()).decode()
    return f"data:{mime};base64,{payload}"


def inline_assets(html_path):
    """Build a self-contained, minified version of one HTML page.

    Steps, in order:

    1. ``<link rel="stylesheet">``: local sheets are inlined as ``<style>``
       via ``inline_css``; remote ones are removed.
    2. ``<script src>``: local scripts are minified and inlined; remote or
       missing ones are removed.
    3. ``<img src>``: local images become base64 ``data:`` URIs (and lose
       ``srcset``); remote or missing ones are removed.
    4. Any remaining local ``src`` attribute: the file is copied with
       ``copy_asset`` and the attribute rewritten; the tag is removed when
       the file is missing.
    5. ``href`` attributes whose suffix is in ``COPY_EXTENSIONS``: same
       copy-and-rewrite treatment.

    URLs starting with ``http`` or ``//`` count as remote. Attributes that
    already hold a ``data:`` URI are left untouched.

    NOTE(review): rewritten URLs have the form ``assets/<name>``, i.e. they
    are relative to ``OUT_DIR``, while ``process_all_pages`` writes pages
    from nested source folders into matching subfolders of ``OUT_DIR``.
    Such URLs will not resolve from those subfolders - confirm how nested
    pages are meant to be served.

    Args:
        html_path: ``pathlib.Path`` of the page to process.

    Returns:
        The minified HTML as ``str``.
    """
    html_path = html_path.resolve()
    soup = BeautifulSoup(read_file(html_path), "html.parser")
    base_path = html_path.parent
    remote_prefixes = ("http", "//")

    # 1. Stylesheets
    for link in soup.find_all("link", rel="stylesheet"):
        href = link.get("href", "")
        if href.startswith("data:"):
            continue  # already self-contained
        if href.startswith(remote_prefixes):
            link.decompose()
            continue
        try:
            style_tag = soup.new_tag("style")
            style_tag.string = inline_css(href, base_path)
            link.replace_with(style_tag)
        except Exception:
            # Best effort: a stylesheet that cannot be processed is dropped
            # rather than failing the whole page.
            link.decompose()

    # 2. Scripts
    for script in soup.find_all("script", src=True):
        src = script["src"]
        if src.startswith("data:"):
            continue
        if src.startswith(remote_prefixes):
            script.decompose()
            continue
        js_path = (base_path / src).resolve()
        if not js_path.exists():
            script.decompose()
            continue
        try:
            inline_script = soup.new_tag("script")
            inline_script.string = jsmin(read_file(js_path))
            script.replace_with(inline_script)
        except Exception:
            script.decompose()

    # 3. Images
    for img in soup.find_all("img", src=True):
        src = img["src"]
        if src.startswith("data:"):
            # Already embedded; treating the payload as a file name would
            # either delete the image or fail on an over-long path.
            continue
        if src.startswith(remote_prefixes):
            img.decompose()
            continue
        img_path = (base_path / src).resolve()
        if not img_path.is_file():
            img.decompose()
            continue
        try:
            img["src"] = _image_data_uri(img_path)
            # srcset candidates would still point at external files.
            img.attrs.pop("srcset", None)
        except Exception:
            img.decompose()

    # 4. Remaining src attributes (audio, video, source, iframe, ...)
    for tag in soup.find_all(src=True):
        # decompose() also destroys descendants, so a tag collected by
        # find_all may be dead by the time the loop reaches it.
        if getattr(tag, "decomposed", False):
            continue
        src = tag["src"]
        if src.startswith(remote_prefixes) or src.startswith("data:"):
            continue
        copied = copy_asset((base_path / src).resolve())
        if copied:
            tag["src"] = copied.as_posix()
        else:
            tag.decompose()

    # 5. href attributes pointing at fonts, icons, media, ...
    for tag in soup.find_all(href=True):
        if getattr(tag, "decomposed", False):
            continue
        href = tag["href"]
        if href.startswith(remote_prefixes) or href.startswith(("data:", "#")):
            continue
        asset_path = (base_path / href).resolve()
        if asset_path.suffix.lower() not in COPY_EXTENSIONS:
            continue
        copied = copy_asset(asset_path)
        if copied:
            tag["href"] = copied.as_posix()
        else:
            tag.decompose()

    return minify(str(soup), remove_empty_space=True, remove_comments=True)

def process_all_pages():
    """Compile every ``.html`` / ``.php`` file under ``SRC_DIR``.

    Each page is passed through ``inline_assets`` and written to the same
    relative location under ``OUT_DIR`` with an ``.html`` suffix. A failure
    on one page is reported and does not stop the remaining pages.

    The output directory is pruned from the walk, so previously compiled
    pages and copied assets are neither re-processed nor traversed.
    Directories and files are visited in sorted order so that repeated
    runs process pages in the same sequence.
    """
    for root, dirs, files in os.walk(SRC_DIR):
        root_path = Path(root)
        # Assigning to dirs[:] tells os.walk which subdirectories to enter.
        dirs[:] = sorted(name for name in dirs if root_path / name != OUT_DIR)
        for file in sorted(files):
            if not file.lower().endswith((".html", ".php")):
                continue
            html_path = root_path / file
            out_path = OUT_DIR / html_path.relative_to(SRC_DIR)
            try:
                target = out_path.with_suffix(".html")
                compiled_html = inline_assets(html_path)
                write_file(target, compiled_html)
                print(f"✅ Compiled: {target}")
            except Exception as e:
                # Top-level boundary: report and carry on with the next page.
                print(f"❌ Error processing {html_path}: {e}")

# Script entry point: compile every page under SRC_DIR when run directly.
if __name__ == "__main__":
    process_all_pages()
