import os
import re
from pathlib import Path
from bs4 import BeautifulSoup

# Extensions (lowercase, with leading dot) that count as image/media files.
MEDIA_EXTS = {'.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.bmp', '.ico'}

# Directory where symlinks to all found media will be stored.
# NOTE: the path is relative, so the directory is created in the current
# working directory, and it is created as a side effect of importing this module.
SYMLINK_DEST = Path("public_assets")
SYMLINK_DEST.mkdir(exist_ok=True)

# Root directory to scan: the current working directory, made absolute.
ROOT_DIR = Path(".").resolve()

# Patterns to extract src/href/urls.
# Captures the quoted value of a `src=` or `href=` attribute.
# NOTE(review): not referenced by the code visible in this file.
SRC_HREF_REGEX = re.compile(r'''(?:src|href)\s*=\s*["']([^"']+)["']''')
# Captures the target of a CSS `url(...)` reference, quoted or bare.
# CSS allows whitespace between the parentheses and the value, so it is
# skipped on both sides and kept out of the captured group.
URL_REGEX = re.compile(r'url\(\s*["\']?([^"\')]+?)["\']?\s*\)')

def find_media_references(filepath):
    """Extract all image/media references from a file.

    For ``.html`` and ``.php`` files the ``src`` and ``href`` attributes of
    ``<img>``, ``<link>`` and ``<script>`` tags are collected when they point
    at a known media extension. For every file, the targets of CSS-style
    ``url(...)`` references are collected as well; those are NOT filtered by
    extension here.

    Args:
        filepath: Path (or path string) of the file to scan.

    Returns:
        A set of reference strings as written in the file, with surrounding
        whitespace removed. Query strings and fragments are left in place.
    """
    filepath = Path(filepath)
    # Read everything up front so the file handle is closed before parsing.
    with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
        content = f.read()

    refs = set()
    media_suffixes = tuple(MEDIA_EXTS)

    # Use BeautifulSoup for structured HTML
    if filepath.suffix in {".html", ".php"}:
        soup = BeautifulSoup(content, "html.parser")
        for tag in soup.find_all(["img", "link", "script"]):
            for attr in ("src", "href"):
                val = tag.get(attr)
                # Skip missing or non-string attribute values.
                if not isinstance(val, str):
                    continue
                val = val.strip()
                # Compare the extension without any "?query" or "#fragment"
                # suffix, otherwise refs such as "logo.png?v=3" are missed.
                bare = val.split("?")[0].split("#")[0].lower()
                if bare.endswith(media_suffixes):
                    refs.add(val)

    # Also check CSS-style `url(...)` refs; drop whitespace-only captures.
    for raw in URL_REGEX.findall(content):
        cleaned = raw.strip()
        if cleaned:
            refs.add(cleaned)

    return refs

# Matches a URL scheme prefix such as "https:" or "data:". At least two
# characters are required so a Windows drive prefix ("C:") is not a scheme.
_URL_SCHEME_RE = re.compile(r"^[A-Za-z][A-Za-z0-9+.\-]+:")


def normalize_ref(ref, base_path):
    """Resolve a reference found in a page to an absolute local file path.

    The query string and fragment are stripped, then the remainder is joined
    onto ``base_path`` and resolved.

    NOTE: a ref that starts with "/" replaces ``base_path`` entirely when
    joined, so it is looked up from the filesystem root.

    Args:
        ref: Reference string as written in the page (e.g. ``"img/a.png?v=2"``).
        base_path: Directory the reference is relative to.

    Returns:
        The resolved ``Path`` when it names an existing regular file, else
        ``None``. ``None`` is also returned for empty or fragment-only refs,
        refs with a URL scheme (``https:``, ``data:``, ...), protocol-relative
        refs (``//host/...``), and refs the filesystem cannot look up.
    """
    ref = ref.split("?")[0].split("#")[0].strip()  # Strip query/fragments

    # An empty ref would resolve to base_path itself; external URLs and data
    # URIs can never be local files, so do not probe the disk for them.
    if not ref or ref.startswith("//") or _URL_SCHEME_RE.match(ref):
        return None

    try:
        ref_path = (Path(base_path) / ref).resolve()
        return ref_path if ref_path.is_file() else None
    except (OSError, ValueError, RuntimeError):
        # Over-long names, embedded NUL bytes or symlink loops in scraped
        # text mean "not a usable file", not a reason to abort the scan.
        return None

def symlink_media(ref_path):
    """Create a symlink to ``ref_path`` in the public assets folder.

    The link is named after the file's basename and placed in
    ``SYMLINK_DEST``. Paths whose extension is not in ``MEDIA_EXTS`` are
    ignored.

    If an entry with that name already exists:
      * a dangling symlink is replaced with a fresh link;
      * an entry that resolves to the same file is left alone silently;
      * an entry that resolves to a different file is left alone and a
        warning is printed, because the two files collide on basename.

    Filesystem errors are reported on stdout and never raised.

    Args:
        ref_path: Path of the media file to link to.
    """
    ref_path = Path(ref_path)
    if ref_path.suffix.lower() not in MEDIA_EXTS:
        return

    target = SYMLINK_DEST / ref_path.name
    try:
        if target.is_symlink() and not target.exists():
            # exists() follows the link, so a dangling link looks absent and
            # os.symlink would fail with FileExistsError; remove it first.
            target.unlink()
        elif target.exists():
            if target.resolve() != ref_path.resolve():
                print(f"⚠️ Skipped {ref_path}: {target} already points to a different file")
            return
        os.symlink(ref_path, target)
        print(f"✅ Linked: {target} → {ref_path}")
    except OSError as e:
        print(f"⚠️ Failed to link {ref_path}: {e}")

def main():
    """Scan ROOT_DIR and link the media referenced by its pages.

    Every file under ROOT_DIR whose name ends in ``.html`` or ``.php`` is
    searched for media references. Each reference that resolves (relative to
    the page's own directory) is handed to ``symlink_media``.
    """
    page_suffixes = (".html", ".php")
    for current_dir, _subdirs, filenames in os.walk(ROOT_DIR):
        for filename in filenames:
            # Only HTML/PHP pages are scanned.
            if not filename.endswith(page_suffixes):
                continue
            page = Path(current_dir) / filename
            for found in find_media_references(page):
                resolved = normalize_ref(found, page.parent)
                if resolved:
                    symlink_media(resolved)


if __name__ == "__main__":
    main()
