import os
import re
from pathlib import Path
from urllib.parse import quote, unquote

# Root directory that is scanned recursively, both for image files and for
# the pages whose image references get rewritten in place.
ROOT_DIR = Path("/home/josefkulovany")  # change this to your root

# Extensions of pages to scan (compared against the lower-cased file suffix)
PAGE_EXTS = {".html", ".php"}

# Image file extensions to recognize (compared against the lower-cased suffix)
IMG_EXTS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg", ".bmp", ".tiff"}

# Regex that finds resource references in page text.  Two forms are matched,
# case-insensitively:
#   group 1 - the value of a quoted src attribute:  src="..." or src='...'
#   group 2 - the argument of a CSS url(...), with or without quotes
# Exactly one of the two groups is set for any given match.  The pattern does
# not look at the surrounding tag, so any quoted src=... assignment matches.
IMG_REF_PATTERN = re.compile(
    r"""(?:
        src\s*=\s*["']([^"']+)["'] |      # src="..."
        url\(\s*['"]?([^"')]+)['"]?\s*\)  # url('...')
    )""",
    re.IGNORECASE | re.VERBOSE,
)

def find_all_images(root):
    """Index every image file below *root* by its bare file name.

    Returns a dict mapping a file name to the list of resolved paths of all
    image files carrying that name, in the order ``rglob`` yields them.
    """
    by_name = {}
    for entry in root.rglob("*"):
        if not (entry.is_file() and entry.suffix.lower() in IMG_EXTS):
            continue
        resolved = entry.resolve()
        if entry.name in by_name:
            by_name[entry.name].append(resolved)
        else:
            by_name[entry.name] = [resolved]
    return by_name

def make_relative_path(from_path, to_path):
    """Return *to_path* expressed relative to the directory holding *from_path*."""
    base_dir = from_path.parent
    return os.path.relpath(to_path, base_dir)

def fix_image_paths_in_file(file_path, images_on_disk):
    """Scan one HTML/PHP file and repair image references that point nowhere.

    Every ``src="..."`` / ``url(...)`` match of ``IMG_REF_PATTERN`` is checked
    against the filesystem, relative to the page's directory.  A reference
    whose target is missing is repointed at the first entry of
    ``images_on_disk`` that has the same file name.  Only the reference text
    itself is replaced: the surrounding attribute name, quoting and spacing,
    as well as any ``?query`` / ``#fragment`` suffix, are kept as written.

    References that carry a URL scheme (``http:``, ``data:``, ...) or are
    protocol-relative (``//host/...``) do not name local files and are left
    untouched.

    The file is rewritten only when at least one reference changed; bytes
    that are not valid UTF-8 and the original line endings are preserved.

    Args:
        file_path: ``pathlib.Path`` of the page to process.
        images_on_disk: Mapping of file name -> list of image paths, as
            built by ``find_all_images``.
    """
    # Work on raw bytes with ``surrogateescape`` so that undecodable bytes and
    # CRLF line endings survive the read/modify/write round trip unchanged.
    content = file_path.read_bytes().decode("utf-8", errors="surrogateescape")
    page_dir = file_path.parent
    changed = False

    def exists_on_disk(ref_path):
        """Return True if *ref_path*, taken relative to the page, exists."""
        try:
            return (page_dir / ref_path).resolve().exists()
        except (OSError, ValueError):
            # Over-long components, embedded NULs, unencodable characters...
            return False

    def replacement(match):
        nonlocal changed
        original = match.group(0)

        # Group 1 is the src="..." value, group 2 the url(...) argument.
        group = 1 if match.group(1) is not None else 2
        # An unquoted url( x.png ) captures the padding as well; drop it.
        img_ref = match.group(group).strip()

        # Split off "?query" / "#fragment" so it can be re-attached later.
        cut = re.search(r"[?#]", img_ref)
        split_at = cut.start() if cut else len(img_ref)
        ref_path, ref_suffix = img_ref[:split_at], img_ref[split_at:]

        # Nothing to look up, or not a local file reference at all.
        if (
            not ref_path
            or ref_path.startswith("//")
            or re.match(r"[A-Za-z][A-Za-z0-9+.-]*:", ref_path)
        ):
            return original

        # A reference may be percent-encoded ("my%20pic.png"); try the decoded
        # spelling first and the literal one second.
        variants = list(dict.fromkeys((unquote(ref_path), ref_path)))

        if any(exists_on_disk(variant) for variant in variants):
            # Image found at current reference, no change needed
            return original

        # Image file missing at current path, try to find it by filename
        candidates = None
        for variant in variants:
            candidates = images_on_disk.get(os.path.basename(variant))
            if candidates:
                break
        if not candidates:
            # No image found on disk by that name; keep original
            return original

        # Choose the best candidate - simplest: first found
        rel_path = make_relative_path(file_path, candidates[0])
        # URLs always use forward slashes, whatever the host OS uses.
        rel_path = rel_path.replace(os.sep, "/")
        # Percent-encode so spaces, quotes or parentheses in a directory name
        # cannot break the surrounding attribute or an unquoted url(...).
        new_ref = quote(rel_path, encoding="utf-8", errors="surrogateescape")
        new_ref += ref_suffix

        # Splice the new reference over the old one inside the matched text,
        # leaving the attribute name, quotes and whitespace exactly as found.
        start = match.start(group) - match.start()
        end = match.end(group) - match.start()
        changed = True
        return original[:start] + new_ref + original[end:]

    # Replace all image refs in file content
    new_content = IMG_REF_PATTERN.sub(replacement, content)

    if changed:
        print(f"Fixed image references in {file_path}")
        file_path.write_bytes(
            new_content.encode("utf-8", errors="surrogateescape")
        )

def main():
    """Index the images under ROOT_DIR, then repair every page found there."""
    print(f"Scanning {ROOT_DIR} for images...")
    images_on_disk = find_all_images(ROOT_DIR)

    total_images = 0
    for paths in images_on_disk.values():
        total_images += len(paths)
    print(f"Found {total_images} images on disk.")

    # Walk the tree again, this time handling only page files
    for candidate in ROOT_DIR.rglob("*"):
        if candidate.suffix.lower() not in PAGE_EXTS:
            continue
        if not candidate.is_file():
            continue
        fix_image_paths_in_file(candidate, images_on_disk)

    print("Done fixing image links!")

# Run the scan only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
