import numpy as np
import pandas as pd
import re

# Golden ratio: the positive root of x**2 = x + 1.
phi = (np.sqrt(5) + 1) / 2

# The first 50 prime numbers (2 through 229). D() selects one of them by
# index, wrapping around the length of this list.
PRIMES = [
    2, 3, 5, 7, 11,
    13, 17, 19, 23, 29,
    31, 37, 41, 43, 47,
    53, 59, 61, 67, 71,
    73, 79, 83, 89, 97,
    101, 103, 107, 109, 113,
    127, 131, 137, 139, 149,
    151, 157, 163, 167, 173,
    179, 181, 191, 193, 197,
    199, 211, 223, 227, 229,
]

def fib_real(n):
    """Return the Fibonacci number F(n) extended to a real scalar ``n``.

    Uses Binet's formula with the alternating sign ``(-1)**n`` replaced by
    ``cos(pi * n)`` so that the expression stays real for non-integer ``n``:

        F(n) = (phi**n - cos(pi * n) * phi**(-n)) / sqrt(5)

    For integer ``n`` this gives the ordinary Fibonacci numbers
    (F(0) = 0, F(1) = F(2) = 1, F(10) = 55) up to floating-point rounding.

    Args:
        n: Real scalar index. ``math.cos`` is used, so array inputs with more
            than one element are not supported.

    Returns:
        The value of F(n) as a NumPy float.
    """
    from math import cos, pi

    # Same expression as the module-level ``phi``; computed locally so the
    # function does not depend on any global state.
    golden = (1 + np.sqrt(5)) / 2
    root5 = np.sqrt(5)

    # The 1/sqrt(5) factor has to scale BOTH terms. Applying it only to the
    # growing term breaks F(0) = 0 and F(1) = 1.
    growing = golden ** n
    decaying = cos(pi * n) * golden ** (-n)
    return (growing - decaying) / root5

def D(n, beta, r=1.0, k=1.0, Omega=1.0, base=2):
    """Evaluate the symbolic dimension formula at index ``n + beta``.

    With ``x = n + beta`` the result is

        sqrt(max(phi * fib_real(x) * base**x * P * Omega, 1e-30)) * r**k

    where ``P`` is the entry of ``PRIMES`` at position ``floor(x)`` taken
    modulo the length of ``PRIMES``.

    Args:
        n: Main index.
        beta: Offset added to ``n``.
        r: Base of the trailing ``r**k`` scale factor.
        k: Exponent of the trailing ``r**k`` scale factor.
        Omega: Multiplicative factor inside the square root.
        base: Base of the power term ``base**(n + beta)``.

    Returns:
        The value of the formula above.
    """
    x = n + beta
    fib_term = fib_real(x)

    # Pick the prime by floor(x), wrapping around the table length.
    table_size = len(PRIMES)
    prime = PRIMES[int(np.floor(x) + table_size) % table_size]

    power_term = base ** x
    product = phi * fib_term * power_term * prime * Omega

    # Clamp from below so the square root never receives zero or a
    # negative number.
    clamped = np.maximum(product, 1e-30)
    scale = r ** k
    return np.sqrt(clamped) * scale

def invert_D(value, r=1.0, k=1.0, Omega=1.0, base=2, max_n=50, steps=200):
    """Grid-search the ``(n, beta)`` pair whose ``D`` value is closest to ``value``.

    ``n`` runs over ``steps`` evenly spaced points in ``[0, max_n]`` and
    ``beta`` over 10 evenly spaced points in ``[0, 1]``. Closeness is the
    absolute difference ``|D(n, beta) - value|``; on ties the grid point
    visited first (outer loop over ``n``, inner loop over ``beta``) wins.

    Args:
        value: Target value to approximate.
        r, k, Omega, base: Passed through unchanged to ``D``.
        max_n: Upper end of the ``n`` grid.
        steps: Number of points in the ``n`` grid.

    Returns:
        Tuple ``(n, beta)`` of the best grid point.
    """
    n_grid = np.linspace(0, max_n, steps)
    beta_grid = np.linspace(0, 1, 10)

    # Lazily score every grid point; min() keeps the first smallest score.
    scored = (
        (abs(D(n, beta, r, k, Omega, base) - value), n, beta)
        for n in n_grid
        for beta in beta_grid
    )
    _, best_n, best_beta = min(scored, key=lambda item: item[0])
    return best_n, best_beta

# Simple single-line layout: name, two or more spaces, then value,
# uncertainty and unit as whitespace-separated tokens. Kept as a fallback
# for files that do not use the column layout handled below.
_CODATA_LEGACY_LINE = re.compile(
    r"^\s*(.*?)\s{2,}([0-9Ee\+\-\.]+)\s+([0-9Ee\+\-\.]+|exact)\s+(\S+)"
)


def _codata_number(text):
    """Convert a numeric field such as ``6.644 657 3357 e-27`` to a float."""
    # Digit groups are separated by single spaces and truncated exact values
    # end in "..."; float() accepts neither, so remove both first.
    return float("".join(text.split()).replace("...", ""))


def _codata_record(name, value_str, uncert_str, unit):
    """Build one constant record; raises ValueError on a malformed number."""
    exact_marker = uncert_str.strip().strip("()").lower()
    return {
        "name": name.strip(),
        "value": _codata_number(value_str),
        "uncertainty": None if exact_marker == "exact" else _codata_number(uncert_str),
        "unit": unit.strip(),
    }


def parse_codata_ascii(filename):
    """Parse a CODATA constants listing into a DataFrame.

    Two line layouts are understood:

    * Column layout, where name, value, uncertainty and (optional) unit are
      separated by runs of two or more spaces. Inside a column, digits may be
      grouped with single spaces (``299 792 458``), a value may end in
      ``...``, the uncertainty may be ``exact`` or ``(exact)``, and the unit
      may contain spaces (``m s^-1``) or be missing entirely.
    * The simple layout ``name  value uncertainty unit`` with single
      whitespace-separated tokens after the name.

    Lines that start with ``Quantity`` or ``-``, blank lines, and lines that
    fit neither layout are skipped.

    Args:
        filename: Path of the text file to read.

    Returns:
        A DataFrame with the columns ``name``, ``value``, ``uncertainty``
        (``None``/NaN for exact constants) and ``unit``. The columns are
        present even when no line could be parsed.
    """
    columns = ["name", "value", "uncertainty", "unit"]
    constants = []
    # errors="replace" keeps this best-effort: a stray undecodable byte must
    # not abort the whole parse.
    with open(filename, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            if line.startswith("Quantity") or line.strip() == "" or line.startswith("-"):
                continue

            record = None

            # First attempt: columns separated by two or more spaces.
            fields = re.split(r"\s{2,}", line.strip())
            if len(fields) >= 3:
                try:
                    record = _codata_record(
                        fields[0], fields[1], fields[2], " ".join(fields[3:])
                    )
                except ValueError:
                    record = None

            # Second attempt: the simple token layout.
            if record is None:
                m = _CODATA_LEGACY_LINE.match(line)
                if m:
                    try:
                        record = _codata_record(*m.groups())
                    except ValueError:
                        # Skip unparsable lines
                        record = None

            if record is not None:
                constants.append(record)
    return pd.DataFrame(constants, columns=columns)

def symbolic_fit_all_constants(df, r=1.0, k=1.0, Omega=1.0, base=2):
    """Fit every usable constant in ``df`` with ``invert_D`` and tabulate the results.

    Rows whose ``value`` is non-positive or larger than 1e50 are skipped.
    For each remaining row the best ``(n, beta)`` is looked up with
    ``invert_D``, the approximation is re-evaluated with ``D``, and the
    absolute error ``|value - approx|`` is recorded. A row whose fit raises
    is reported on stdout and skipped.

    Args:
        df: DataFrame with the columns ``name``, ``value``, ``unit`` and
            ``uncertainty``.
        r, k, Omega, base: Passed through unchanged to ``invert_D`` and ``D``.

    Returns:
        A DataFrame with the columns ``name``, ``value``, ``unit``, ``n``,
        ``beta``, ``approx``, ``error`` and ``uncertainty``. The columns are
        present even when no row was fitted, so callers can sort or select
        by column on an empty result.
    """
    columns = ["name", "value", "unit", "n", "beta", "approx", "error", "uncertainty"]
    results = []
    for _, row in df.iterrows():
        val = row['value']
        if val <= 0 or val > 1e50:
            continue
        try:
            n, beta = invert_D(val, r=r, k=k, Omega=Omega, base=base)
            approx = D(n, beta, r, k, Omega, base)
            error = abs(val - approx)
            results.append({
                "name": row['name'],
                "value": val,
                "unit": row['unit'],
                "n": n,
                "beta": beta,
                "approx": approx,
                "error": error,
                "uncertainty": row['uncertainty'],
            })
        except Exception as e:
            # Best-effort: report the failing constant and keep going.
            print(f"Failed inversion for {row['name']}: {e}")
            continue
    # Explicit columns keep the schema stable when ``results`` is empty;
    # without them an empty frame has no "error" column to sort by.
    return pd.DataFrame(results, columns=columns)

if __name__ == "__main__":
    # Step 1: load the constants table from the text file.
    print("Parsing CODATA constants from allascii.txt...")
    constants = parse_codata_ascii("allascii.txt")
    print(f"Parsed {len(constants)} constants.")

    # Step 2: fit every constant and rank the fits, smallest error first.
    print("Fitting symbolic dimensions to constants...")
    ranked = symbolic_fit_all_constants(constants).sort_values("error")

    # Step 3: print both ends of the ranking.
    reports = (
        ("\nTop 176 best symbolic fits:", ranked.head(176)),
        ("\nTop 176 worst symbolic fits:", ranked.tail(176)),
    )
    for heading, subset in reports:
        print(heading)
        print(subset.to_string(index=False))
