import numpy as np
import pandas as pd
from scipy.optimize import minimize, differential_evolution
from tqdm import tqdm
from joblib import Parallel, delayed
import logging
import time
import matplotlib.pyplot as plt
import os
import signal
import sys

# Set up logging
# Module-level side effect: importing this file configures the root logger to
# write INFO-and-above records to 'symbolic_cosmo_fit_revised.log'.
# force=True replaces any handlers already attached to the root logger.
logging.basicConfig(filename='symbolic_cosmo_fit_revised.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', force=True)

# Constants
# Golden ratio; used by fib_real and as the base of the phi-power term in D_domain.
phi = (1 + np.sqrt(5)) / 2
sqrt_5 = np.sqrt(5)
# Base of the exponential term in D_domain (named `dyadic` there although the base is 10000).
base = 10000  # As specified in the tree structure
# The literal has 20 entries, so the [:10000] slice is a no-op.
# PRIMES is not referenced by any other code visible in this file.
PRIMES = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71][:10000]  # Simplified for testing
# Memo table for fib_real, keyed by the exact argument value; it is never cleared.
fib_cache = {}

def fib_real(n):
    """Return the real-argument extension of the Fibonacci numbers at *n*.

    Uses the generalized Binet formula

        F(n) = (phi**n - cos(pi * n) * phi**(-n)) / sqrt(5)

    which reproduces the integer Fibonacci numbers (F(0) = 0, F(1) = 1,
    F(2) = 1, ...) and interpolates smoothly between them.

    Results are memoized in the module-level ``fib_cache`` keyed by *n*.
    Arguments with ``|n| > 100`` short-circuit to ``0.0`` (and are not
    cached); this range guard is kept from the original implementation.
    """
    if n in fib_cache:
        return fib_cache[n]
    if n > 100 or n < -100:
        return 0.0  # Range guard: arguments this large are treated as out of domain
    # Both Binet terms share the 1/sqrt(5) factor. Dividing only the first
    # term (as this function previously did) gives F(1) ~= 1.342 and
    # F(0) ~= -0.553 instead of 1 and 0.
    growing = phi ** n
    decaying = np.cos(np.pi * n) * phi ** (-n)
    result = (growing - decaying) / np.sqrt(5)
    fib_cache[n] = result
    return result

def D_domain(n, beta, Omega, k_exponent, r=1.0, scale=1.0):
    """Evaluate the domain function at index ``n + beta``.

    Computes ``sqrt(scale * max(P, 1e-30)) / r`` where
    ``P = sqrt_5 * Omega * phi**(k_exponent * x) * base**x * fib_real(x)``
    and ``x = n + beta``.

    Returns the floor value ``1e-30`` when ``|n + beta| > 50`` or when any
    exception is raised during evaluation (the exception is logged as a
    warning rather than propagated).
    """
    floor = 1e-30
    try:
        exponent = n + beta
        # Out-of-range exponents are clamped to the floor instead of evaluated.
        if abs(exponent) > 50:
            return floor
        fib_term = fib_real(exponent)
        base_term = base ** exponent
        golden_term = phi ** (k_exponent * exponent)
        # Multiplication order is kept fixed so floating-point results do not change.
        product = sqrt_5 * Omega * golden_term * base_term * fib_term
        # Non-positive products are lifted to the floor before the square root.
        scaled = scale * np.maximum(product, floor)
        return np.sqrt(scaled) / r
    except Exception as err:
        logging.warning(f"D_domain error for n={n}, beta={beta}: {err}")
        return floor

def invert_D_domain(value, Omega, k_exponent, r=1.0, scale=1.0, max_n=1000, steps=1000):
    """Grid-search ``(n, beta, scale factor)`` so that D_domain matches ``|value|``.

    For every grid point both ``D_domain(...)`` and its reciprocal are
    compared against ``abs(value)``; whichever is closer contributes a
    candidate. The grid is ``steps`` values of ``n`` in [-10, 10], 10 values
    of ``beta`` in [0, 1] and 20 log-spaced scale factors within five decades
    of ``|value|`` (clipped to 1e-20 .. 1e20).

    Parameters
    ----------
    value : float
        Target value; only its magnitude is used.
    Omega, k_exponent, r, scale
        Forwarded to ``D_domain``; ``scale`` is multiplied by each trial
        scale factor.
    max_n : int
        Unused; kept for backward compatibility of the signature.
    steps : int
        Number of ``n`` grid points.

    Returns
    -------
    tuple
        ``(n, beta, dynamic_scale, emergent_uncertainty)`` for the closest
        candidate, where ``emergent_uncertainty`` is the standard deviation of
        the matched values of the ten closest candidates (1% of the value when
        there is only one). Returns ``(None, None, None, None)`` when no finite
        candidate exists or an error occurs (the error is logged).
    """
    target = abs(value)
    log_val = np.log10(max(target, 1e-30))
    scale_factors = np.logspace(max(log_val - 5, -20), min(log_val + 5, 20), num=20)
    n_values = np.linspace(-10, 10, steps)  # Narrowed range for stability
    beta_values = np.linspace(0, 1, 10)
    # Each candidate is (diff, n, beta, dynamic_scale, matched_value). Keeping
    # the matched value avoids re-deriving it later, which is where the
    # previous implementation indexed a scalar and always raised.
    candidates = []
    try:
        for n in n_values:
            for beta in beta_values:
                for dynamic_scale in scale_factors:
                    # Evaluate once per grid point; the reciprocal reuses it.
                    direct = D_domain(n, beta, Omega, k_exponent, r, scale * dynamic_scale)
                    if np.isfinite(direct):
                        candidates.append((abs(direct - target), n, beta, dynamic_scale, direct))
                    if direct != 0:
                        inverse = 1 / direct
                        if np.isfinite(inverse):
                            candidates.append((abs(inverse - target), n, beta, dynamic_scale, inverse))
        if not candidates:
            logging.error(f"invert_D_domain: No valid candidates for value {value}")
            return None, None, None, None
        closest = sorted(candidates, key=lambda c: c[0])[:10]
        matched_vals = [c[4] for c in closest]
        if len(matched_vals) > 1:
            emergent_uncertainty = np.std(matched_vals)
        else:
            emergent_uncertainty = abs(matched_vals[0]) * 0.01
        best = closest[0]
        return best[1], best[2], best[3], emergent_uncertainty
    except Exception as e:
        # Best-effort search: report the failure and let the caller flag the row.
        logging.error(f"invert_D_domain failed for value {value}: {e}")
        return None, None, None, None

def parse_categorized_codata(filename):
    """Load the tab-separated constants table at *filename* into a DataFrame.

    The first line of the file is consumed as a header and replaced by the
    fixed column names ``name, value, uncertainty, unit, category``.
    ``value`` and ``uncertainty`` are read as floats; the literal ``exact``
    is treated as missing, and missing uncertainties become ``0.0``.

    Any failure is logged and then re-raised unchanged; a missing file gets
    its own log message.
    """
    column_types = {'name': str, 'value': float, 'uncertainty': float, 'unit': str, 'category': str}
    try:
        table = pd.read_csv(
            filename,
            sep='\t',
            header=0,
            names=list(column_types),
            dtype=column_types,
            na_values=['exact'],
        )
        table['uncertainty'] = table['uncertainty'].fillna(0.0)
    except FileNotFoundError:
        logging.error(f"Input file {filename} not found")
        raise
    except Exception as exc:
        logging.error(f"Error parsing {filename}: {exc}")
        raise
    else:
        return table

def _failed_fit(name, value, uncertainty, reason):
    """Build the result record for a constant that could not be fitted."""
    return {
        'name': name, 'codata_value': value, 'n': None, 'beta': None, 'emergent_value': None,
        'error': None, 'rel_error': None, 'codata_uncertainty': uncertainty, 'emergent_uncertainty': None,
        'scale': None, 'bad_data': True, 'bad_data_reason': reason
    }


def fit_constant(row, Omega, k_exponent, r=1.0, scale=1.0):
    """Fit one constant to the D_domain model and report the fit quality.

    Parameters
    ----------
    row : mapping
        Must provide ``'name'``, ``'value'`` and ``'uncertainty'``.
    Omega, k_exponent, r, scale
        Forwarded to ``invert_D_domain`` / ``D_domain``.

    Returns
    -------
    dict
        Keys: ``name, codata_value, n, beta, emergent_value, error,
        rel_error, codata_uncertainty, emergent_uncertainty, scale, bad_data,
        bad_data_reason``. When no fit is found or processing fails, the
        numeric fit fields are ``None`` and ``bad_data`` is ``True``. This
        function never raises; failures are logged and reported in the record.
    """
    # Bound up front so the except handler can always build a record, even
    # when the row lookup itself is what failed.
    name = value = uncertainty = None
    try:
        name = row['name']
        value = row['value']
        uncertainty = row['uncertainty']
        abs_value = abs(value)
        sign = np.sign(value)
        n, beta, dynamic_scale, emergent_uncertainty = invert_D_domain(abs_value, Omega, k_exponent, r, scale)
        if n is None:
            return _failed_fit(name, value, uncertainty, 'No valid fit found')

        fitted_scale = scale * dynamic_scale
        direct = D_domain(n, beta, Omega, k_exponent, r, fitted_scale)
        # invert_D_domain matches either D or 1/D against |value| but does not
        # report which one won, so use whichever form is actually closer
        # rather than guessing from the sign of the value.
        magnitude = direct
        if direct != 0:
            inverse = 1 / direct
            if np.isfinite(inverse) and abs(inverse - abs_value) < abs(direct - abs_value):
                magnitude = inverse
        approx = magnitude * sign

        error = abs(approx - value)
        rel_error = error / max(abs(value), 1e-30) if abs(value) > 0 else np.inf
        # A NaN relative error must count as a failed fit, not a perfect one.
        high_rel_error = bool(not np.isfinite(rel_error) or rel_error > 1e-12)
        uncertainty_deviates = bool(
            emergent_uncertainty is not None
            and uncertainty is not None
            and abs(emergent_uncertainty - uncertainty) > 10 * uncertainty
        )
        if high_rel_error:
            bad_data_reason = f"High rel_error ({rel_error:.2e})"
        elif uncertainty_deviates:
            bad_data_reason = "Uncertainty deviation"
        else:
            bad_data_reason = ""
        return {
            'name': name, 'codata_value': value, 'n': n, 'beta': beta, 'emergent_value': approx,
            'error': error, 'rel_error': rel_error, 'codata_uncertainty': uncertainty,
            'emergent_uncertainty': emergent_uncertainty, 'scale': fitted_scale,
            'bad_data': high_rel_error or uncertainty_deviates, 'bad_data_reason': bad_data_reason
        }
    except Exception as e:
        logging.error(f"fit_constant failed for {name}: {e}")
        return _failed_fit(name, value, uncertainty, f"Processing error: {str(e)}")

def symbolic_fit_constants(df, constants_params):
    """Fit every configured constant that is present in *df*.

    *constants_params* maps a constant name to a dict providing ``'Omega'``
    and ``'k_exponent'``. Names absent from ``df['name']`` are skipped; for
    the rest, the first matching row is passed to ``fit_constant``.

    Returns a DataFrame with one row per fitted constant (empty, with no
    columns, when nothing matched).
    """
    fitted = [
        fit_constant(df[df['name'] == const_name].iloc[0], spec['Omega'], spec['k_exponent'])
        for const_name, spec in constants_params.items()
        if const_name in df['name'].values
    ]
    return pd.DataFrame(fitted)

def main():
    """Run the fit for the configured constants and write the outputs.

    Reads ``categorized_allascii.txt`` from the working directory, fits the
    constants listed in ``constants_params``, writes the result table to
    ``symbolic_fit_results_revised.txt``, prints a summary and saves a bar
    chart of the relative errors to ``rel_errors_revised.png``.

    Raises
    ------
    FileNotFoundError
        If the input file does not exist.
    """
    # Exit quietly on Ctrl-C instead of dumping a traceback.
    signal.signal(signal.SIGINT, lambda sig, frame: sys.exit(0))
    start_time = time.time()

    # Define constants and their parameters from the tree
    constants_params = {
        'Planck constant': {'Omega': phi, 'k_exponent': 6, 'codata_value': 6.62607015e-34},
        'Newtonian constant of gravitation': {'Omega': 6.6743e-11, 'k_exponent': 10, 'codata_value': 6.6743e-11},
        'Boltzmann constant': {'Omega': 1.380649e-23, 'k_exponent': 8, 'codata_value': 1.380649e-23},
        'atomic mass unit': {'Omega': 1.66053906660e-27, 'k_exponent': 7, 'codata_value': 1.66053906660e-27},
        'biological cell length': {'Omega': 1.0e-5, 'k_exponent': 1, 'codata_value': 1.0e-5}
    }

    # Parse CODATA
    input_file = "categorized_allascii.txt"
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"{input_file} not found")
    df = parse_categorized_codata(input_file)
    logging.info(f"Parsed {len(df)} constants")

    # Fit specified constants
    df_results = symbolic_fit_constants(df, constants_params)
    with open("symbolic_fit_results_revised.txt", 'w', encoding='utf-8') as f:
        df_results.to_csv(f, sep="\t", index=False)
    logging.info("Saved results to symbolic_fit_results_revised.txt")

    if df_results.empty:
        # An empty result frame has no columns, so the column selection and
        # the plot below would fail; report and skip them instead.
        logging.warning("None of the configured constants were found in the input file")
        print("\nNo configured constants were found in the input file.")
    else:
        # Print results
        print("\nFitted Constants:")
        print(df_results[['name', 'codata_value', 'emergent_value', 'n', 'beta', 'error', 'rel_error', 'bad_data', 'bad_data_reason']].to_string(index=False))

        # Plot relative errors. Failed fits carry rel_error=None and a zero
        # CODATA value yields inf; neither can be drawn as a bar height.
        rel_errors = pd.to_numeric(df_results['rel_error'], errors='coerce')
        plottable = np.isfinite(rel_errors)
        if plottable.any():
            plt.figure(figsize=(10, 5))
            plt.bar(df_results.loc[plottable, 'name'], rel_errors[plottable], color='skyblue', edgecolor='black')
            plt.xticks(rotation=45)
            plt.title('Relative Errors for Fitted Constants')
            plt.xlabel('Constant Name')
            plt.ylabel('Relative Error')
            plt.grid(True)
            plt.tight_layout()
            plt.savefig('rel_errors_revised.png')
            plt.close()
        else:
            logging.warning("No finite relative errors to plot; skipping rel_errors_revised.png")

    logging.info(f"Total runtime: {time.time() - start_time:.2f} seconds")

# Script entry point: run the fit only when executed directly, not on import.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # main() installs a SIGINT handler that calls sys.exit(0), so this
        # branch mainly covers an interrupt that arrives before that handler
        # is in place.
        logging.info("KeyboardInterrupt detected. Exiting gracefully.")
        sys.exit(0)