import pandas as pd
import numpy as np

# --- Configuration for loading the data ---
# A relative path is resolved against the current working directory.
file_path = "symbolic_fit_results_emergent.txt"

# --- Load the data ---
# The input is parsed as tab-separated text. Any load failure is reported and
# the script stops with a non-zero exit status so that shells and pipelines can
# detect the failure. `raise SystemExit(1)` is used rather than the bare
# `exit()` helper, which is only installed by the `site` module for
# interactive sessions and, called without an argument, exits with status 0.
try:
    df_results = pd.read_csv(file_path, sep="\t")
except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found. Please ensure it's in the same directory as this script.")
    raise SystemExit(1)
except Exception as e:
    # Deliberately broad: this is the top-level boundary of the script and any
    # parse/permission/encoding problem is reported the same way.
    print(f"An error occurred while loading the file: {e}")
    raise SystemExit(1)

# The whole analysis is driven by the 'error' column; fail with a clear message
# instead of a KeyError traceback when the file does not provide it.
if 'error' not in df_results.columns:
    print(f"Error: The file '{file_path}' has no 'error' column. Columns found: {list(df_results.columns)}")
    raise SystemExit(1)

# --- Prepare data for outlier detection ---
# Convert 'error' column to numeric, coercing unparseable entries to NaN
df_results['error'] = pd.to_numeric(df_results['error'], errors='coerce')

# Drop rows where 'error' is NaN, as these cannot be used for outlier detection
df_results_cleaned = df_results.dropna(subset=['error'])

if df_results_cleaned.empty:
    print("No valid data available for outlier analysis after cleaning. Check your input file.")
else:
    # --- Identify Outliers using IQR method on 'error' ---
    # NOTE(review): the 'error' values are used as stored (no abs() is taken),
    # although the report header below calls them "Absolute Error" -- confirm
    # that the column already holds absolute errors.
    error_values = df_results_cleaned['error']
    Q1 = error_values.quantile(0.25)
    Q3 = error_values.quantile(0.75)
    IQR = Q3 - Q1

    # Define outlier bounds (1.5 * IQR rule)
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # Filter for outliers (values below lower_bound or above upper_bound)
    is_outlier = (error_values < lower_bound) | (error_values > upper_bound)
    outliers = df_results_cleaned[is_outlier]

    # --- Print the list of outliers ---
    if not outliers.empty:
        print("--- List of Outliers (based on Absolute Error, IQR Method) ---")
        # Print selected columns for better readability, sorted by error
        # (highest first). Only columns actually present in the file are shown,
        # so a missing optional column does not abort the report with a
        # KeyError; 'error' itself is guaranteed to exist at this point.
        display_columns = ['name', 'value', 'unit', 'error', 'uncertainty', 'rel_error', 'bad_data_reason']
        available_columns = [col for col in display_columns if col in outliers.columns]
        print(outliers[available_columns].sort_values('error', ascending=False).to_string(index=False))
    else:
        print("No outliers detected using the 1.5 * IQR rule for absolute errors.")