import pandas as pd
import re
import logging
import os

# Send INFO-and-above records to categorize_allascii.log, each one prefixed
# with its timestamp and severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    filename='categorize_allascii.log',
)

def categorize_constant(name: str, unit: str) -> str:
    """Assign a coarse category label to a physical constant.

    Rules are evaluated top to bottom and the first match wins, so their
    order is significant. Name keywords and multi-character unit fragments
    are matched case-insensitively as substrings. Single-symbol units
    ("K", "s", "A", "m") are compared case-sensitively against the whole
    unit, because unit symbols are case-sensitive: "S" is siemens, not
    seconds, and must not be classified as a time.

    Args:
        name: Quantity name, e.g. "alpha particle mass".
        unit: Unit string, e.g. "kg" or "J K^-1"; may be empty for
            dimensionless quantities.

    Returns:
        One of the category labels returned below, or "other" when no
        rule matches.
    """
    lname = name.lower()
    lunit = unit.lower()

    # NOTE(review): the unit substring tests ("ev", "j", "m^-1", "hz") also
    # match compound units such as "J s" or "F m^-1", so such rows never
    # reach the later name-based rules (e.g. "action", "permittivity").
    # Confirm this precedence is intended before reordering anything.
    if "ratio" in lname:
        return "ratio"
    if "mass" in lname:
        return "mass-energy equivalent" if "equivalent" in lname else "mass"
    if "charge" in lname:
        return "charge"
    if "mag. mom." in lname or "magneton" in lname:
        return "magnetic"
    if "planck" in lname:
        return "Planck-related"
    if "electron volt" in lname or "ev" in lunit:
        return "energy (eV)"
    if "joule" in lname or "j" in lunit:
        return "energy (J)"
    if "wavelength" in lname or "m^-1" in lunit:
        return "wavelength/frequency"
    if "frequency" in lname or "hz" in lunit:
        return "frequency"
    # Whole-unit comparisons from here on use the original case.
    if "kelvin" in lname or unit == "K":
        return "temperature"
    if "constant" in lname:
        return "named constant"
    if "time" in lname or unit == "s":
        return "time"
    if "velocity" in lname or "m s^-1" in lunit:
        return "velocity"
    if "current" in lname or unit == "A":
        return "current"
    if "permittivity" in lname or "permeability" in lname or "ohm" in lunit:
        return "electromagnetic property"
    if "molar" in lname or "mol" in lunit:
        return "molar property"
    if "length" in lname or unit == "m":
        return "length"
    if "action" in lname:
        return "action"
    if "unit" in lname:
        return "unit definition"
    return "other"

def parse_and_categorize_constants(filename):
    """Parse a CODATA-style constants listing into a categorized DataFrame.

    Each data line is expected to hold a quantity name, a value, an
    uncertainty and an optional unit. The name is separated from the value
    by at least two spaces. Numbers may be written compactly ("1.5e-3") or
    with space-separated digit groups, an optional truncation marker "..."
    and a detached exponent ("1.054 571 817... e-34"). The uncertainty may
    be "exact" or "(exact)", which is stored as 0.0. A missing unit
    (dimensionless quantity) is stored as an empty string.

    Blank lines and lines starting with "Quantity" or "-" are skipped.
    Lines that do not match the layout are skipped and reported at DEBUG
    level; lines whose numbers cannot be converted are reported at WARNING
    level.

    Args:
        filename: Path of the text file to read (decoded as UTF-8).

    Returns:
        A pandas DataFrame with the columns "name", "value", "uncertainty",
        "unit" and "category". The columns are present even when no line
        could be parsed.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
        Exception: Any other error raised while reading the file is logged
            and re-raised.
    """
    columns = ["name", "value", "uncertainty", "unit", "category"]

    # One numeric column: optional sign, leading digits with optional
    # fraction, further digit groups separated by single spaces, an optional
    # "..." truncation marker (accepted before or after the exponent) and an
    # optional exponent that may be detached by whitespace.
    number = (
        r"-?\d+\.?\d*(?:[ ]\d+)*"
        r"(?:\.\.\.)?(?:\s*[Ee][+-]?\d+)?(?:\.\.\.)?"
    )
    pattern = re.compile(
        r"^\s*(.*?)\s{2,}"                 # name, then a gap of 2+ spaces
        rf"({number})\s+"                  # value
        rf"({number}|\(?exact\)?)"         # uncertainty or exact marker
        r"(?:\s+(\S.*?))?\s*$"             # optional unit up to end of line
    )

    def to_float(text):
        # Drop the truncation marker and all grouping whitespace first.
        return float("".join(text.replace("...", "").split()))

    constants = []
    try:
        with open(filename, "r", encoding="utf-8") as f:
            for line in f:
                if not line.strip() or line.startswith(("Quantity", "-")):
                    continue
                m = pattern.match(line)
                if m is None:
                    logging.debug(f"Skipping unrecognized line: {line.strip()}")
                    continue
                name, value_str, uncert_str, unit = m.groups()
                name = name.strip()
                # The unit group is None when the unit column is empty.
                unit = (unit or "").strip()
                try:
                    value = to_float(value_str)
                    if uncert_str.strip("()") == "exact":
                        uncertainty = 0.0
                    else:
                        uncertainty = to_float(uncert_str)
                except ValueError as e:
                    logging.warning(f"Failed parsing line: {line.strip()} - {e}")
                    continue
                constants.append({
                    "name": name,
                    "value": value,
                    "uncertainty": uncertainty,
                    "unit": unit,
                    "category": categorize_constant(name, unit),
                })
    except FileNotFoundError:
        logging.error(f"Input file {filename} not found")
        raise
    except Exception as e:
        logging.error(f"Error reading file {filename}: {e}")
        raise
    # Passing the columns explicitly keeps the schema stable for empty input.
    return pd.DataFrame(constants, columns=columns)

def main():
    """Convert allascii.txt into a categorized tab-separated file.

    Reads "allascii.txt" from the current directory, parses and categorizes
    its constants, and writes them to "categorized_allascii.txt" with the
    header Quantity, Value, Uncertainty, Unit, Category. Progress and errors
    are logged; a short status line is printed.

    If no constants could be parsed, a warning is logged and a file that
    contains only the header row is written.

    Raises:
        FileNotFoundError: If the input file is missing.
        Exception: Any other failure is logged, printed and re-raised.
    """
    input_file = "allascii.txt"
    output_file = "categorized_allascii.txt"
    columns = ["name", "value", "uncertainty", "unit", "category"]

    try:
        # Fail early with a message that names the expected location.
        if not os.path.exists(input_file):
            raise FileNotFoundError(f"{input_file} not found in the current directory")

        # Parse and categorize constants
        logging.info(f"Parsing and categorizing constants from {input_file}")
        df = parse_and_categorize_constants(input_file)

        if df.empty:
            logging.warning(f"No constants could be parsed from {input_file}")
        # An empty frame may carry no columns at all; selecting them in
        # to_csv would then raise KeyError. Reindexing guarantees they exist.
        df = df.reindex(columns=columns)

        # Save to tab-separated text file
        df.to_csv(output_file, sep="\t", index=False,
                  columns=columns,
                  header=["Quantity", "Value", "Uncertainty", "Unit", "Category"])
        logging.info(f"Saved categorized constants to {output_file}")
        print(f"Successfully generated {output_file} with {len(df)} constants")

    except Exception as e:
        logging.error(f"Error in main: {e}")
        print(f"Error: {e}")
        raise

# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()