import json
import re
import unicodedata
from collections import defaultdict, Counter

# Function to normalize names
def normalize_name(name):
    # Normalize: lowercase, remove accents, handle variations
    name = re.sub(r'[^\w\s]', '', name.lower().replace('é', 'e').replace('á', 'a').replace('ñ', 'n'))
    name_map = {
        'leo messi': 'lionel messi',
        'kylian mbappe': 'kylian mbappé',
        'beyonce': 'beyoncé',
        'amanda sc gorman': 'amanda gorman',
        'nikkietutorials': 'nikkie de jager',
        'the stokes twins': 'stokes twins',
        'kourtney kardashian barker': 'kourtney kardashian'
    }
    return name_map.get(name, name)

# Load JSON data
def load_json(file_path):
    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return {}

# Combine connections and track file occurrences
def combine_connections(json_files):
    # Track edges and the files they appear in
    edge_files = defaultdict(list)
    
    for file_idx, file in enumerate(json_files):
        data = load_json(file)
        for person, friends in data.items():
            norm_person = normalize_name(person)
            for friend in friends:
                norm_friend = normalize_name(friend)
                # Create a canonical edge (sort to treat A-B and B-A as the same)
                edge = tuple(sorted([norm_person, norm_friend]))
                edge_files[edge].append(file_idx)
    
    # Filter for edges that appear in at least two files
    shared_connections = defaultdict(set)
    for edge, files in edge_files.items():
        if len(files) >= 2:
            person1, person2 = edge
            shared_connections[person1].add(person2)
            shared_connections[person2].add(person1)  # Ensure mutual connections
    
    return shared_connections

# Generate DOT file for shared connections
def generate_dot_file(connections, output_file, max_nodes=2000):
    with open(output_file, 'w') as f:
        f.write("graph shared_friendships {\n")
        f.write("    rankdir=LR;\n")  # Left-to-right layout
        f.write("    node [shape=circle, style=filled, fillcolor=lightblue];\n")
        f.write("    edge [color=navy];\n")
        
        nodes_written = set()
        edges_written = set()
        node_count = 0
        
        for person, friends in connections.items():
            if node_count >= max_nodes:
                break
            nodes_written.add(person)
            for friend in friends:
                if friend in connections and (person, friend) not in edges_written and (friend, person) not in edges_written:
                    edges_written.add((person, friend))
                    f.write(f'    "{person}" -- "{friend}";\n')
            node_count += 1
        
        f.write("}\n")
    print(f"DOT file generated: {output_file}")

# Main execution
if __name__ == "__main__":
    json_files = [
        'friends1.json',
        'friends2.json',
        'friends3.json',
        'friends4.json',
        'friends5.json',
        'friends6.json'
    ]
    
    # Combine connections, keeping only those in 2+ files
    shared_connections = combine_connections(json_files)
    
    # Generate DOT file for shared connections
    generate_dot_file(shared_connections, 'shared_friendship_graph.dot')