import json
import re
import unicodedata
from collections import defaultdict

import networkx as nx

# Function to normalize names
def normalize_name(name):
    # Normalize: lowercase, remove accents, handle variations
    name = re.sub(r'[^\w\s]', '', name.lower().replace('é', 'e').replace('á', 'a').replace('ñ', 'n'))
    name_map = {
        'leo messi': 'lionel messi',
        'kylian mbappe': 'kylian mbappé',
        'beyonce': 'beyoncé',
        'amanda sc gorman': 'amanda gorman',
        'nikkietutorials': 'nikkie de jager',
        'the stokes twins': 'stokes twins',
        'kourtney kardashian barker': 'kourtney kardashian'
    }
    return name_map.get(name, name)

# Load JSON data
def load_json(file_path):
    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return {}

# Combine connections and track node occurrences across files
def combine_connections(json_files):
    G = nx.Graph()
    node_files = defaultdict(set)  # Track which files each node appears in
    
    for file_idx, file in enumerate(json_files):
        data = load_json(file)
        for person, friends in data.items():
            norm_person = normalize_name(person)
            node_files[norm_person].add(file_idx)
            for friend in friends:
                norm_friend = normalize_name(friend)
                node_files[norm_friend].add(file_idx)
                G.add_edge(norm_person, norm_friend)  # Add edge to graph
    
    return G, node_files

# Find bridge connections to connect islands
def find_bridge_connections(G, node_files, json_files):
    # Identify connected components (islands)
    components = list(nx.connected_components(G))
    if len(components) <= 1:
        print("No islands to bridge; graph is fully connected.")
        return G
    
    print(f"Found {len(components)} islands.")
    
    # Create a new graph with bridge connections
    bridged_G = nx.Graph()
    
    # Add all edges from original graph
    for u, v in G.edges():
        bridged_G.add_edge(u, v)
    
    # Find potential bridge edges: nodes in multiple files that connect different components
    component_map = {}
    for i, comp in enumerate(components):
        for node in comp:
            component_map[node] = i
    
    bridge_edges = []
    for node in node_files:
        if len(node_files[node]) >= 2:  # Node appears in 2+ files
            neighbors = list(G.neighbors(node))
            for i, u in enumerate(neighbors):
                for v in neighbors[i+1:]:
                    if component_map.get(u, -1) != component_map.get(v, -1):
                        # Edge u-v would connect different components
                        if not bridged_G.has_edge(u, v):
                            bridge_edges.append((u, v))
    
    # Add bridge edges to the graph
    for u, v in bridge_edges:
        bridged_G.add_edge(u, v, bridge=True)
    
    print(f"Added {len(bridge_edges)} bridge edges.")
    return bridged_G

# Generate DOT file for the bridged graph
def generate_dot_file(G, output_file, max_nodes=500):
    with open(output_file, 'w') as f:
        f.write("graph bridged_friendships {\n")
        f.write("    rankdir=LR;\n")
        f.write("    node [shape=circle, style=filled, fillcolor=lightblue];\n")
        f.write("    edge [color=navy, penwidth=1];\n")
        
        # Highlight bridge edges
        f.write('    edge [color=red, penwidth=2, style=dashed, label="bridge"];\n')
        for u, v, data in G.edges(data=True):
            if data.get('bridge', False):
                f.write(f'    "{u}" -- "{v}" [color=red, penwidth=2, style=dashed, label="bridge"];\n')
        
        # Regular edges
        f.write("    edge [color=navy, penwidth=1, style=solid];\n")
        nodes_written = set()
        edges_written = set()
        node_count = 0
        
        for u, v, data in G.edges(data=True):
            if node_count >= max_nodes:
                break
            if not data.get('bridge', False):  # Skip bridge edges (already written)
                if (u, v) not in edges_written and (v, u) not in edges_written:
                    nodes_written.add(u)
                    nodes_written.add(v)
                    f.write(f'    "{u}" -- "{v}";\n')
                    edges_written.add((u, v))
                    node_count += 1
        
        f.write("}\n")
    print(f"DOT file generated: {output_file}")

# Main execution
if __name__ == "__main__":
    json_files = [
        'friends1.json',
        'friends2.json',
        'friends3.json',
        'friends4.json',
        'friends5.json',
        'friends6.json'
    ]
    
    # Combine connections and track node occurrences
    G, node_files = combine_connections(json_files)
    
    # Find and add bridge connections
    bridged_G = find_bridge_connections(G, node_files, json_files)
    
    # Generate DOT file for the bridged graph
    generate_dot_file(bridged_G, 'bridged_friendship_graph.dot')