import json
import re
import unicodedata
from collections import defaultdict

import networkx as nx

# Function to normalize names
def normalize_name(name):
    # Normalize: lowercase, remove accents, handle variations
    name = re.sub(r'[^\w\s]', '', name.lower().replace('é', 'e').replace('á', 'a').replace('ñ', 'n'))
    name_map = {
        'leo messi': 'lionel messi',
        'kylian mbappe': 'kylian mbappé',
        'beyonce': 'beyoncé',
        'amanda sc gorman': 'amanda gorman',
        'nikkietutorials': 'nikkie de jager',
        'the stokes twins': 'stokes twins',
        'kourtney kardashian barker': 'kourtney kardashian'
    }
    return name_map.get(name, name)

# Load JSON data
def load_json(file_path):
    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.")
        return {}

# Combine connections and track node occurrences
def combine_connections(json_files):
    G = nx.Graph()
    node_files = defaultdict(set)
    
    for file_idx, file in enumerate(json_files):
        data = load_json(file)
        for person, friends in data.items():
            norm_person = normalize_name(person)
            node_files[norm_person].add(file_idx)
            for friend in friends:
                norm_friend = normalize_name(friend)
                node_files[norm_friend].add(file_idx)
                G.add_edge(norm_person, norm_friend)
    
    return G, node_files

# Find bridge connections to connect islands
def find_bridge_connections(G, node_files, json_files):
    components = list(nx.connected_components(G))
    print(f"Found {len(components)} initial islands.")
    
    if len(components) <= 1:
        print("Graph is fully connected; no bridging needed.")
        return G
    
    bridged_G = G.copy()
    
    # Prioritize high-degree nodes and cross-file nodes
    degree = G.degree()
    cross_file_nodes = [node for node, files in node_files.items() if len(files) >= 2]
    high_degree_nodes = sorted(G.nodes(), key=lambda x: degree[x], reverse=True)[:20]
    priority_nodes = list(set(cross_file_nodes + high_degree_nodes))
    
    # Ensure Barack Obama and Arnold Schwarzenegger are prioritized
    target_nodes = ['barack obama', 'arnold schwarzenegger']
    priority_nodes = list(set(priority_nodes + target_nodes))
    
    # Map nodes to components
    component_map = {}
    for i, comp in enumerate(components):
        for node in comp:
            component_map[node] = i
    
    # Find shortest paths to connect components, prioritizing target nodes
    bridge_edges = []
    connected_components = set()
    
    for target in target_nodes:
        if target not in G:
            continue
        target_comp = component_map.get(target)
        if target_comp is None:
            continue
        for node in priority_nodes:
            if node == target or node not in G:
                continue
            node_comp = component_map.get(node)
            if node_comp != target_comp:
                try:
                    path = nx.shortest_path(G, target, node)
                    for j in range(len(path)-1):
                        u, v = path[j], path[j+1]
                        if not bridged_G.has_edge(u, v):
                            bridge_edges.append((u, v))
                    connected_components.add(node_comp)
                except nx.NetworkXNoPath:
                    continue
    
    # Connect remaining components via priority nodes
    for i, comp1 in enumerate(components):
        if i in connected_components:
            continue
        for comp2 in components[i+1:]:
            if comp2 in connected_components:
                continue
            min_path = None
            min_length = float('inf')
            for node1 in comp1:
                if node1 not in priority_nodes:
                    continue
                for node2 in comp2:
                    if node2 not in priority_nodes:
                        continue
                    try:
                        path = nx.shortest_path(G, node1, node2)
                        if len(path) < min_length:
                            min_length = len(path)
                            min_path = path
                    except nx.NetworkXNoPath:
                        continue
            if min_path:
                for j in range(len(min_path)-1):
                    u, v = min_path[j], min_path[j+1]
                    if not bridged_G.has_edge(u, v):
                        bridge_edges.append((u, v))
                connected_components.add(component_map[min_path[0]])
    
    # Add bridge edges
    for u, v in bridge_edges:
        bridged_G.add_edge(u, v, bridge=True)
    
    print(f"Added {len(bridge_edges)} bridge edges.")
    
    # Check remaining islands
    final_components = list(nx.connected_components(bridged_G))
    print(f"After bridging, {len(final_components)} islands remain.")
    if len(final_components) > 1:
        print("Remaining islands (first few nodes per island):")
        for i, comp in enumerate(final_components):
            print(f"Island {i+1}: {list(comp)[:5]}")
            if 'barack obama' in comp:
                print(f"  Note: Barack Obama is in Island {i+1}")
            if 'arnold schwarzenegger' in comp:
                print(f"  Note: Arnold Schwarzenegger is in Island {i+1}")
    
    return bridged_G

# Generate DOT file
def generate_dot_file(G, output_file, max_nodes=2000):
    with open(output_file, 'w') as f:
        f.write("graph bridged_friendships {\n")
        f.write("    rankdir=LR;\n")
        f.write("    node [shape=circle, style=filled, fillcolor=lightblue];\n")
        f.write('    "barack obama" [fillcolor=lightgreen];\n')
        f.write('    "arnold schwarzenegger" [fillcolor=lightgreen];\n')
        f.write("    edge [color=navy, penwidth=1];\n")
        
        # Bridge edges
        f.write('    edge [color=red, penwidth=2, style=dashed, label="bridge"];\n')
        for u, v, data in G.edges(data=True):
            if data.get('bridge', False):
                f.write(f'    "{u}" -- "{v}" [color=red, penwidth=2, style=dashed, label="bridge"];\n')
        
        # Regular edges
        f.write("    edge [color=navy, penwidth=1, style=solid];\n")
        nodes_written = set()
        edges_written = set()
        node_count = 0
        
        for u, v, data in G.edges(data=True):
            if node_count >= max_nodes:
                break
            if not data.get('bridge', False):
                if (u, v) not in edges_written and (v, u) not in edges_written:
                    nodes_written.add(u)
                    nodes_written.add(v)
                    f.write(f'    "{u}" -- "{v}";\n')
                    edges_written.add((u, v))
                    node_count += 1
        
        f.write("}\n")
    print(f"DOT file generated: {output_file}")

# Main execution
if __name__ == "__main__":
    json_files = [
        'friends1.json',
        'friends2.json',
        'friends3.json',
        'friends4.json',
        'friends5.json',
        'friends6.json'
    ]
    
    # Combine connections
    G, node_files = combine_connections(json_files)
    
    # Bridge islands
    bridged_G = find_bridge_connections(G, node_files, json_files)
    
    # Generate DOT file
    generate_dot_file(bridged_G, 'bridged_friendship_graph.dot')