In [5]:
import nltk
import networkx as nx
import matplotlib.pyplot as plt
import powerlaw
import community as community_louvain
import matplotlib.cm as cm

In [6]:
# Open the file with character names
with open('characters.txt', 'r', encoding='utf-8') as f:
    characters = [line.strip().split(',') for line in f]
# Initialize a graph
G = nx.Graph()
    
# Add nodes to the graph
for character_list in characters:
    G.add_node(character_list[0])

# Open the second file
with open('count_of_monte_cristo.txt', 'r', encoding='utf-8') as f:
    # Read the entire file content
    text = f.read()

# Tokenize the text into sentences
sentences = nltk.sent_tokenize(text)

# For each sentence, check if any of the aliases are in the sentence
for sentence in sentences:
    # Find all characters present in the sentence
    characters_in_sentence = [character_list[0] for character_list in characters if any(alias in sentence for alias in character_list)]
    # Add edges between all pairs of characters in the sentence
    for i in range(len(characters_in_sentence)):
        for j in range(i+1, len(characters_in_sentence)):
            if characters_in_sentence[i] != characters_in_sentence[j]:  # Check if the characters are different
                # Add the edge without weight
                G.add_edge(characters_in_sentence[i], characters_in_sentence[j])

In [81]:
# Calculate average clustering coefficient
avg_clustering_coefficient = nx.average_clustering(G)

# Calculate diameter
diameter = nx.diameter(G)

# Print the results
print("Average Clustering Coefficient:", avg_clustering_coefficient)
print("Diameter:", diameter)


Average Clustering Coefficient: 0.5939336224917304
Diameter: 3


In [101]:

# Initialize variables to store the best matching p value and the difference in average clustering coefficient and diameter
best_p = None
min_diff = float('inf')
number_of_nodes = G.number_of_nodes()
# Get the degrees of all nodes
degrees = [deg for node, deg in G.degree()]

# Calculate the average degree
average_degree = sum(degrees) / len(degrees)

# Iterate over a range of p values
for p in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    # Generate a Watts-Strogatz graph
    WS = nx.watts_strogatz_graph(number_of_nodes, round(average_degree), p)
    # Calculate the average clustering coefficient
    avg_clustering_coefficient_WS = nx.average_clustering(WS)
    # Calculate the diameter
    diameter_WS = nx.diameter(WS)

    diff = abs(avg_clustering_coefficient_WS - avg_clustering_coefficient) + abs(diameter_WS - diameter)
    # Check if the current p value is better than the previous best p value
    if diff < min_diff:
        best_p = p
        min_diff = diff


# Print the best matching p value
print("Best matching p value for Watts Strogatz graph:", best_p, "with a difference of", diff)
diff = None
# Iterate over a range of p values
for p in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]:
    # Generate an Erdos-Rényi graph
    ER = nx.erdos_renyi_graph(number_of_nodes, p)

    # Calculate the average clustering coefficient
    avg_clustering_coefficient_ER = nx.average_clustering(ER)
    # Calculate the diameter
    diameter_ER = nx.diameter(ER)

    diff = abs(avg_clustering_coefficient_ER - avg_clustering_coefficient) + abs(diameter_ER - diameter)
    # Check if the current p value is better than the previous best p value
    if diff < min_diff:
        best_p = p
        min_diff = diff

# Print the best matching p value
print("Best matching p value for Erdos-Rényi graph:", best_p, "with a difference of", diff)




Best matching p value for Watts Strogatz graph: with a difference of 0.3 0.42677502351494967
Best matching p value for Erdos-Rényi graph: 0.3 with a difference of 1.2933392356965514
