# Episode IV graph. Edge width grows with the logarithm of the link value and
# vertex size with the eigenvector centrality; vertices coloured "#808080"
# are left unlabelled.
ig.plot(
    g4,
    bbox=(0, 0, 600, 400),
    layout=layout4,
    edge_width=[0.5 + log(link["value"]) / 2 for link in links4],
    vertex_label_dist=-3.5,
    vertex_color=[node["colour"] for node in nodes4],
    vertex_label=["" if node["colour"] == "#808080" else node["name"] for node in nodes4],
    vertex_shape=[sexshape(sex_dict[node["name"]]) for node in nodes4],
    vertex_size=[2 + 3 * log(1 + 1000 * c**2) for c in g4.eigenvector_centrality()],
)
# Aggregated graph (all films), drawn with the same width/size scaling but
# without vertex labels.
ig.plot(
    g_total,
    layout=layout_total,
    edge_width=[0.5 + log(link["value"]) / 2 for link in links_total],
    vertex_color=[node["colour"] for node in nodes_total],
    vertex_shape=[sexshape(node["sex"]) for node in nodes_total],
    vertex_size=[2 + 3 * log(1 + 1000 * c**2) for c in g_total.eigenvector_centrality()],
)
# Name of the outlier: the vertex of the aggregated graph with the smallest
# eigenvector centrality.
centr = np.array(g_total.eigenvector_centrality())
nodes_total[np.argmin(centr)]["name"]
# Episode IV: compare the graph density p with log(n)/n (the connectivity
# threshold of the G(n, p) random-graph model), then evaluate
# n * exp(-(n - 1) * p) for an isolated vertex.
p, n = g4.density(), g4.vcount()
print(f"{p:.2f}", ">", f"{log(n)/n:.2f}")
prob4 = n * exp(-(n - 1) * p)

# The same computation for the graph aggregated over all episodes.
p, n = g_total.density(), g_total.vcount()
print(f"{p:.2f}", ">", f"{log(n)/n:.2f}")
prob_total = n * exp(-(n - 1) * p)

# Probabilities of an isolated vertex appearing if the random-graph model holds.
print(f"{prob4:0.3f}", f"{prob_total:0.3f}")
# Two degree distributions on adjacent panels: Episode IV vs. all films.
degrees4 = g4.degree()
degrees_total = g_total.degree()
hist_both = pd.DataFrame(
    {
        "Degree": degrees4 + degrees_total,
        'Граф': ["Эпизод IV"] * len(degrees4) + ["Все фильмы"] * len(degrees_total),
    }
)
# Each panel is normalised on its own (common_norm=False).
sns.displot(
    hist_both,
    x="Degree",
    col="Граф",
    stat="probability",
    common_norm=False,
    discrete=True,
)
# Monte-Carlo sample of 1000 Erdos-Renyi graphs with the same vertex count
# and density as the Episode IV graph.
gnp4_sample = []  # pooled vertex degrees over all sampled graphs
gnp4char = [[], [], [], [], []]  # [density, degree, diameter, apl, transitivity]
# The same five statistics for the real graph would be: g4.density(),
# np.mean(g4.degree()), g4.diameter(), g4.average_path_length(),
# nx.transitivity(g4.to_networkx()).
n_vertices4, density4 = g4.vcount(), g4.density()
for _ in range(1000):
    gnp4 = g.from_networkx(nx.erdos_renyi_graph(n_vertices4, density4))
    gnp4_sample.extend(gnp4.degree())
    stats = (
        gnp4.density(),
        np.mean(gnp4.degree()),
        gnp4.diameter(),
        gnp4.average_path_length(),
        nx.transitivity(gnp4.to_networkx()),
    )
    for bucket, value in zip(gnp4char, stats):
        bucket.append(value)

# Degree distribution in Episode IV: real graph vs. pooled model sample.
true_degrees4 = g4.degree()
hist_gnp4 = pd.DataFrame(
    {
        "Degree": true_degrees4 + gnp4_sample,
        "Type": ["True"] * len(true_degrees4) + ["Model"] * len(gnp4_sample),
    }
)
sns.displot(
    hist_gnp4,
    x="Degree",
    stat="probability",
    hue="Type",
    common_norm=False,
    discrete=True,
    element="step",
)
# Reference graph read by error_WS through the module-level name ``gr``.
gr = g4


def error_WS(inp):  # inp = [dim, size, nei, p]
    """Distance between Watts-Strogatz and reference degree distributions.

    Generates 100 Watts-Strogatz graphs with the parameters in ``inp``
    (``dim``, ``size`` and ``nei`` are truncated to int, ``p`` is used as
    given), pools their vertex degrees and returns the Euclidean norm of
    the difference between the pooled degree histogram and the degree
    histogram of the module-level graph ``gr``.

    Both histograms use unit-width bins centred on 0..max(gr.degree()) and
    are normalised with ``density=True``.
    NOTE(review): model degrees above max(gr.degree()) fall outside the
    bins, so np.histogram leaves them out before normalising.
    """
    dim, size, nei, p = int(inp[0]), int(inp[1]), int(inp[2]), inp[3]
    true_degrees = gr.degree()
    bins = [-0.5] + [0.5 + x for x in range(max(true_degrees) + 1)]

    WS_sample = []
    for _ in range(100):
        g_WS = g.Watts_Strogatz(dim=dim, size=size, nei=nei, p=p)
        WS_sample.extend(g_WS.degree())

    model_density = np.histogram(WS_sample, bins=bins, density=True)[0]
    true_density = np.histogram(true_degrees, bins=bins, density=True)[0]
    return np.linalg.norm(model_density - true_density)
# Monte-Carlo sample of 1000 Watts-Strogatz graphs with fixed parameters
# (dim=1, size=22, nei=2, p=0.853).
WS_sample = []  # pooled vertex degrees over all sampled graphs
wschar = [[], [], [], [], []]  # [density, degree, diameter, apl, transitivity]
for _ in range(1000):
    g_WS = g.Watts_Strogatz(dim=1, size=22, nei=2, p=0.853)
    WS_sample.extend(g_WS.degree())
    stats = (
        g_WS.density(),
        np.mean(g_WS.degree()),
        g_WS.diameter(),
        g_WS.average_path_length(),
        nx.transitivity(g_WS.to_networkx()),
    )
    for bucket, value in zip(wschar, stats):
        bucket.append(value)
# (A degree histogram of WS_sample via sns.displot was disabled here.)

# A second, independent sample of 1000 graphs used only for transitivity.
WS_sample_transitivity = []
for _ in range(1000):
    g_WS = g.Watts_Strogatz(dim=1, size=22, nei=2, p=0.853)
    WS_sample_transitivity.append(nx.transitivity(g_WS.to_networkx()))
WS_sample_transitivitydf = pd.DataFrame({"Transitivity": WS_sample_transitivity})
# Indices (positions in nodes4) of the "core" characters: every node whose
# colour is not "#808080".
core = {idx for idx, node in enumerate(nodes4) if node["colour"] != "#808080"}

# g_core is created with len(core) vertices, so its edges must use ids in
# 0..len(core)-1. Remap the original node indices onto that range; when the
# core already occupies indices 0..len(core)-1 the mapping is the identity.
# (Without the remap, igraph is documented to grow the vertex count to fit
# the largest id, which would add non-core vertices to the seed graph.)
core_index = {old: new for new, old in enumerate(sorted(core))}

# Links whose both endpoints are core characters, expressed in the new ids.
edges_in_core = [
    (core_index[link["source"]], core_index[link["target"]])
    for link in links4
    if link["source"] in core and link["target"] in core
]
g_core = g(len(core), edges_in_core)
# Reference graph read by error_BA through the module-level name ``gr``.
gr = g4


def error_BA(inp):  # inp = [m, power]
    """Distance between Barabasi-Albert and reference degree distributions.

    Generates 100 Barabasi graphs with ``gr.vcount()`` vertices, grown from
    the module-level seed graph ``g_core`` with ``m = int(inp[0])`` and
    ``power = inp[1]``, pools their vertex degrees and returns the
    Euclidean norm of the difference between the pooled degree histogram
    and the degree histogram of ``gr``.

    Both histograms use unit-width bins centred on 0..max(gr.degree()) and
    are normalised with ``density=True``.
    NOTE(review): model degrees above max(gr.degree()) fall outside the
    bins, so np.histogram leaves them out before normalising.
    """
    m, power = int(inp[0]), inp[1]
    true_degrees = gr.degree()
    n_vertices = gr.vcount()
    bins = [-0.5] + [0.5 + x for x in range(max(true_degrees) + 1)]

    BA_sample = []
    for _ in range(100):
        g_BA = g.Barabasi(n=n_vertices, m=m, power=power, start_from=g_core)
        BA_sample.extend(g_BA.degree())

    model_density = np.histogram(BA_sample, bins=bins, density=True)[0]
    true_density = np.histogram(true_degrees, bins=bins, density=True)[0]
    return np.linalg.norm(model_density - true_density)
# Monte-Carlo sample of 1000 Barabasi graphs grown from g_core with fixed
# parameters (m=5, power=0.0679) and as many vertices as gr.
BA_sample = []  # pooled vertex degrees over all sampled graphs
bachar = [[], [], [], [], []]  # [density, degree, diameter, apl, transitivity]
n_vertices_gr = gr.vcount()
for _ in range(1000):
    g_BA = g.Barabasi(n=n_vertices_gr, m=5, power=0.0679, start_from=g_core)
    BA_sample.extend(g_BA.degree())
    stats = (
        g_BA.density(),
        np.mean(g_BA.degree()),
        g_BA.diameter(),
        g_BA.average_path_length(),
        nx.transitivity(g_BA.to_networkx()),
    )
    for bucket, value in zip(bachar, stats):
        bucket.append(value)

# Degree distribution: real graph vs. pooled model sample.
true_degrees_gr = gr.degree()
hist_BA = pd.DataFrame(
    {
        "Degree": true_degrees_gr + BA_sample,
        "Type": ["True"] * len(true_degrees_gr) + ["Model"] * len(BA_sample),
    }
)
sns.displot(
    hist_BA,
    x="Degree",
    stat="probability",
    hue="Type",
    common_norm=False,
    discrete=True,
    element="step",
)
from scipy.stats import chisquare

# Chi-square goodness-of-fit of the real degree distribution against the
# Barabasi-Albert model sample, on unit-width bins centred on
# 0..max(gr.degree()).
bins = [-0.5] + [0.5 + x for x in range(max(gr.degree()) + 1)]

# chisquare expects frequencies (counts), not proportions: feeding it two
# normalised histograms shrinks the statistic by the number of observations
# and inflates the p-value. Use the observed counts and scale the model
# probabilities to the same total.
observed_counts = np.histogram(gr.degree(), bins=bins)[0]
model_probabilities = np.histogram(BA_sample, bins=bins, density=True)[0]
expected_counts = model_probabilities * observed_counts.sum()

# NOTE(review): a bin with zero expected count yields an inf/nan statistic,
# and the chi-square approximation is unreliable for small expected counts.
chisquare(observed_counts, expected_counts)
# Diameter: distribution of bachar[2] with a red reference line at x = 3
# (presumably the value of the real graph - confirm).
sns.displot(bachar[2], stat="probability")
plt.vlines(3, 0, 0.5, colors='red')

# Average path length: distribution of bachar[3] with a red reference line
# at x = 1.91 (presumably the value of the real graph - confirm).
sns.displot(bachar[3], stat="probability")
plt.vlines(1.91, 0, 0.1, colors='red')
# 2x2 grid: the Episode IV graph coloured by four different vertex scores.
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(10, 10))

# Styling shared by all four panels.
edge_widths = [log(link["value"]) / 2 + 0.5 for link in links4]
vertex_shapes = [sexshape(sex_dict[node["name"]]) for node in nodes4]
link_weights = [link["value"] for link in links4]

# Each score vector is computed exactly once. Previously the centrality was
# recomputed inside the colour comprehension for every vertex just to take
# its maximum.
panels = [
    (ax1, 'Betweenness', g4.betweenness()),
    (ax2, 'Eigenvector', g4.eigenvector_centrality(weights=link_weights)),
    (ax3, 'Degree', g4.degree()),
    (ax4, 'Values', [node["value"] for node in nodes4]),
]
for ax, title, scores in panels:
    ax.title.set_text(title)
    top_score = max(scores)  # colours are scaled relative to the largest score
    ig.plot(
        g4,
        layout=layout4,
        target=ax,
        edge_width=edge_widths,
        vertex_size=20,
        vertex_color=[cmap(score / top_score) for score in scores],
        vertex_shape=vertex_shapes,
    )
# 1x3 grid: unweighted vs. weighted eigenvector centrality vs. node values
# on the Episode IV graph.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))

# Styling shared by all three panels.
edge_widths = [log(link["value"]) / 2 + 0.5 for link in links4]
vertex_shapes = [sexshape(sex_dict[node["name"]]) for node in nodes4]
link_weights = [link["value"] for link in links4]

# Each score vector is computed exactly once. Previously the centrality was
# recomputed inside the colour comprehension for every vertex just to take
# its maximum.
panels = [
    (ax1, 'Not weighted', g4.eigenvector_centrality()),
    (ax2, 'Weighted', g4.eigenvector_centrality(weights=link_weights)),
    (ax3, 'Values', [node["value"] for node in nodes4]),
]
for ax, title, scores in panels:
    ax.title.set_text(title)
    top_score = max(scores)  # colours are scaled relative to the largest score
    ig.plot(
        g4,
        layout=layout4,
        target=ax,
        edge_width=edge_widths,
        vertex_size=20,
        vertex_color=[cmap(score / top_score) for score in scores],
        vertex_shape=vertex_shapes,
    )
# Styling shared by both all-movies plots.
edge_widths_total = [log(link["value"]) / 2 + 0.5 for link in links_total]
vertex_shapes_total = [sexshape(node["sex"]) for node in nodes_total]

# Eigenvector centrality, not weighted, all movies.
# The centrality vector is computed once; previously it was recomputed for
# every vertex inside the colour comprehension just to take its maximum.
centrality_total = g_total.eigenvector_centrality()
top_centrality = max(centrality_total)
ig.plot(
    g_total,
    bbox=(0, 0, 600, 400),
    layout=layout_total,
    edge_width=edge_widths_total,
    vertex_size=10,
    vertex_color=[cmap(c / top_centrality) for c in centrality_total],
    vertex_shape=vertex_shapes_total,
)

# Eigenvector centrality, weighted by link value, all movies.
weighted_centrality_total = g_total.eigenvector_centrality(
    weights=[link["value"] for link in links_total]
)
top_weighted_centrality = max(weighted_centrality_total)
ig.plot(
    g_total,
    bbox=(0, 0, 600, 400),
    layout=layout_total,
    edge_width=edge_widths_total,
    vertex_size=10,
    vertex_color=[cmap(c / top_weighted_centrality) for c in weighted_centrality_total],
    vertex_shape=vertex_shapes_total,
)
def _assortativity(mixing):
    """Return (trace - chance) / (1 - chance) for the matrix ``mixing``.

    ``chance`` is the sum, over categories, of the column sum times the row
    sum of ``mixing``.
    NOTE(review): the formula presumably expects ``mixing`` to be
    normalised so that its entries sum to 1 - confirm.
    """
    chance = (mixing.sum(axis=0) * mixing.sum(axis=1)).sum()
    return (np.diag(mixing).sum() - chance) / (1 - chance)


# NOTE(review): as written here, r4 and r_total are both computed from the
# same ``arr``; verify that ``arr`` is rebuilt for the aggregated graph
# before the second computation.
r4 = _assortativity(arr)
print(f"{r4:0.2f}")
scipy.stats.chi2_contingency(arr)
r_total = _assortativity(arr)
print(f"{r_total:0.2f}")
scipy.stats.chi2_contingency(arr)
# Communities of the Episode IV graph from edge betweenness; the resulting
# dendrogram is converted to a flat clustering.
communities = g4.community_edge_betweenness().as_clustering()
num_communities = len(communities)
# NOTE(review): palette1 is built here but is not passed to ig.plot below.
palette1 = ig.RainbowPalette(n=num_communities)

# Tag every vertex, and every edge lying inside a community, with the
# community index.
for community_id, community in enumerate(communities):
    g4.vs[community]["color"] = community_id
    community_edges = g4.es.select(_within=community)
    community_edges["color"] = community_id

# All vertices are labelled. (A disabled alternative labelled only the
# vertices not coloured "#808080", with vertex_label_dist = -3.5.)
ig.plot(
    communities,
    bbox=(0, 0, 600, 400),
    layout=layout4,
    edge_width=[0.5 + log(link["value"]) / 2 for link in links4],
    vertex_label_dist=-2.5,
    vertex_color=[node["colour"] for node in nodes4],
    vertex_label=[node["name"] for node in nodes4],
    vertex_shape=[sexshape(sex_dict[node["name"]]) for node in nodes4],
    vertex_size=[2 + 3 * log(1 + 1000 * c**2) for c in g4.eigenvector_centrality()],
    mark_groups=True,
)
# Communities of the aggregated graph via the leading-eigenvector method.
communities = g_total.community_leading_eigenvector()
num_communities = len(communities)
palette = ig.RainbowPalette(n=num_communities)

# Tag every vertex, and every edge lying inside a community, with the
# community index.
for community_id, community in enumerate(communities):
    g_total.vs[community]["color"] = community_id
    community_edges = g_total.es.select(_within=community)
    community_edges["color"] = community_id

# Only vertices not coloured "#808080" are labelled.
ig.plot(
    communities,
    palette=palette,
    layout=layout_total,
    edge_width=[0.5 + log(link["value"]) / 2 for link in links_total],
    vertex_color=[node["colour"] for node in nodes_total],
    vertex_shape=[sexshape(node["sex"]) for node in nodes_total],
    vertex_size=[2 + 3 * log(1 + 1000 * c**2) for c in g_total.eigenvector_centrality()],
    vertex_label=["" if node["colour"] == "#808080" else node["name"] for node in nodes_total],
    mark_groups=True,
)
# Communities of the aggregated graph via the fast-greedy method; the
# resulting dendrogram is converted to a flat clustering.
communities = g_total.community_fastgreedy().as_clustering()
num_communities = len(communities)
palette = ig.RainbowPalette(n=num_communities)

# Tag every vertex, and every edge lying inside a community, with the
# community index.
for community_id, community in enumerate(communities):
    g_total.vs[community]["color"] = community_id
    community_edges = g_total.es.select(_within=community)
    community_edges["color"] = community_id

# Only vertices not coloured "#808080" are labelled.
ig.plot(
    communities,
    palette=palette,
    layout=layout_total,
    edge_width=[0.5 + log(link["value"]) / 2 for link in links_total],
    vertex_color=[node["colour"] for node in nodes_total],
    vertex_shape=[sexshape(node["sex"]) for node in nodes_total],
    vertex_size=[2 + 3 * log(1 + 1000 * c**2) for c in g_total.eigenvector_centrality()],
    vertex_label=["" if node["colour"] == "#808080" else node["name"] for node in nodes_total],
    mark_groups=True,
)
#ig.plot(communities,bbox=(0, 0, 600, 400), layout = layout4, edge_width = [log(i["value"])/2+0.5 for i in links4], vertex_label_dist = -2.5, vertex_color = [i["colour"] for i in nodes4 ], vertex_label = [i["name"] for i in nodes4], vertex_shape = [sexshape(sex_dict[i["name"]]) for i in nodes4 ], vertex_size = [log(1000*x**2 + 1)*3 + 2 for x in g4.eigenvector_centrality()], mark_groups = True)

