ASSIGNMENT 1 - NETWORK ANALYSIS

1. Create a network

# Basic packaging for network exploration import networkx as nx import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import community as community_louvain sns.set()

#import attributs attributes = pd.read_csv('https://raw.githubusercontent.com/SDS-AAU/SDS-master/master/00_data/network_krackhard/Krack-High-Tec-Attributes.csv')

#lowering the heads attributes.columns = [w.lower() for w in attributes.columns]

attributes = attributes.set_index('id').to_dict('index')

attributes

#import edges as df edge_list = ['source', 'target', 'weight'] df_edge_advice =pd.read_csv("/work/Krack-High-Tec-edgelist-Advice.txt", sep='\s+', header=None, names=edge_list) df_edge_friend =pd.read_csv("/work/Krack-High-Tec-edgelist-Friendship.txt", sep='\s+', header=None, names=edge_list) df_edge_report =pd.read_csv("/work/Krack-High-Tec-edgelist-ReportsTo.txt", sep='\s+', header=None, names=edge_list)

#Remove ids without an edge df_edge_advice = df_edge_advice[df_edge_advice.weight != 0] df_edge_friend = df_edge_friend[df_edge_friend.weight != 0] df_edge_report = df_edge_report[df_edge_report.weight != 0]

#Create networks for each of the 3 networks (Advice, Friendship, ReportsTo) G_a = nx.from_pandas_edgelist(df_edge_advice, source='source', target='target', edge_attr='weight', create_using=nx.Graph()) G_f = nx.from_pandas_edgelist(df_edge_friend, source='source', target='target', edge_attr='weight', create_using=nx.Graph()) G_r = nx.from_pandas_edgelist(df_edge_report, source='source', target='target', edge_attr='weight', create_using=nx.Graph())

#Add the attributes to the nodes of each network nx.set_node_attributes(G_a, attributes) nx.set_node_attributes(G_f, attributes) nx.set_node_attributes(G_r, attributes)

#test the number of notes for each network len(G_a.nodes()), len(G_f.nodes()), len(G_r.nodes())

#test the number of edges for each network len(G_a.edges), len(G_f.edges), len(G_r.edges)

2. Analysis

#create a overall network T of all the other networks T = nx.compose_all([G_a, G_f, G_r])

#Print the density, transitivity and reciprocity of the overall network T print('Density is',nx.density(T)) print('Transitivity is', nx.transitivity(T)) print('Recisporcity is', nx.reciprocity(T))

Are Relationships like friendship and advice giving usually reciprocal?

edge_advice = df_edge_advice.set_index('source').to_dict() edge_friend = df_edge_friend.set_index('source').to_dict() edge_report = df_edge_report.set_index('source').to_dict()

ties = ['edge_advice', 'edge_friend', 'edge_report'] for tie in ties: T_sub = T.edge_subgraph([(u,v) for u,v,d in T.edges(data=True) if tie in d.keys()]) print('Tie Type:' + tie)

#find how many frinds of friends each person have, that are friend with the person sum(nx.triangles(G_f).values())/3 #all the people have a total 102 friends, that are friends with the first persons too.

#find the mean of how many people each node is connected to print('Advice-seeking:',pd.Series([*nx.degree_centrality(G_a).values()]).mean()) print('Friendships:', pd.Series([*nx.degree_centrality(G_f).values()]).mean()) #employees are more likely to have a advice-seeking relationship

B: Node level characteristics: Likewise, find out:

deg_cent_a = nx.degree_centrality(G_a) deg_cent_f = nx.degree_centrality(G_f) deg_cent_r = nx.degree_centrality(G_r) #Most polular advice giver max_dc_a = max(deg_cent_a.values()) max_dc_f = max(deg_cent_f.values()) max_dc_r = max(deg_cent_r.values()) # Find the user(s) that have collaborated the most: prolific_collaborators prolific_collaborators_a = [n for n, dc in deg_cent_a.items() if dc == max_dc_a] prolific_collaborators_f = [n for n, dc in deg_cent_f.items() if dc == max_dc_f] prolific_collaborators_r = [n for n, dc in deg_cent_r.items() if dc == max_dc_r] # Print the most prolific collaborator(s) print('Most polular advice giver: id',prolific_collaborators_a) print('Most polular friend giver: id',prolific_collaborators_f) print('Most polular Reported to: id',prolific_collaborators_r)

#Create a sub graph of managers and non managers managers = ( node for node, data in G_f.nodes(data=True) if data.get("level") == 3) G_f_m = G_f.subgraph(managers) non_managers = ( node for node, data in G_f.nodes(data=True) if data.get("level") != 3) G_f_nm = G_f.subgraph(non_managers)

print('Non_mangaers:',pd.Series([*nx.degree_centrality(G_f_nm).values()]).mean()) print('Managers:',pd.Series([*nx.degree_centrality(G_f_m).values()]).mean()) print('Managers are more popular as friends')

C: Relational Characteristics: Answer the following questions:

#Create a sub graph of managers in advice givers G_a_m = G_a.subgraph(managers) print('Age') print('Friendships:', nx.numeric_assortativity_coefficient(G_f, "age")) print('Advice givers:', nx.numeric_assortativity_coefficient(G_a, "age")) print('Managers of the same age are more likly to become friends') print('') print('Tenure') print('Friendships:', nx.numeric_assortativity_coefficient(G_f, "tenure")) print('Advice givers:', nx.numeric_assortativity_coefficient(G_a, "tenure")) print('Managers with the same tenure are more likly to become advice givers') print('') print('Level') print('Friendships:', nx.numeric_assortativity_coefficient(G_f, "level")) print('Advice givers:', nx.numeric_assortativity_coefficient(G_a, "level")) print('Managers on the same level are more likly to become friends') print('') print('Deptartment') print('Friendships:', nx.numeric_assortativity_coefficient(G_f, "dept")) print('Advice givers:', nx.numeric_assortativity_coefficient(G_a, "dept")) print('Managers from the same department are more likly to become friends')

df_edge_friend['combined'] = df_edge_friend[edge_list].apply(lambda row: '_'.join(row.values.astype(str)), axis=1) df_edge_advice['combined'] = df_edge_advice[edge_list].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)

#make a coloum to compare df_edge_friend['combined'] = df_edge_friend[edge_list].apply(lambda row: '_'.join(row.values.astype(str)), axis=1) df_edge_advice['combined'] = df_edge_advice[edge_list].apply(lambda row: '_'.join(row.values.astype(str)), axis=1) # make a new dataframe with a boolean that compare if friends also gives advice compare = df_edge_friend['combined'].isin(df_edge_advice['combined']) compare.value_counts() # Friends are more likely to also give advice

3. Visualization

# Here we can calculate different centrality indicators as well as partition (community detection) centrality_dgr = nx.degree_centrality(G_a) centrality_eig = nx.eigenvector_centrality_numpy(G_a) partition = community_louvain.best_partition(G_a) #that will take some time... degree = G_a.degree()

# All these indicators can now be set as attribute of the Graph nx.set_node_attributes(G_a, centrality_dgr, 'dgr') nx.set_node_attributes(G_a, centrality_eig, 'eig') nx.set_node_attributes(G_a, partition, 'partition') nx.set_node_attributes(G_a, dict(degree), 'degree_basic')

nx.draw(G_a,pos=nx.circular_layout(G_a), with_labels=True, node_size=[v * 5000 for v in centrality_eig.values()])

nx.draw(G_f,pos=nx.circular_layout(G_f), with_labels=True, node_size=[v * 5000 for v in centrality_eig.values()])

nx.draw(G_r,pos=nx.circular_layout(G_r), with_labels=True, node_size=[v * 5000 for v in centrality_eig.values()])

nx.draw(T,pos=nx.circular_layout(T), with_labels = True, node_size=[v * 5000 for v in centrality_eig.values()])