import numpy as np
# Report the installed numpy version (at most its first six characters)
# next to the minimum version this script states it needs.
# NOTE(review): np.nan_to_num(..., nan=...) is called later in this file;
# that keyword appears to need a newer numpy than 1.7.1 -- confirm the
# minimum advertised here.
print("Your numpy version: %6.6s (need at least 1.7.1)" % (np.__version__,))
Your numpy version: 1.19.5 (need at least 1.7.1)
# Worked example on a small six-node network.
# 1.2 Build the raw adjacency matrix H.
H = np.matrix(
    [
        [1, 0, 2, 0, 4, 3],
        [3, 0, 1, 1, 0, 0],
        [2, 0, 4, 0, 1, 0],
        [0, 0, 1, 0, 0, 1],
        [8, 0, 3, 0, 5, 2],
        [0, 0, 0, 0, 0, 0],
    ]
)
# 1.3 Modify the adjacency matrix.
# Overwrite the main diagonal with zeros in place, so no node links to itself.
np.fill_diagonal(H, 0)
print(H)
[[0 0 2 0 4 3]
[3 0 1 1 0 0]
[2 0 0 0 1 0]
[0 0 1 0 0 1]
[8 0 3 0 0 2]
[0 0 0 0 0 0]]
# normalize the matrix
# Divide every column of H by its column sum so that each non-empty column
# adds up to 1.  A column whose sum is 0 evaluates 0/0 and yields NaN; that
# is expected here, so the floating-point warning is silenced for this one
# division only instead of leaking a RuntimeWarning to the user.
with np.errstate(divide="ignore", invalid="ignore"):
    H = H / H.sum(axis=0)
# Replace the NaN entries produced above with 0, modifying H in place.
np.nan_to_num(H, copy=False, nan=0)
/shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
# 1.4 Identify the dangling nodes
# d holds one minus each column sum of H (summed along axis 0): a column
# that sums to 0 gives 1, a column that sums to 1 gives 0.
d = 1 - H.sum(axis=0)
# 1.5 Calculate the influence vector
# article vector
a = np.array([3/14, 2/14, 5/14, 1/14, 2/14, 1/14])
# influence vector: the starting iterate gives every one of the six nodes
# the same weight
pi = np.full(6, 1/6)
alpha = 0.85
epslon = 0.00001   # convergence threshold on the L1 change between iterates
residual = 1       # start above the threshold so the loop runs at least once
max_iter = 100     # hard cap on the number of iterations
iter_count = 0
# Iterate until successive iterates differ by less than epslon in L1 norm,
# or until max_iter updates have been performed.
while residual >= epslon and iter_count < max_iter:
    # Scalar weight applied to the article vector.  The [0, 0] index relies
    # on np.matmul(d, pi) being 2-D, i.e. on d being an np.matrix row.
    a_weight = (alpha * np.matmul(d, pi))[0, 0] + 1 - alpha
    pi_prime = alpha * np.matmul(H, pi) + a_weight * a
    # Flatten the result to a plain 1-D array so it can be fed back in as pi.
    pi_prime = np.squeeze(np.asarray(pi_prime))
    # calculate l1 norm of the change, vectorised instead of a Python loop
    l1_norm = np.abs(pi_prime - pi).sum()
    residual = l1_norm
    pi = pi_prime
    iter_count += 1
temp = np.matmul(H, pi)
# 1.6 Calculate EF
# Rescale H.pi so that its entries sum to 100.
EF = 100 * temp / temp.sum(1)
# Bare expression: presumably left so a notebook cell displays EF.
EF
# 1.1 Data Input
# Find the smallest value in the first column of links.txt, with the search
# starting from 100.  The last character of the first field is dropped
# before the integer conversion.
m = 100
with open('./links.txt') as links_file:
    for record in links_file:
        first_field = record.split()[0]
        m = min(m, int(first_field[:-1]))
print(m)
# indices start from 0
# matrix dimension should be 10748
0
# 1.2 creating adjacency matrix
# Allocate the 10748 x 10748 integer zero matrix directly in numpy.  Going
# through a nested Python list first would create roughly 115 million
# Python ints before the conversion, which is very slow and memory-hungry.
# np.asmatrix wraps the array without copying it.
adj_m = np.asmatrix(np.zeros((10748, 10748), dtype=int))
with open('./links.txt') as f:
    # Stream the file line by line instead of loading every line at once.
    for l in f:
        # Each line holds three whitespace-separated fields; the last
        # character of the first two is dropped before integer conversion.
        i, j, k = l.split()
        i = int(i[:-1])
        j = int(j[:-1])
        k = int(k)
        adj_m[i, j] = k
# 1.3
# zero all diagonals, in place
np.fill_diagonal(adj_m, 0)
# normalize columns
# Each column is divided by its column sum.  A column whose sum is 0
# evaluates 0/0 and produces NaN entries, which this step leaves in H; the
# expected floating-point warning is silenced for this division only.
with np.errstate(divide="ignore", invalid="ignore"):
    H = adj_m / adj_m.sum(axis=0)
/shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
# testings
# Sanity check of the normalisation steps on a 2x2 matrix whose second
# column sums to zero.
testing = np.matrix([[1, 0],
                     [1, 0]])
# Column-normalise; the all-zero column turns into NaN.
testing = testing / testing.sum(axis=0)
print(testing)
# Replace the NaN entries with 0 in place.
np.nan_to_num(testing, copy=False, nan=0)
print(testing)
# One minus each column sum: 1 for the empty column, 0 for the other.
print(1 - testing.sum(axis=0))
[[0.5 nan]
[0.5 nan]]
[[0.5 0. ]
[0.5 0. ]]
[[0. 1.]]
/shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:3: RuntimeWarning: invalid value encountered in true_divide
This is separate from the ipykernel package so we can avoid doing imports until
# 1.4 find the dangling nodes
# Replace the NaN entries of H with 0, modifying H in place.
np.nan_to_num(H, copy=False, nan=0)
# d holds one minus each column sum of H (summed along axis 0).
d = 1 - H.sum(axis=0)
# 1.5 calculating influence vector
# article vector: every one of the 10748 entries gets the same weight
a = np.full(10748, 1/10748)
# influence vector: the starting iterate is uniform as well
pi = np.full(10748, 1/10748)
# iteration steps
alpha = 0.85
epslon = 0.00001   # convergence threshold on the L1 change between iterates
residual = 1       # start above the threshold so the loop runs at least once
max_iter = 100     # hard cap on the number of iterations
iter_count = 0
# Iterate until successive iterates differ by less than epslon in L1 norm,
# or until max_iter updates have been performed.
while residual >= epslon and iter_count < max_iter:
    # Scalar weight applied to the article vector.  The [0, 0] index relies
    # on np.matmul(d, pi) being 2-D, i.e. on d being an np.matrix row.
    a_weight = (alpha * np.matmul(d, pi))[0, 0] + 1 - alpha
    pi_prime = alpha * np.matmul(H, pi) + a_weight * a
    # Flatten the result to a plain 1-D array so it can be fed back in as pi.
    pi_prime = np.squeeze(np.asarray(pi_prime))
    # calculate l1 norm of the change, vectorised instead of a Python loop
    l1_norm = np.abs(pi_prime - pi).sum()
    residual = l1_norm
    pi = pi_prime
    iter_count += 1
# 1.6 Calculate EF
# Rescale H.pi so that its entries sum to 100.
temp = np.matmul(H, pi)
EF = 100 * temp / temp.sum(1)
import heapq

# Select the 20 largest EF scores with a bounded min-heap: every entry is
# pushed, and whenever the heap holds more than 20 entries its smallest one
# is removed.  The final list is in heap order, not sorted by score.
index_hp = []  # format: [(EF score, article number), ...]
for article in range(10748):
    entry = (EF[0, article], article)
    heapq.heappush(index_hp, entry)
    if len(index_hp) > 20:
        heapq.heappop(index_hp)
print(index_hp)
[(0.1490020753486353, 5002), (0.1493705178705957, 422), (0.1507377956621044, 1223), (0.1635672519020087, 1383), (0.17044316755763203, 7580), (0.17508190055871345, 1922), (0.18274353734472784, 5966), (0.17020090089146345, 900), (0.16799625951657732, 1559), (0.20143500560867666, 1994), (0.22525532878980195, 6697), (0.23517265944061178, 6523), (0.18076805782432617, 6179), (0.18503137049967266, 2992), (0.24739565945324454, 725), (0.22611774005016175, 6569), (1.108640190716874, 8930), (0.2167006766885307, 6667), (0.20648047147465692, 4408), (0.24381834932428528, 239)]