from graph_class import graph
from collections import deque, Counter, defaultdict
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import random
import math
import time
import cugraph

class DFS_Vars:
    """
    A class to keep track of variables we use during DFS.
    Depending on the application of DFS, not all vars are necessarily used.
    """
    def __init__(self, sigma):
        # NOTE(review): sigma (the vertex ordering) is accepted but never
        # stored or read here — presumably kept for interface symmetry with
        # DFS_iter; confirm before removing.
        self.first = defaultdict(int)   # discovery times (0 = unset; not set in this file's visible code)
        self.last = defaultdict(int)    # finish times assigned by dfs_iter (0 = unset)
        self.visited = set()            # vertices already discovered
        self.exploring = set()          # vertices currently being explored (unused in visible code)
        self.root = {}                  # forest-tree id -> root vertex of that tree
        self.Fcomp = {}                 # vertex -> id of its tree in the DFS forest
        self.parent = {}                # DFS-tree parent pointers (unused in visible code)
        self.fcomp = 0                  # number of trees started in the DFS forest
        self.time = 0                   # global DFS clock, incremented on each finish
        self.traversal_order = []       # vertices in visitation order (unused in visible code)
        self.cycles = []                # cycles found, when cycle detection is the application
        self.cyclic = False             # whether any cycle was detected
        self.F = graph()                # the DFS forest itself, built edge by edge

def DFS_iter(G, sigma):
    '''
    Iterative DFS over the whole graph G.

    Start vertices are taken in the order given by sigma (an ordering of
    the vertices); each unvisited start seeds a new tree in the DFS
    forest.  Returns the populated DFS_Vars bookkeeping object.
    '''
    state = DFS_Vars(sigma)
    for start in sigma:
        if start in state.visited:
            continue
        # Begin a fresh tree in the forest, rooted at this vertex.
        state.fcomp += 1
        state.root[state.fcomp] = start
        dfs_iter(G, start, state)
    return state
def dfs_iter(G, v, dfs_vars):
    '''
    Iterative DFS from vertex v, updating the shared dfs_vars state.

    Finish "times" are assigned with the re-push trick: the first time a
    vertex is popped it is pushed back onto the stack, so the next time it
    is popped (after all of its descendants were processed) we record its
    finish time.  Credit:
    https://stackoverflow.com/questions/24051386/kosaraju-finding-finishing-time-using-iterative-dfs

    - Note that time here isn't calculated exactly as we did in class, but
      the relative order of last[v] is still preserved, which is all the
      SCC / topological-order applications need.
    - Note that this method only fills in the pieces necessary for the SCC
      algorithm (visited, Fcomp, last, F).
    '''
    node_stack = deque([v])  # start with the passed vertex v
    while node_stack:
        v = node_stack.pop()
        if v not in dfs_vars.visited:
            # First visit: re-push v so we can assign its finish time once
            # all of its descendants have been explored.
            node_stack.append(v)
            dfs_vars.visited.add(v)
            dfs_vars.Fcomp[v] = dfs_vars.fcomp  # set v's tree in the forest
            for u in G.adj_list[v]:
                if u not in dfs_vars.visited:
                    dfs_vars.F.Add_Edge(v, u)  # tree edge in the DFS forest
                    node_stack.append(u)
        elif v not in dfs_vars.last:
            # Second pop of v: record its finish time.
            # BUG FIX: the original test was `dfs_vars.last[v] == 0`, but
            # the very first vertex to finish legitimately gets finish time
            # 0, so a stale duplicate stack entry for it would pass the test
            # again and overwrite its finish time, corrupting the finish
            # ordering.  Membership in `last` is an unambiguous sentinel.
            dfs_vars.last[v] = dfs_vars.time
            dfs_vars.time += 1

def get_random_dag(G):
    '''
    Convert G into a random DAG reduction, G_pi.

    A randomly shuffled vertex order seeds a full DFS; sorting by
    decreasing finish time yields an ordering pi.  Every edge (j, i) with
    pi(i) < pi(j) is discarded, so only edges that respect the ordering
    survive and the result is acyclic.  Returns (G_pi, pi).
    '''
    order = list(G.vertices)
    random.shuffle(order)
    results = DFS_iter(G, order)
    # Decreasing finish time gives the topological-style ordering.
    pi = sorted(results.last, key=results.last.get, reverse=True)
    rank = {vert: idx for idx, vert in enumerate(pi)}
    # Keep only the edges that go forward in the ordering.
    G_pi = graph()
    for tail, nbrs in G.adj_list.items():
        for head in nbrs:
            if rank[tail] < rank[head]:
                G_pi.Add_Edge(tail, head)
    return G_pi, pi

def get_networkx(G, edge_weight=1):
    '''
    Convert our in-class graph object into a networkx.DiGraph, since
    cuGraph consumes networkx graphs.  Every edge gets the same weight,
    `edge_weight`.
    '''
    nxg = nx.DiGraph()
    nxg.add_edges_from(
        (v, u, {'weight': edge_weight})
        for v in G.vertices
        for u in G.adj_list[v]
    )
    return nxg

# Read in the directed Facebook sample graph
fb_graph = graph()
fb_graph.Read_Edges(fname='facebooksample.txt', sep=' ', flag='d')

## Show that shortest path fails when we attempt to operate on a non-DAG
# With every edge weight set to -1, a shortest path is a longest path, but
# negative-weight cycles make SSSP ill-defined on a general (cyclic) graph.
fb_graph_nx = get_networkx(fb_graph, edge_weight=-1)
source = list(fb_graph.vertices)[0]
# cugraph.shortest_path_length(fb_graph_nx, source=source) # Running this cell throws Error

random_fb_dag, pi = get_random_dag(fb_graph) # pi is the randomly discovered topological ordering used to make the DAG
random_fb_dag = get_networkx(random_fb_dag, edge_weight=-1) # set all edge weights to -1
print('Source Vertex is', pi[0])

# Show the algorithm in action
source = pi[0]
sssp_result = cugraph.sssp(random_fb_dag, source=source) # GPU implementation of Single Source Shortest Path (SSSP)
sssp_result # note that e+38 distances indicate positive infinity

# This is the longest path in the random DAG we have created
# (distances are negative, so negating the minimum gives the longest length)
-sssp_result['distance'].min()

# Get the vertex v which has the longest path in the DAG from source -> v
sssp_result[sssp_result['distance'] == sssp_result['distance'].min()]

# Show the distance to the source, and show that source has no parent
sssp_result[sssp_result['vertex'] == source]

target_vertex = sssp_result[sssp_result['distance'] == sssp_result['distance'].min()]
target_vertex

# Look up the row for the target vertex's predecessor
sssp_result[sssp_result['vertex'] == target_vertex['predecessor'].iloc[0]]

# Recover the longest path in the DAG by walking predecessor pointers back
# toward the source; the source row has predecessor < 0, ending the walk.
curr_vertex = target_vertex
longest_path = deque([])
while int(curr_vertex['predecessor'].iloc[0]) >= 0:
    longest_path.appendleft(curr_vertex['vertex'].iloc[0])
    curr_vertex = sssp_result[sssp_result['vertex'] == curr_vertex['predecessor'].iloc[0]]
print(longest_path)

# Demonstrate how different random DAGs produce different longest path results
# Note that shortest_path_length is an analogous parallel shortest-path algorithm, which returns less data
for _ in range(5):
    random_fb_dag, pi = get_random_dag(fb_graph) # pi is the randomly discovered topological ordering used to make the DAG
    random_fb_dag = get_networkx(random_fb_dag, edge_weight=-1)
    print('Source Vertex is', pi[0])
    result = cugraph.shortest_path_length(random_fb_dag, source=pi[0]) # GPU implementation of shortest path
    print('Longest path found in this DAG is:', -result['distance'].min() )

# 1e
def check_path(G: graph, p: list):
    '''
    Take a graph, G, and a list of vertices, p.
    Returns True if p is a simple path in G, False otherwise.
    Meant as a validation that the longest-path search works.
    '''
    # A simple path may not revisit a vertex.
    if len(p) != len(set(p)):
        return False
    # Every consecutive pair of vertices must be an actual edge of G.
    return all(G.isEdge(u, w) for u, w in zip(p, p[1:]))

# Recover the longest path in the DAG
def get_longest_path(sssp_result):
    '''
    Reconstruct the longest path from the cuGraph SSSP result frame
    (columns: vertex, distance, predecessor).

    The row with the most negative distance is the farthest vertex; from
    there we follow predecessor pointers backwards until we reach the
    source, whose predecessor is negative.  Note the source vertex itself
    is not included in the returned list.
    '''
    farthest = sssp_result[sssp_result['distance'] == sssp_result['distance'].min()]
    path = deque()
    row = farthest
    while int(row['predecessor'].iloc[0]) >= 0:
        path.appendleft(row['vertex'].iloc[0])
        row = sssp_result[sssp_result['vertex'] == row['predecessor'].iloc[0]]
    return list(path)

def random_dags_longest_path(G, time_limit):
    '''
    Heuristic longest-path search: repeatedly generate random DAG
    reductions of G (edge weight -1) and find the longest path in each via
    GPU shortest-path.

    Stops either after n! tries or when time_limit (seconds) is exceeded.
    Clearly we will never reach n! tries in practice, so the time limit
    governs; the n! bound just caps the loop.

    Returns (longest_path_len, longest_path, num_permutations_tried).
    Fixes over the original: the unused `same_ordering` counter was
    removed, and num_permutations_tried now counts every attempt (the
    original left it 0 unless the time limit fired, and was off by one
    when it did).
    '''
    n = len(list(G.vertices))
    start_time = time.time()
    longest_path = None
    longest_path_len = 0
    num_permutations_tried = 0
    for _ in range(math.factorial(n)):
        # Build a random DAG reduction with -1 edge weights so that
        # shortest paths correspond to longest paths.
        G_pi, pi = get_random_dag(G)
        G_pi = get_networkx(G_pi, edge_weight=-1)
        source = pi[0]
        num_permutations_tried += 1
        # shortest_path_length returns less data than sssp (no
        # predecessors), so use it for the cheap length check first.
        path_len = -cugraph.shortest_path_length(G_pi, source)['distance'].min()
        if path_len > longest_path_len:
            longest_path_len = path_len
            # Re-run with predecessors so the path itself can be recovered.
            sssp_result = cugraph.sssp(G_pi, source=source)
            longest_path = get_longest_path(sssp_result)
            print('New longest path:', path_len)
        if time.time() - start_time > time_limit:
            print('Tried', num_permutations_tried,
                  'random DAG permutations before the time limit was reached.')
            break
    return longest_path_len, longest_path, num_permutations_tried

# Run the heuristic longest-path search on the Facebook graph for 10 minutes
longest_path_len, longest_path, num_permutations_tried = random_dags_longest_path(fb_graph, 600) # 600 seconds

print('Longest path found was', int(longest_path_len), 'vertices long and the number of DAGs tried in 10 minutes was', num_permutations_tried)

# Show that the path is valid (simple, and every hop is an edge of fb_graph)
valid = check_path(fb_graph, longest_path)
print("The returned path is valid:", valid)

# Confirm the length of the path
len(longest_path)

# Show the path itself
print('The path itself was:\n' + str(longest_path))

# Try the same with epinions
epinions = graph()
epinions.Read_Edges(fname="epinions.txt")

longest_path_len, longest_path, num_permutations_tried = random_dags_longest_path(epinions, 600) # 600 seconds

print('Longest path found was', int(longest_path_len), 'vertices long and the number of DAGs tried in 10 minutes was', num_permutations_tried)