###  Find a longest Path in a DAG  

#### input: 
    - source node: the beginning of a specific path
    - sink node: the end of a specific path
    - a directed graph with weights assigned to the edges, in adjacency list form

#### output:
    - int: the length of a longest path in the graph
    - str: a longest path

#### Algorithm:
    1. Find the topological order of the graph
    2. Use dynamic programming to find the longest path
    3. Use trace back to retrieve the path

#### Variable:
    - edges: a list to store Edge objects
    - topoList: a topologically ordered list for storing nodes
    - nodeDict: a dict whose keys are nodes and whose values are scores
    - longestPath: reverse longest path (list)
    - longestDAG: longest path (string)

### Import related modules

In [None]:
import random
import math

### Create a class for reading adjacency list 

In [4]:
class GraphReader():
    """
    Reader for an adjacency-list file describing a weighted graph.

    The first line of the file is taken as the source node, the second
    line as the sink node and every following line as one edge.

    Attributes
    ----------
    file : str
        adjacency file

    Methods
    -------
    graphReader:
        return the sourceNode, sinkNode, edge
    """
    def __init__(self, infile):
        """
        Remember which adjacency file has to be read.

        Parameters
        ----------
            infile : str
                adjacency file
        """
        self.file = infile

    def graphReader(self):
        """
        Read the adjacency file.

        Return
        ----------
            sourceNode : str
                first line of the file, trailing whitespace removed
            sinkNode : str
                second line of the file, trailing whitespace removed
            edge : list
                the following lines (trailing whitespace removed), up to
                the first empty line or the end of the file
        """
        edge = []
        with open(self.file) as handle:
            sourceNode = handle.readline().rstrip()
            sinkNode = handle.readline().rstrip()
            for rawLine in handle:
                record = rawLine.rstrip()
                if not record:
                    # an empty line ends the edge section
                    break
                edge.append(record)
        return sourceNode, sinkNode, edge

### A class to store a graph edge

In [None]:
class Edge():
    """
    One weighted, directed edge parsed from its text form.

    Attributes
    ----------
    src : str
        node the edge leaves, e.g. '0' for '0->1:7'
    dest : str
        node the edge enters, e.g. '1' for '0->1:7'
    weight : str
        weight of the edge, kept as text, e.g. '7' for '0->1:7'
    """
    def __init__(self, edge):
        """
        Split an edge description into source, destination and weight.

        Parameters
        ----------
            edge : str
                example: 0->1:7
        """
        # 'src->dest:weight' : cut at the arrow first, then at the colon
        arrowParts = edge.split('->')
        targetParts = arrowParts[1].split(':')
        self.src = arrowParts[0]
        self.dest, self.weight = targetParts[0], targetParts[1]

### A class to find longest path in a DAG
##### Methods:
    - topologicalGraph: build a topologically ordered list named 'topoList'
    - findLongestDAG: find the length of a longest path in the graph and the path itself

In [145]:
class FindLongestDAG():
    """
    Find a longest path between two nodes of a weighted DAG.

    Attributes
    ----------
    edges : list
        edges of the graph; each item is either an adjacency string
        (example: 0->1:7) or an already parsed object exposing the
        attributes src, dest and weight
    source: str
    sink: str
    """
    def __init__(self,edges,source,sink):
        """
        Constructs all the necessary attributes for the FindLongestDAG object.

        Parameters
        ----------
            edges : list
            source: str
            sink: str
        """
        self.edges=edges
        self.source=source
        self.sink=sink

    def _parsedEdges(self):
        """Return self.edges with every adjacency string turned into an Edge."""
        return [Edge(edge) if isinstance(edge, str) else edge for edge in self.edges]

    def topologicalGraph(self):
        """
        Build a topologically ordered list named 'topoList'.

        Nodes are emitted once all of their incoming edges have been
        consumed (in-degree counting). If the graph contains a cycle,
        "Not DAG" is printed and topoList only holds the nodes that could
        be ordered.

        Return
        ----------
            nodes : set
                every node that appears in an edge
            topoList: list
                the nodes in a topological order
        """
        indegree={} #number of not yet consumed incoming edges per node
        successors={} #node -> destinations of its outgoing edges
        for edge in self._parsedEdges():
            indegree.setdefault(edge.src,0)
            indegree[edge.dest]=indegree.get(edge.dest,0)+1
            successors.setdefault(edge.src,[]).append(edge.dest)
        #nodes without incoming edges can be emitted right away
        candidates=[node for node,degree in indegree.items() if degree==0]
        topoList=[]
        while candidates:
            current=candidates.pop()
            topoList.append(current)
            for dest in successors.get(current,()):
                #consuming the edge current->dest
                indegree[dest]-=1
                if indegree[dest]==0:
                    candidates.append(dest)
        if len(topoList)<len(indegree): #nodes left over can only sit on or behind a cycle
            print("Not DAG")
        return set(indegree),topoList

    def findLongestDAG(self):
        """
        Find the length of a longest path in the graph and the path.

        Return
        ----------
            score : int
                the length of a longest path from source to sink
            longestDAG : str
                the longest path, nodes joined by '->'

        Raises
        ----------
            ValueError
                if the sink cannot be reached from the source
        """
        nodes,topoList=self.topologicalGraph()
        incoming={} #node -> edges that end in this node
        for edge in self._parsedEdges():
            incoming.setdefault(edge.dest,[]).append(edge)
        nodeDict={self.source:0} #score of every node, the source scores 0
        backtrack={} #node -> predecessor on a longest path from the source
        for node in topoList:
            if node==self.source:
                continue
            #-infinite marks a node that cannot be reached from the source
            nodeScore=-math.inf
            for edge in incoming.get(node,()):
                candidate=nodeDict[edge.src]+int(edge.weight)
                if candidate>nodeScore:
                    nodeScore=candidate
                    #remember the edge actually used, not just its score
                    backtrack[node]=edge.src
            nodeDict[node]=nodeScore
        if nodeDict.get(self.sink,-math.inf)==-math.inf:
            raise ValueError('no path from source {!r} to sink {!r}'.format(self.source,self.sink))
        #follow the recorded predecessors from the sink back to the source
        longestPath=[self.sink]
        while longestPath[-1]!=self.source:
            longestPath.append(backtrack[longestPath[-1]])
        longestPath.reverse()
        longestDAG='->'.join(longestPath)
        return nodeDict[self.sink],longestDAG

### Main:
    - input: filename passed as first parameter to main
    - output: a text file.

In [148]:
def main(infile, outfile='output.txt'):
    """
    Find a longest path between two nodes in a weighted DAG.

    Parameters
    ----------
        infile : str
            the filename of the adjacency list
        outfile : str
            the file the result is written to (default: 'output.txt')

    Returns
    -------
        None
            the path length and the path are written to outfile,
            one per line
    """
    #read the adjacency list: source node, sink node and the edge lines
    sourceNode, sinkNode, edge = GraphReader(infile).graphReader()
    #compute the length of a longest path and the path itself
    pathLength, longestDAG = FindLongestDAG(edge, sourceNode, sinkNode).findLongestDAG()
    #write the output into a file
    with open(outfile, 'w') as file:
        file.write('{}\n{}\n'.format(pathLength, longestDAG))

# Run main on 'rosalind_ba5d.txt' only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main('rosalind_ba5d.txt')        

### Inspection
    Inspector1: Hsiang Yun Lu
        - The comments are sometimes placed above the code and sometimes after it, which makes the code look messy.
        - Some variable names are confusing, e.g. `for node in nodes`.
        - Some functions or classes lack docstrings, such as the methods of class GraphReader.