# Imports

In [None]:
import pandas as pd
import numpy as np
import time
import smtplib
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET
import requests 
import json
import os



In [None]:
## Creating XML Tree Object ##

TARGET = 'servicestatus.json'
SOURCE = 'http://web.mta.info/status/serviceStatus.txt'

# Fetch the service-status feed once at start-up.
# The timeout keeps a stalled connection from hanging the notebook forever, and
# raise_for_status() stops an HTTP error page from being handed to the XML parser.
response = requests.get(SOURCE, timeout=30)
response.raise_for_status()
xml_string = response.text
root = ET.fromstring(xml_string)
# ET.fromstring converts a string of XML content to a XML tree object.


# Functions Dealing with MTA Service Status

### Function to intake xml root and return full list of MTA lines present in the XML. 

In [None]:
def getFullLineList(xmlroot):
    '''
    Lists every line entry found in the service status tree.
    @param xmlroot: parsed service status tree. Its third child (xmlroot[2]) is read as the
                    container of line entries; the first child of each entry holds the name.
    output: list of tuples (position of the entry inside xmlroot[2], text of its name element),
            in document order. Empty list when xmlroot[2] has no children.
    '''
    # Read from the argument, not from the module-level `root`, so the result always
    # describes the tree that was actually passed in.
    return [(position, entry[0].text) for position, entry in enumerate(xmlroot[2])]

## Creating a list of all lines ##

# Names of the line groups handled by the program. binaryConverter and binaryDecoder
# rely on the ORDER of this list to map a name to a bit position.
# NOTE(review): presumably these match the names used in the feed - confirm.
alllines=['123','456','7','ACE','BDFM','G','JZ','L','NQR','S','SIR']

# (position, name) pairs built from the tree fetched at start-up.
FullLineList = getFullLineList(root)

### Function to intake service status xml object and return lines with service changes as a list of tuples

In [None]:
def Return_SC_Lines(xmlroot):
    '''
    Collects the lines whose status is anything other than 'GOOD SERVICE'.
    @param xmlroot: xml.etree.ElementTree object of the service status page. Entries are read
                    from xmlroot[2]; child 0 of an entry is its name, child 1 its status.
    output: list of tuples (line name, service status), in document order.
    '''
    return [
        (entry[0].text, entry[1].text)
        for entry in xmlroot[2]
        if entry[1].text != 'GOOD SERVICE'
    ]
    
# Snapshot of the non-'GOOD SERVICE' lines in the tree fetched at start-up.
SC_lines = Return_SC_Lines(root)

### Function that returns a dictionary with lines as keys and 'delayed' or 'works' as possible values

In [None]:
def dictionary():
    '''
    Maps each line that is delayed or under planned work to a short status label.
    Reads the module-level `root` tree through Return_SC_Lines.
    output: dict {line name: 'delayed' | 'works'}. Lines with any other status are left out.
    '''
    labels = {'DELAYS': 'delayed', 'PLANNED WORK': 'works'}
    return {
        name: labels[status]
        for name, status in Return_SC_Lines(root)
        if status in labels
    }
            

### Function to take line name and service status XML and return beautiful soup of that line's service status 

In [None]:
def MakeLine_ServiceStatusSoup(line_name,xmlroot):
    '''
    Parses the status text of one line into a BeautifulSoup tree.
    @param line_name: name of the line, compared against the first child of each entry
                      in xmlroot[2]
    @param xmlroot: parsed service status tree
    output: BeautifulSoup object built with the 'lxml' parser from the text of the entry's
            third child; an empty soup when that element carries no text
    raises: ValueError if no entry of xmlroot[2] is named line_name
    '''
    # Search the tree that was passed in instead of the FullLineList snapshot taken at
    # start-up, so the index can never refer to a different tree. As before, the last
    # matching entry wins.
    line_entry = None
    for entry in xmlroot[2]:
        if entry[0].text == line_name:
            line_entry = entry

    # `is None` on purpose: an Element's truth value depends on whether it has children.
    if line_entry is None:
        raise ValueError('No line named %r in the service status data' % (line_name,))

    # An element without text gives None, which BeautifulSoup cannot parse.
    status_str = line_entry[2].text or ''
    linestatus_soup = BeautifulSoup(status_str, 'lxml')

    return linestatus_soup


### Functions to intake service status xml and a line with planned work and return description of the planned work

In [None]:
def plannedWork_Simple(line_servicestatus_soup):
    '''
    Gathers planned-work text for a line whose status has no detail links.
    @param line_servicestatus_soup: soup of one line's status text
    output: one-element list holding the concatenated text of every <p> found after each
            <span class="TitlePlannedWork"> (empty string when there is none)
    '''
    title_spans = line_servicestatus_soup.find_all(['span'], {'class':['TitlePlannedWork']})

    fragments = [
        paragraph.text
        for title in title_spans
        for paragraph in title.find_all_next(['p'])
    ]

    return [''.join(fragments)]


# this works for printing summary line, and not when planned work is a span

def plannedWork_Detail(line_servicestatus_soup):
    '''
    Gathers the summary text of every planned-work detail link.
    @param line_servicestatus_soup: soup of one line's status text
    output: list with the text of each <a class="plannedWorkDetailLink">, in document order
    '''
    detail_links = line_servicestatus_soup.find_all(['a'], {'class':['plannedWorkDetailLink']})
    return [link.text for link in detail_links]
        


def PlannedWorkText(line_name,xmlroot):
    '''
    Describes the planned work on one line.
    @param line_name: name of the line, as listed in FullLineList
    @param xmlroot: parsed service status tree
    output: list [line_name, status text of the line, list of planned-work descriptions]
    '''
    soup = MakeLine_ServiceStatusSoup(line_name,xmlroot)

    # Position of the line inside xmlroot[2]; stays '' when the name is unknown.
    position = ''
    for idx, name in FullLineList:
        if name == line_name:
            position = idx

    # Detail links take precedence; without them fall back to the plain paragraphs.
    has_detail_links = len(soup.find_all('a', {'class':'plannedWorkDetailLink'})) >= 1
    if has_detail_links:
        descriptions = plannedWork_Detail(soup)
    else:
        descriptions = plannedWork_Simple(soup)

    return [line_name, xmlroot[2][position][1].text, descriptions]

 

### Function to intake service status xml and a line name and return text of delays on that line, if applicable 

In [None]:
def delays_text(line_name,xmlroot):
    '''
    Returns the delay description of one line.
    @param line_name: name of the line
    @param xmlroot: parsed service status tree
    output: string. Text of the <p> tags inside each <span class="TitleDelay">, each followed
            by a space; '' when the status contains no such span.
    '''
    line_status_soup = MakeLine_ServiceStatusSoup(line_name,xmlroot)
    del_text = ''

    for title in line_status_soup.find_all('span', {'class': 'TitleDelay'}):
        paragraphs = title.find_all('p')

        if paragraphs:
            # The delay description is contained in <p> tags.
            for paragraph in paragraphs:
                del_text += paragraph.text + ' '
        else:
            # No <p> inside the span: use the 4th and 5th text nodes of the whole status
            # instead. This REPLACES anything gathered from earlier spans.
            del_text = ''.join(line_status_soup.find_all(text=True)[3:5]).strip()

    return del_text

### Function to return list of lines with delays or planned works

In [None]:
def delayedLines(xmlroot):
    '''
    Names of the lines that are not in good service.
    @param xmlroot: parsed service status tree
    output: list of line names whose status differs from 'GOOD SERVICE'
    '''
    return [name for name, status in Return_SC_Lines(xmlroot) if status != 'GOOD SERVICE']

# Lines not in good service according to the tree fetched at start-up.
delayed=delayedLines(root)

# Functions Dealing with User Data and Program Flow

### Initialize the User's data

In [None]:
def init():
    '''
    Creates the empty users table.
    output: DataFrame without columns whose rows are labelled 'User', 'Time' and then one
            row per line group. Each user is later stored as one column.
    '''
    row_labels = ['User','Time','123','456','7','ACE','BDFM','G','JZ','L','NQR','S','SIR']
    empty_table = pd.DataFrame(index=row_labels, dtype=str)
    return empty_table

### Function to encode a list of lines as a list of bits, based on the alllines list

In [None]:
def binaryConverter(lines):
    '''
    Encodes a collection of line names as bits, following the order of alllines.
    @param lines: list of strings. Contains the list of lines.
    output: list of int with one entry per element of alllines:
    -  1 if that element appears in lines
    -  0 if not
    '''
    return [1 if name in lines else 0 for name in alllines]

def binaryDecoder(binary_line):
    '''
    Inverse of binaryConverter: turns a bit vector back into line names.
    @param binary_line: sequence of bits ordered like alllines. Values equal to 1 (True
                        included) mark a selected line.
    output: list of the alllines entries whose bit equals 1
    '''
    # Walk the values instead of indexing with binary_line[i]: sendmail passes a pandas
    # Series labelled with line names, for which integer [] lookups are not reliable.
    return [alllines[i] for i, bit in enumerate(binary_line) if bit == 1]

# Round-trip the start-up list of affected lines through the bit encoding.
binary=binaryConverter(delayed)
regular=binaryDecoder(binary)

### Functions to add or remove a user's data

In [None]:
def addProfile(data,email,time,lines):
    '''
    Stores one user profile as a new column of data (modified in place).
    @param data: users table, as created by init()
    @param email: user's email
    @param time: commuting time
    @param lines: list of the lines concerned by that commuting time
    '''
    profile = [email, time] + list(binaryConverter(lines))

    try:
        next_label = max(data.columns) + 1
    except ValueError:
        # max() of an empty sequence: the table has no column yet.
        next_label = 0

    data[next_label] = profile

def removeUser(data,email):
    '''
    Removes every column of data whose first row equals email (data is modified in place).
    @param data: users table; row 0 holds the user's email
    @param email: email of the user to remove
    '''
    # Collect the real column LABELS first. Counting positions and using the counter as a
    # label only works while the labels happen to be exactly 0..n-1.
    matching = [label for label, user_email in data.iloc[0].items() if user_email == email]
    data.drop(columns=matching, inplace=True)

def clear(data):
    '''
    Drops every column of data in place; the row labels are kept.
    @param data: users table
    '''
    for label in tuple(data.columns):
        data.pop(label)

### Function that returns list of people to be notified by email, based on the list of delayed lines

In [None]:
def listToNotify(data,affected_lines):
    '''
    Selects the users to email right now.
    A user is selected when their commuting time is exactly time_window minutes after the
    current local time and at least one of their lines is in affected_lines.
    @param data: users table (one column per user: email, time as 'h:mm' or 'hh:mm', then one
                 bit per line)
    @param affected_lines: list of line names with planned works or delays
    output: list of unique tuples (email, column label in data)
    '''
    # Read the clock ONCE: two separate localtime() calls can straddle a minute or hour
    # boundary and yield a time that never existed. Time is expressed in minutes.
    now = time.localtime()
    present_time = now.tm_hour*60 + now.tm_min

    # Initialize the list of people to notify
    notif = []

    # Convert the list of affected lines into binary format
    binary_affected_lines = binaryConverter(affected_lines)

    for col in data.columns:
        column = data[col]
        # Rows are labelled ('User', 'Time', line names), so address them by position
        # explicitly with .iloc.
        user_time = column.iloc[1].zfill(5)  # 24 hours format hh:mm
        user_hour = int(user_time[0:2])
        user_minutes = int(user_time[3:])

        # Corner case around midnight: a commute in the first minutes of the day is compared
        # against a clock that is still on the previous day, so shift it by 24 hours.
        if user_hour == 0 and user_minutes < time_window:
            user_commuting_time = 24*60 + user_minutes
        else:
            user_commuting_time = user_hour*60 + user_minutes

        if user_commuting_time - present_time == time_window:
            user_lines = column.iloc[2:]
            # Number of lines that are both selected by the user and affected
            e = sum(np.logical_and(user_lines, binary_affected_lines))
            if e > 0:
                notif.append((column.iloc[0], col))

    return list(set(notif))  # set() removes duplicates

### Function to trigger the emails

In [None]:
def emailTrigger(data,affected_lines):
    '''
    Sends one email to every user returned by listToNotify.
    @param data: users table
    @param affected_lines: list of line names with planned works or delays
    '''
    for recipient in listToNotify(data,affected_lines):
        sendmail(data,recipient,affected_lines)
        

In [None]:
def getall_usrNames(usr_df):
    '''
    Returns the distinct values of the first row of the users table.
    @param usr_df: users table (DataFrame) or a 2-D array with the same layout
    output: list of unique first-row values, in no particular order
    '''
    # A DataFrame does not accept [0, :]; positional row access goes through .iloc.
    # Array-like inputs keep the original indexing.
    first_row = usr_df.iloc[0, :] if hasattr(usr_df, 'iloc') else usr_df[0, :]
    return list(set(first_row))


### Function that takes a list of lines and returns a text listing those lines

In [None]:
def list_lines_txt(lines):
    '''
    Formats line names as an English enumeration, e.g. 'A, B and C'.
    @param lines: list of strings
    output: '' for an empty list, the single name for one element, otherwise the names
            separated by ', ' with ' and ' before the last one
    '''
    if len(lines) == 0:
        # Nothing to list; previously this raised IndexError.
        return ''
    if len(lines) == 1:
        return lines[0]
    return ', '.join(lines[:-1]) + ' and ' + lines[-1]



### Function that takes a list of affected lines and returns the reason of the delays or works

In [None]:
def detail_affected_lines(affected_lines):
    '''
    Builds the body text describing each affected line.
    Reads the module-level `root` tree (through dictionary, delays_text and PlannedWorkText).
    @param affected_lines: list of line names
    output: string with one paragraph per delayed line and per line with planned work,
            followed by a newline. Lines with any other status add nothing.
    '''
    parts = []
    d = dictionary()

    for line in affected_lines:
        # dictionary() only knows 'DELAYS' and 'PLANNED WORK'; .get() avoids a KeyError for
        # lines reported with any other status.
        status = d.get(line)

        if status == 'delayed':
            parts.append(' Lines '+line+'\n Status: Delayed \n'+delays_text(line,root)+'\n \n')
        elif status == 'works':
            parts.append(' Lines '+line+'\n Status: Planned Work\n')
            for description in PlannedWorkText(line,root)[2]:
                parts.append(description+'\n')
            parts.append('\n \n')

    return ''.join(parts)+'\n'

### Function to format emails to users with delay or service change info

In [None]:
def sendmail(data,user,affected_lines):
    '''
    Sends one alert email listing the affected lines the user subscribed to.
    @param data: our dataframe
    @param user: tuple (email,column). email is the user's email. column is the column where
                 this user is in data
    @param affected_lines: list of line names with planned works or delays
    '''
    mail = user[0]
    column_number = user[1]

    # Build the whole message BEFORE opening the connection, so a failure while gathering
    # the details never leaves a session open.
    binary_affected_lines = binaryConverter(affected_lines)
    user_bits = data[column_number].iloc[2:]
    # np.logical_and returns a Series labelled with line names; turn it into a plain list so
    # binaryDecoder can read it by position.
    user_binary_affected_lines = list(np.logical_and(binary_affected_lines, user_bits))
    user_affected_lines = binaryDecoder(user_binary_affected_lines)

    s2 = list_lines_txt(user_affected_lines)
    detail = detail_affected_lines(user_affected_lines)

    # The empty element produces the blank line that separates the headers from the body.
    msg = "\r\n".join([
            "From: subwayhelper@gmail.com",
            "To: "+mail,
            "Subject: ALERT: Service Change on line(s) "+str(s2),
            "",
            '''Dear user, \n \n Please be aware of the following service changes that might affect your commute: \n\n'''
            +detail+''' Thanks for using our platform, \n \n The subway helper team'''
            ])

    # NOTE(review): the account password is hard-coded in the source; it should be rotated
    # and loaded from configuration or a secret store.
    # The context manager closes the session even when login or sending fails.
    with smtplib.SMTP('smtp.gmail.com:587') as server:
        server.ehlo()
        server.starttls()
        server.login("notificationmta@gmail.com", "PyProject1859")
        server.sendmail("subwayhelper@gmail.com", mail, msg)
        print('Email successfully sent to '+str(mail))

### Function to return the current user data from the web interface server in dictionary format, if the server is running! 

In [None]:
def get_webUsrData():
    '''
    Fetches the current user data from the local web interface server.
    The server (Web_Interface.ipynb) must be running, otherwise the request raises.
    output: the decoded JSON payload of http://127.0.0.1:8080/curr_user_dict
    '''
    # The timeout makes a hung local server fail fast, so callers can fall back to another
    # source instead of blocking forever.
    result = requests.get('http://127.0.0.1:8080/curr_user_dict', timeout=5)
    webUsrData_json = json.loads(result.text)
    return webUsrData_json


### Function to return user data in dictionary format from stored text file 

In [None]:
def get_textUsrData():
    '''
    Loads the user data stored in UsrData/UsrDataFile.txt.
    output: the decoded JSON content, or None (after printing 'no file returned') when the
            file cannot be read or decoded
    raises: FileNotFoundError when the file does not exist
    '''
    path = "UsrData/UsrDataFile.txt"

    # A missing file still raises, as it did before (then by accident, through an unbound
    # variable); callers that catch exceptions keep working.
    if not os.path.isfile(path):
        raise FileNotFoundError(path)

    # Read-only access is enough, and `with` guarantees the handle is closed.
    with open(path, 'r') as txtusrDatfile:
        try:
            return json.load(txtusrDatfile)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and decoding errors.
            print('no file returned')
            return None

### Function to return user data we have - either from web server or text file - to a dictionary

In [None]:
def get_bothUsrData():
    '''
    Returns the user data from the web server or, if that fails, from the text file.
    output: dict {key: list}, where element 0 of every list is left-padded with zeros to
            5 characters; None when neither source provides data
    '''
    try:
        both_usr_dataDict = get_webUsrData()
    except Exception:
        # Web server unavailable: fall back to the stored file.
        try:
            both_usr_dataDict = get_textUsrData()
        except Exception:
            return None

    # get_textUsrData answers None for an unreadable file; there is nothing to normalise.
    if both_usr_dataDict is None:
        return None

    for record in both_usr_dataDict.values():
        record[0] = record[0].zfill(5)
    return both_usr_dataDict



### Function to take the data dict from the web server or text file and add it to the existing system dataframe

In [None]:
# addProfile(data,email,time,lines):

def webData_to_DF(system_datafram, dict_of_webData):
    '''
    Adds every user of dict_of_webData to the system dataframe through addProfile.
    @param system_datafram: users table, modified in place
    @param dict_of_webData: mapping {email: sequence}. Element 0 of a sequence is the
                            commuting time; the remaining elements are the user's lines,
                            with 0 marking an unused slot.
    '''
    for email in dict_of_webData.keys():
        record = dict_of_webData[email]
        # Keep only the real line names
        chosen_lines = [entry for entry in record[1:] if entry != 0]
        addProfile(system_datafram, email, record[0], chosen_lines)
    


### How many minutes before a user's commute do we check their trains' service status

In [None]:
# In minutes. listToNotify selects a user when their commuting time is exactly this many
# minutes after the current time.
time_window=5

# Workspace

In [None]:
### Below to be run once a minute
data=init()

while True:
    # Refresh the service status. The timeout prevents a stalled connection from freezing
    # the loop, and raise_for_status() keeps an HTTP error page away from the XML parser.
    response = requests.get(SOURCE, timeout=30)
    response.raise_for_status()
    xml_string = response.text
    root = ET.fromstring(xml_string)

    # Rebuild the users table from scratch on every run.
    delayed=delayedLines(root)
    web_dataDF = pd.DataFrame.from_dict(get_bothUsrData())
    webData_to_DF(data,web_dataDF)

    emailTrigger(data,delayed)
    print(data)
    # Read the clock once so hour and minute belong to the same instant.
    now = time.localtime()
    print('finished a run at', str(now[3])+':'+str(now[4]) )
    time.sleep(60)
    clear(data)
    
