In [69]:
import os

from google.cloud import bigquery

# Location of the service-account key file (relative to the notebook's working directory)
CREDENTIALS_PATH = 'resource/local-receiver-341803-261a3b7beea6.json'

# Export the key-file path through the environment variable that the Google
# client libraries consult for Application Default Credentials
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = CREDENTIALS_PATH

# Create the BigQuery client used by the query cells
client = bigquery.Client()

In [70]:
import pandas as pd

# Query for Position Histories from BigQuery
QUERY = ('SELECT lastTrainReporting.positionHistoryList FROM `local-receiver-341803.PairingSession.TrainDataTable` LIMIT 1')
query_job = client.query(QUERY)

# Wait for job to finish
rows = query_job.result()

# Column order of the resulting frame. This is the alphabetical order that the
# previous row-by-row DataFrame.append produced, pinned explicitly so that any
# positional column access elsewhere keeps seeing the same layout.
POSITION_COLUMNS = ['MilepostNumber', 'PositionTime', 'SpeedMPH', 'SubdivisionId']

# Collect one plain dict per position history and build the frame once at the
# end. DataFrame.append copied the whole frame on every call (quadratic) and
# was removed in pandas 2.0.
positionRecords = []

# For each returned record
for row in rows:
    # Scan each position history; treat a missing/None list as empty
    for positionHistory in row.get('positionHistoryList') or []:
        # Milepost location information (holds the subdivision and the milepost)
        milepostLocation = positionHistory.get('milepostLocation')

        positionRecords.append({
            # The time that the train was at the specific milepost, recorded
            # as seconds from epoch to use in the regression model
            'PositionTime': positionHistory.get('positionTime').timestamp(),
            # How fast the train is traveling
            'SpeedMPH': positionHistory.get('speedMPH'),
            # The unique identifier of the subdivision
            'SubdivisionId': milepostLocation.get('subdivisionId'),
            # The unique identifier of the milepost
            'MilepostNumber': milepostLocation.get('milepost').get('milepostNumber'),
        })

# Passing `columns` also guarantees the expected columns exist when the query
# returns no position histories at all.
positionMilepostTimesDataFrame = pd.DataFrame(positionRecords, columns=POSITION_COLUMNS)

positionMilepostTimesDataFrame


Unnamed: 0,MilepostNumber,PositionTime,SpeedMPH,SubdivisionId
0,278.9611,1648748000.0,-35.0,106.0
1,279.8213,1648748000.0,-37.0,106.0
2,281.4215,1648748000.0,-38.0,106.0
3,282.9552,1648747000.0,-36.0,106.0
4,284.4973,1648747000.0,-38.0,106.0


In [71]:
from sklearn.linear_model import LinearRegression
import numpy as np

# Columns the model predicts, in the order the predictions are reported
TARGET_COLUMNS = ['MilepostNumber', 'SpeedMPH', 'SubdivisionId']

# Regressor: the position time (seconds since epoch) as a single-feature 2-D array.
# Selected by name rather than by position so the fit does not silently switch
# to another column if the frame's column order ever changes.
X = positionMilepostTimesDataFrame[['PositionTime']].to_numpy()

# Targets: one row per observation, one column per entry in TARGET_COLUMNS.
# Selecting by name already yields the right 2-D shape, so no reshape is needed.
y = positionMilepostTimesDataFrame[TARGET_COLUMNS].to_numpy()

# Fit the linear regression model (one independent line per target column)
regsr = LinearRegression()
regsr.fit(X, y)

LinearRegression()

In [72]:
import datetime

# Build the prediction set: a single UTC instant expressed as seconds since the
# epoch, shaped (1, 1) to match the single-feature layout the model was fit on
prediction_instant = datetime.datetime(2022, 3, 31, 17, 20, 23, tzinfo=datetime.timezone.utc)
to_predict_x = np.array([[prediction_instant.timestamp()]])

# Evaluate the fitted model at that instant
predicted_y = regsr.predict(to_predict_x)

# Fitted parameters: per-target slope and intercept
m, c = regsr.coef_, regsr.intercept_

print("Predicted y:\n", predicted_y)
print("slope (m): ", m)
print("y-intercept (c): ", c)


Predicted y:
 [[284.48159566 -37.75784677 106.        ]]
slope (m):  [[-0.01033748]
 [ 0.00335616]
 [-0.        ]]
y-intercept (c):  [ 1.70441694e+07 -5.53348977e+06  1.06000000e+02]
