Skip to content

Commit

Permalink
Update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ekote committed Sep 9, 2016
1 parent 4fe77c7 commit 9785a7f
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 130 deletions.
182 changes: 107 additions & 75 deletions doc-api-examples/src/main/resources/python/model-arimax/new.rst
Original file line number Diff line number Diff line change
@@ -1,100 +1,132 @@

Consider the following model trained and tested on the sample data set in *frame* 'frame'.
The frame has five columns where "y" is the time series value and "visitors", "wkends",
"incidentRate", and "seasonality" are exogenous inputs.
The frame has four columns where "CO_GT" is the time series value and "C6H6_GT", "PT08_S2_NMHC", and "T" are exogenous inputs.

CO_GT - True hourly averaged CO concentration in mg/m^3
C6H6_GT - True hourly averaged Benzene concentration in microg/m^3
PT08_S2_NMHC - Titania hourly averaged sensor response (nominally NMHC targeted)
T - Temperature in C

Data from Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.

<hide>
>>> import trustedanalytics as ta
>>> ta.connect()
-etc-
>>> schema = [("y", ta.float64),("visitors", ta.float64),("wkends", ta.float64),("seasonality", ta.float64),
("incidentRate", ta.float64),("holidayFlag", ta.float64),("postHolidayFlag", ta.float64),("min_temp", ta.float64)]
>>> frame = ta.Frame(ta.UploadRows([[93.0, 416.0, 0.0, 0.006103106, 28.0, 0.0, 0.0, 55.0],
... [82.0, 393.0, 0.0, 0.005381233, 28.0, 0.0, 0.0, 57.0],
... [109.0, 444.0, 0.0, 0.007153103, 28.0, 0.0, 0.0, 53.0],
... [110.0, 445.0, 0.0, 0.007218727, 28.0, 0.0, 0.0, 55.0],
... [109.0, 426.0, 1.0, 0.007153103, 28.0, 0.0, 0.0, 57.0],
... [84.0, 435.0, 1.0, 0.005512483, 28.0, 0.0, 0.0, 50.0],
... [100.0, 471.0, 0.0, 0.006562479, 29.0, 0.0, 0.0, 50.0],
... [91.0, 397.0, 0.0, 0.005971856, 29.0, 0.0, 0.0, 53.0],
... [119.0, 454.0, 0.0, 0.007809351, 29.0, 0.0, 0.0, 51.0],
... [78.0, 416.0, 0.0, 0.005118734, 29.0, 0.0, 0.0, 55.0],
... [99.0, 424.0, 0.0, 0.006496855, 29.0, 0.0, 0.0, 48.0],
... [92.0, 395.0, 1.0, 0.006037481, 29.0, 0.0, 0.0, 46.0],
... [76.0, 401.0, 1.0, 0.004987484, 29.0, 0.0, 0.0, 42.0],
... [99.0, 471.0, 0.0, 0.006496855, 21.0, 0.0, 0.0, 41.0],
... [84.0, 400.0, 0.0, 0.005512483, 21.0, 0.0, 0.0, 48.0],
... [103.0, 418.0, 0.0, 0.006759354, 21.0, 0.0, 0.0, 48.0],
... [107.0, 476.0, 0.0, 0.007021853, 21.0, 0.0, 0.0, 55.0],
... [106.0, 436.0, 0.0, 0.006956228, 21.0, 0.0, 0.0, 59.0],
... [106.0, 442.0, 1.0, 0.006956228, 21.0, 0.0, 0.0, 57.0],
... [89.0, 472.0, 1.0, 0.005840607, 21.0, 0.0, 0.0, 55.0]],
>>> schema = [("CO_GT", ta.float64),("C6H6_GT", ta.float64),("PT08_S2_NMHC", ta.float64),("T", ta.float64)]
>>> frame = ta.Frame(ta.UploadRows([[2.6, 11.9, 1046.0, 13.6],
... [2.0, 9.4, 955.0, 13.3],
... [2.2, 9.0, 939.0, 11.9],
... [2.2, 9.2, 948.0, 11.0],
... [1.6, 6.5, 836.0, 11.2],
... [1.2, 4.7, 750.0, 11.2],
... [1.2, 3.6, 690.0, 11.3],
... [1.0, 3.3, 672.0, 10.7],
... [0.9, 2.3, 609.0, 10.7],
... [0.6, 1.7, 561.0, 10.3],
... [-200.0, 1.3, 527.0, 10.1],
... [0.7, 1.1, 512.0, 11.0],
... [0.7, 1.6, 553.0, 10.5],
... [1.1, 3.2, 667.0, 10.2],
... [2.0, 8.0, 900.0, 10.8],
... [2.2, 9.5, 960.0, 10.5],
... [1.7, 6.3, 827.0, 10.8],
... [1.5, 5.0, 762.0, 10.5],
... [1.6, 5.2, 774.0, 9.5],
... [1.9, 7.3, 869.0, 8.3],
... [2.9, 11.5, 1034.0, 8.0],
... [2.2, 8.8, 933.0, 8.3],
... [2.2, 8.3, 912.0, 9.7],
... [2.9, 11.2, 1020.0, 9.8],
... [4.8, 20.8, 1319.0, 10.3],
... [6.9, 27.4, 1488.0, 9.7],
... [6.1, 24.0, 1404.0, 9.6],
... [3.9, 12.8, 1076.0, 9.1],
... [1.5, 4.7, 749.0, 8.2],
... [1.0, 2.6, 629.0, 8.2],
... [1.7, 5.9, 805.0, 8.3],
... [1.9, 6.4, 829.0, 7.7],
... [1.4, 4.1, 718.0, 7.1],
... [0.8, 1.9, 574.0, 7.0],
... [-200.0, 1.1, 506.0, 6.1],
... [0.6, 1.0, 501.0, 6.3],
... [0.8, 1.8, 571.0, 6.8],
... [1.4, 4.4, 730.0, 6.4],
... [4.4, 17.9, 1236.0, 7.3],
... [-200.0, 22.1, 1353.0, 9.2],
... [3.1, 14.0, 1118.0, 13.2],
... [2.7, 11.6, 1037.0, 14.3],
... [2.1, 10.2, 986.0, 15.0],
... [2.5, 11.0, 1016.0, 16.1],
... [2.7, 12.8, 1078.0, 16.3],
... [2.9, 14.2, 1122.0, 15.8],
... [2.8, 12.7, 1073.0, 15.9],
... [2.4, 11.7, 1041.0, 16.9]],
... schema=schema))
-etc-
</hide>

>>> frame.inspect(columns=["y","visitors","wkends","seasonality","incidentRate"])
[#] y visitors wkends seasonality incidentRate
=======================================================
[0] 93.0 416.0 0.0 0.006103106 28.0
[1] 82.0 393.0 0.0 0.005381233 28.0
[2] 109.0 444.0 0.0 0.007153103 28.0
[3] 110.0 445.0 0.0 0.007218727 28.0
[4] 109.0 426.0 1.0 0.007153103 28.0
[5] 84.0 435.0 1.0 0.005512483 28.0
[6] 100.0 471.0 0.0 0.006562479 29.0
[7] 91.0 397.0 0.0 0.005971856 29.0
[8] 119.0 454.0 0.0 0.007809351 29.0
[9] 78.0 416.0 0.0 0.005118734 29.0
>>> frame.inspect(columns=["CO_GT","C6H6_GT","PT08_S2_NMHC","T"])
[#] CO_GT C6H6_GT PT08_S2_NMHC T
=======================================
[0] 2.6 11.9 1046.0 13.6
[1] 2.0 9.4 955.0 13.3
[2] 2.2 9.0 939.0 11.9
[3] 2.2 9.2 948.0 11.0
[4] 1.6 6.5 836.0 11.2
[5] 1.2 4.7 750.0 11.2
[6] 1.2 3.6 690.0 11.3
[7] 1.0 3.3 672.0 10.7
[8] 0.9 2.3 609.0 10.7
[9] 0.6 1.7 561.0 10.3

>>> model = ta.ArimaxModel()
<progress>

>>> train_output = model.train(frame, "y", ["visitors", "wkends", "seasonality", "incidentRate"], 2, 1, 2, 1, False, False)
>>> train_output = model.train(frame, "CO_GT", ["C6H6_GT","PT08_S2_NMHC","T"], 2, 2, 1, 0, True, True)
<progress>

>>> train_output
{u'ar': [0.20121370919725473, -1.5250755427453089],
u'c': -0.21691372069135137,
u'ma': [-0.5196476378812525, 0.5693147472736498],
u'xreg': [-0.10772949765182113,
-12.376859507565877,
11429596.052220736,
2.003736903435578]}

>>> test_frame = ta.Frame(ta.UploadRows([[100.0, 465.0, 1.0, 0.006562479, 24.0, 1.0, 0.0, 51.0],
... [98.0, 453.0, 1.0, 0.00643123, 24.0, 0.0, 1.0, 54.0],
... [102.0, 472.0, 0.0, 0.006693729, 25.0, 0.0, 0.0, 49.0],
... [98.0, 454.0, 0.0, 0.00643123, 25.0, 0.0, 0.0, 46.0],
... [112.0, 432.0, 0.0, 0.007349977, 25.0, 0.0, 0.0, 42.0],
... [99.0, 431.0, 0.0, 0.006496855, 25.0, 0.0, 0.0, 41.0],
... [99.0, 475.0, 0.0, 0.006496855, 25.0, 0.0, 0.0, 45.0],
... [87.0, 393.0, 1.0, 0.005709357, 25.0, 0.0, 0.0, 46.0],
... [103.0, 437.0, 1.0, 0.006759354, 25.0, 0.0, 0.0, 48.0],
... [115.0, 537.0, 0.0, 0.007546851, 23.0, 0.0, 0.0, 41.0]],
... schema=schema))



>>> predicted_frame = model.predict(test_frame, "y", ["visitors", "wkends", "seasonality", "incidentRate"])
{u'ar': [-0.6876349849133049, -0.33038065385185783],
u'c': -0.9075493080767927,
u'ma': [-1.283039752947022],
u'xreg': [-1.0326823408073342, 0.08721820267076823, -1.8741776454756058]}


>>> test_frame = ta.Frame(ta.UploadRows([[3.9, 19.3, 1277.0, 15.1],
... [3.7, 18.2, 1246.0, 14.4],
... [6.6, 32.6, 1610.0, 12.9],
... [4.4, 20.1, 1299.0, 12.1],
... [3.5, 14.3, 1127.0, 11.0],
... [5.4, 21.8, 1346.0, 9.7],
... [2.7, 9.6, 964.0, 9.5],
... [1.9, 7.4, 873.0, 9.1],
... [1.6, 5.4, 782.0, 8.8],
... [1.7, 5.4, 783.0, 7.8]],
... schema=schema))



>>> predicted_frame = model.predict(test_frame, "CO_GT", ["C6H6_GT","PT08_S2_NMHC","T"])
<progress>

>>> predicted_frame.column_names
[u'y', u'visitors', u'wkends', u'seasonality', u'incidentRate', u'holidayFlag', u'postHolidayFlag', u'min_temp', u'predicted_y']
[u'CO_GT', u'C6H6_GT', u'PT08_S2_NMHC', u'T', u'predicted_y']

>>> predicted_frame.inspect(columns=("y","predicted_y"))
[#] y predicted_y
>>> predicted_frame.inspect(columns=("CO_GT","predicted_y"))
[#] CO_GT predicted_y
=========================
[0] 100.0 104.813706372
[1] 98.0 104.126348745
[2] 102.0 102.225824121
[3] 98.0 102.499768211
[4] 112.0 102.05793437
[5] 99.0 102.208895499
[6] 99.0 102.087246933
[7] 87.0 102.144009625
[8] 103.0 102.107219659
[9] 115.0 102.126621339
[0] 3.9 1.47994006475
[1] 3.7 6.77881520875
[2] 6.6 6.16894546356
[3] 4.4 7.45349002663
[4] 3.5 8.85479025637
[5] 5.4 6.58078264909
[6] 2.7 6.26275769839
[7] 1.9 4.71901417682
[8] 1.6 3.77627384099
[9] 1.7 1.91766708341


>>> model.publish()
<progress>
Expand Down Expand Up @@ -126,5 +158,5 @@ The 'score' value contains an array of predicted y values.

<skip>
>>> r.text
u'{"data":[{"y":[100.0,98.0,102.0],"x_values":[465.0,453.0,472.0,1.0,1.0,1.0,0.006562479,0.00643123,0.006693729,24.0,24.0,25.0],"score":[104.813706372, 104.126348745, 102.225824121]}]}'
u'{"data":[{"y":[3.9,3.7,6.6],"x_values":[19.3,18.2,32.6,1277.0,1246.0,1610.0,15.1,14.4,12.9],"score":[1.47994006475, 6.77881520875, 6.16894546356]}]}'
</skip>
55 changes: 0 additions & 55 deletions integration-tests/tests/model_arimax_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,61 +28,6 @@
ta.connect()

class ModelArimaxTest(unittest.TestCase):
def test_arimax_with_lag(self):
# End-to-end check of ArimaxModel: train on /datasets/arx_train.csv and compare
# the returned coefficients, then predict on /datasets/arx_test.csv and compare
# the resulting column names and the "predicted_y" values.
print "define csv file"
# Eight float64 columns. "y" is passed to train/predict as the value column and
# the remaining seven are passed as the x columns.
schema = [("y", ta.float64),("visitors", ta.float64),("wkends", ta.float64),("seasonality", ta.float64),("incidentRate", ta.float64), ("holidayFlag", ta.float64),("postHolidayFlag", ta.float64),("mintemp", ta.float64)]
# skip_header_lines=1 -- presumably the CSV starts with a header row; verify against the data file.
csv = ta.CsvFile("/datasets/arx_train.csv", schema=schema, skip_header_lines=1)

print "create training frame"
train_frame = ta.Frame(csv)

print "Initializing a ArimaxModel object"
arimax = ta.ArimaxModel()

print "Training the model on the Frame"
# NOTE(review): the trailing positional arguments (1, 1, 1, 1, False) are model
# settings whose meaning is not visible here -- confirm against ArimaxModel.train.
coefficients = arimax.train(train_frame, "y", ["visitors","wkends","seasonality","incidentRate","holidayFlag","postHolidayFlag","mintemp"], 1, 1, 1, 1, False)

# Reference output: a one-element list holding a dict with 'ar', 'c', 'ma' and
# seven 'xreg' values (the same count as the x columns passed to train).
expected_coefficients = [{u'ar': [-0.017383421475889338],
u'c': 0.12592884652020608,
u'ma': [-0.9175172681125372],
u'xreg': [0.026948785665512696,
-0.27509641527217865,
-30.04399435207178,
0.23517811763216095,
6.6167555198084225,
0.8706683776904101,
0.20954216832984773]}]

# Exact equality on float values: any numeric drift in training fails this assertion.
self.assertEqual(coefficients, expected_coefficients)


print "create test frame"
# The test frame reuses the training schema.
csv2 = ta.CsvFile("/datasets/arx_test.csv", schema=schema, skip_header_lines=1)
test_frame = ta.Frame(csv2)

print "Predicting on the Frame"
p = arimax.predict(test_frame, "y", ["visitors","wkends","seasonality","incidentRate","holidayFlag","postHolidayFlag","mintemp"])
# The predicted frame must carry all eight input columns plus "predicted_y".
self.assertEqual(p.column_names, ["y","visitors","wkends","seasonality","incidentRate","holidayFlag","postHolidayFlag","mintemp","predicted_y"])

# Fifteen expected predictions, each wrapped in its own single-element row,
# all nested inside one outer list.
expected_results = [[[107.08170082770327],
[106.14721021492875],
[105.66075720064653],
[104.95697359162283],
[104.88559443691666],
[105.85093277068711],
[106.1696229024144],
[106.70909616071428],
[105.35885193790156],
[105.71779481717215],
[107.09473701831531],
[106.14901905655587],
[107.12955639032205],
[107.4479823114264],
[107.56837582595121]]]

# NOTE(review): the second positional argument (1) to take() looks like an offset,
# which would skip the first row -- confirm against Frame.take. The comparison is
# again exact float equality.
self.assertEqual(expected_results, p.take(p.row_count, 1, "predicted_y"))


def test_arimax_air_quality(self):
# Data from Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.
print "Define csv file"
Expand Down

0 comments on commit 9785a7f

Please sign in to comment.