Update tests

trustedanalytics · Sep 9, 2016 · 9785a7f · 9785a7f
1 parent 4fe77c7
commit 9785a7f
Show file tree

Hide file tree

Showing 2 changed files with 107 additions and 130 deletions.
diff --git a/doc-api-examples/src/main/resources/python/model-arimax/new.rst b/doc-api-examples/src/main/resources/python/model-arimax/new.rst
@@ -1,100 +1,132 @@
 
 Consider the following model trained and tested on the sample data set in *frame* 'frame'.
-The frame has five columns where "y" is the time series value and "vistors", "wkends",
-"incidentRate", and "seasonality" are exogenous inputs.
+The frame has four columns where "CO_GT" is the time series value and "C6H6_GT", "PT08_S2_NMHC" and "T" are exogenous inputs.
+
+* CO_GT - True hourly averaged concentration CO in mg/m^3
+* C6H6_GT - True hourly averaged Benzene concentration in microg/m^3
+* PT08_S2_NMHC - Titania hourly averaged sensor response (nominally NMHC targeted)
+* T - Temperature in C
+
+Data from Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.
 
 <hide>
 >>> import trustedanalytics as ta
 >>> ta.connect()
 -etc-
->>> schema = [("y", ta.float64),("visitors", ta.float64),("wkends", ta.float64),("seasonality", ta.float64),
-("incidentRate", ta.float64),("holidayFlag", ta.float64),("postHolidayFlag", ta.float64),("min_temp", ta.float64)]
->>> frame = ta.Frame(ta.UploadRows([[93.0, 416.0, 0.0, 0.006103106, 28.0, 0.0, 0.0, 55.0],
-...                                 [82.0, 393.0, 0.0, 0.005381233, 28.0, 0.0, 0.0, 57.0],
-...                                 [109.0, 444.0, 0.0, 0.007153103, 28.0, 0.0, 0.0, 53.0],
-...                                 [110.0, 445.0, 0.0, 0.007218727, 28.0, 0.0, 0.0, 55.0],
-...                                 [109.0, 426.0, 1.0, 0.007153103, 28.0, 0.0, 0.0, 57.0],
-...                                 [84.0, 435.0, 1.0, 0.005512483, 28.0, 0.0, 0.0, 50.0],
-...                                 [100.0, 471.0, 0.0, 0.006562479, 29.0, 0.0, 0.0, 50.0],
-...                                 [91.0, 397.0, 0.0, 0.005971856, 29.0, 0.0, 0.0, 53.0],
-...                                 [119.0, 454.0, 0.0, 0.007809351, 29.0, 0.0, 0.0, 51.0],
-...                                 [78.0, 416.0, 0.0, 0.005118734, 29.0, 0.0, 0.0, 55.0],
-...                                 [99.0, 424.0, 0.0, 0.006496855, 29.0, 0.0, 0.0, 48.0],
-...                                 [92.0, 395.0, 1.0, 0.006037481, 29.0, 0.0, 0.0, 46.0],
-...                                 [76.0, 401.0, 1.0, 0.004987484, 29.0, 0.0, 0.0, 42.0],
-...                                 [99.0, 471.0, 0.0, 0.006496855, 21.0, 0.0, 0.0, 41.0],
-...                                 [84.0, 400.0, 0.0, 0.005512483, 21.0, 0.0, 0.0, 48.0],
-...                                 [103.0, 418.0, 0.0, 0.006759354, 21.0, 0.0, 0.0, 48.0],
-...                                 [107.0, 476.0, 0.0, 0.007021853, 21.0, 0.0, 0.0, 55.0],
-...                                 [106.0, 436.0, 0.0, 0.006956228, 21.0, 0.0, 0.0, 59.0],
-...                                 [106.0, 442.0, 1.0, 0.006956228, 21.0, 0.0, 0.0, 57.0],
-...                                 [89.0, 472.0, 1.0, 0.005840607, 21.0, 0.0, 0.0, 55.0]],
+>>> schema = [("CO_GT", ta.float64),("C6H6_GT", ta.float64),("PT08_S2_NMHC", ta.float64),("T", ta.float64)]
+>>> frame = ta.Frame(ta.UploadRows([[2.6, 11.9, 1046.0, 13.6],
+...                                 [2.0, 9.4, 955.0, 13.3],
+...                                 [2.2, 9.0, 939.0, 11.9],
+...                                 [2.2, 9.2, 948.0, 11.0],
+...                                 [1.6, 6.5, 836.0, 11.2],
+...                                 [1.2, 4.7, 750.0, 11.2],
+...                                 [1.2, 3.6, 690.0, 11.3],
+...                                 [1.0, 3.3, 672.0, 10.7],
+...                                 [0.9, 2.3, 609.0, 10.7],
+...                                 [0.6, 1.7, 561.0, 10.3],
+...                                 [-200.0, 1.3, 527.0, 10.1],
+...                                 [0.7, 1.1, 512.0, 11.0],
+...                                 [0.7, 1.6, 553.0, 10.5],
+...                                 [1.1, 3.2, 667.0, 10.2],
+...                                 [2.0, 8.0, 900.0, 10.8],
+...                                 [2.2, 9.5, 960.0, 10.5],
+...                                 [1.7, 6.3, 827.0, 10.8],
+...                                 [1.5, 5.0, 762.0, 10.5],
+...                                 [1.6, 5.2, 774.0, 9.5],
+...                                 [1.9, 7.3, 869.0, 8.3],
+...                                 [2.9, 11.5, 1034.0, 8.0],
+...                                 [2.2, 8.8, 933.0, 8.3],
+...                                 [2.2, 8.3, 912.0, 9.7],
+...                                 [2.9, 11.2, 1020.0, 9.8],
+...                                 [4.8, 20.8, 1319.0, 10.3],
+...                                 [6.9, 27.4, 1488.0, 9.7],
+...                                 [6.1, 24.0, 1404.0, 9.6],
+...                                 [3.9, 12.8, 1076.0, 9.1],
+...                                 [1.5, 4.7, 749.0, 8.2],
+...                                 [1.0, 2.6, 629.0, 8.2],
+...                                 [1.7, 5.9, 805.0, 8.3],
+...                                 [1.9, 6.4, 829.0, 7.7],
+...                                 [1.4, 4.1, 718.0, 7.1],
+...                                 [0.8, 1.9, 574.0, 7.0],
+...                                 [-200.0, 1.1, 506.0, 6.1],
+...                                 [0.6, 1.0, 501.0, 6.3],
+...                                 [0.8, 1.8, 571.0, 6.8],
+...                                 [1.4, 4.4, 730.0, 6.4],
+...                                 [4.4, 17.9, 1236.0, 7.3],
+...                                 [-200.0, 22.1, 1353.0, 9.2],
+...                                 [3.1, 14.0, 1118.0, 13.2],
+...                                 [2.7, 11.6, 1037.0, 14.3],
+...                                 [2.1, 10.2, 986.0, 15.0],
+...                                 [2.5, 11.0, 1016.0, 16.1],
+...                                 [2.7, 12.8, 1078.0, 16.3],
+...                                 [2.9, 14.2, 1122.0, 15.8],
+...                                 [2.8, 12.7, 1073.0, 15.9],
+...                                 [2.4, 11.7, 1041.0, 16.9]],
 ...                                 schema=schema))
 -etc-
 </hide>
 
->>> frame.inspect(columns=["y","visitors","wkends","seasonality","incidentRate"])
-[#]  y      visitors  wkends  seasonality  incidentRate
-=======================================================
-[0]   93.0     416.0     0.0  0.006103106          28.0
-[1]   82.0     393.0     0.0  0.005381233          28.0
-[2]  109.0     444.0     0.0  0.007153103          28.0
-[3]  110.0     445.0     0.0  0.007218727          28.0
-[4]  109.0     426.0     1.0  0.007153103          28.0
-[5]   84.0     435.0     1.0  0.005512483          28.0
-[6]  100.0     471.0     0.0  0.006562479          29.0
-[7]   91.0     397.0     0.0  0.005971856          29.0
-[8]  119.0     454.0     0.0  0.007809351          29.0
-[9]   78.0     416.0     0.0  0.005118734          29.0
+>>> frame.inspect(columns=["CO_GT","C6H6_GT","PT08_S2_NMHC","T"])
+[#]  CO_GT  C6H6_GT  PT08_S2_NMHC  T
+=======================================
+[0]    2.6     11.9        1046.0  13.6
+[1]    2.0      9.4         955.0  13.3
+[2]    2.2      9.0         939.0  11.9
+[3]    2.2      9.2         948.0  11.0
+[4]    1.6      6.5         836.0  11.2
+[5]    1.2      4.7         750.0  11.2
+[6]    1.2      3.6         690.0  11.3
+[7]    1.0      3.3         672.0  10.7
+[8]    0.9      2.3         609.0  10.7
+[9]    0.6      1.7         561.0  10.3
 
 >>> model = ta.ArimaxModel()
 <progress>
 
->>> train_output = model.train(frame, "y", ["visitors", "wkends", "seasonality", "incidentRate"], 2, 1, 2, 1, False, False)
+>>> train_output = model.train(frame, "CO_GT", ["C6H6_GT","PT08_S2_NMHC","T"],  2, 2, 1, 0, True, True)
 <progress>
 
 >>> train_output
-{u'ar': [0.20121370919725473, -1.5250755427453089],
- u'c': -0.21691372069135137,
- u'ma': [-0.5196476378812525, 0.5693147472736498],
- u'xreg': [-0.10772949765182113,
-  -12.376859507565877,
-  11429596.052220736,
-  2.003736903435578]}
-
->>> test_frame = ta.Frame(ta.UploadRows([[100.0, 465.0, 1.0, 0.006562479, 24.0, 1.0, 0.0, 51.0],
-...                                  [98.0, 453.0, 1.0, 0.00643123, 24.0, 0.0, 1.0, 54.0],
-...                                  [102.0, 472.0, 0.0, 0.006693729, 25.0, 0.0, 0.0, 49.0],
-...                                  [98.0, 454.0, 0.0, 0.00643123, 25.0, 0.0, 0.0, 46.0],
-...                                  [112.0, 432.0, 0.0, 0.007349977, 25.0, 0.0, 0.0, 42.0],
-...                                  [99.0, 431.0, 0.0, 0.006496855, 25.0, 0.0, 0.0, 41.0],
-...                                  [99.0, 475.0, 0.0, 0.006496855, 25.0, 0.0, 0.0, 45.0],
-...                                  [87.0, 393.0, 1.0, 0.005709357, 25.0, 0.0, 0.0, 46.0],
-...                                  [103.0, 437.0, 1.0, 0.006759354, 25.0, 0.0, 0.0, 48.0],
-...                                  [115.0, 537.0, 0.0, 0.007546851, 23.0, 0.0, 0.0, 41.0]],
-...                                  schema=schema))
-
-
-
->>> predicted_frame = model.predict(test_frame, "y", ["visitors", "wkends", "seasonality", "incidentRate"])
+{u'ar': [-0.6876349849133049, -0.33038065385185783],
+ u'c': -0.9075493080767927,
+ u'ma': [-1.283039752947022],
+ u'xreg': [-1.0326823408073342, 0.08721820267076823, -1.8741776454756058]}
+
+
+>>> test_frame = ta.Frame(ta.UploadRows([[3.9, 19.3, 1277.0, 15.1],
+...                                      [3.7, 18.2, 1246.0, 14.4],
+...                                      [6.6, 32.6, 1610.0, 12.9],
+...                                      [4.4, 20.1, 1299.0, 12.1],
+...                                      [3.5, 14.3, 1127.0, 11.0],
+...                                      [5.4, 21.8, 1346.0, 9.7],
+...                                      [2.7, 9.6, 964.0, 9.5],
+...                                      [1.9, 7.4, 873.0, 9.1],
+...                                      [1.6, 5.4, 782.0, 8.8],
+...                                      [1.7, 5.4, 783.0, 7.8]],
+...                                      schema=schema))
+
+
+
+>>> predicted_frame = model.predict(test_frame, "CO_GT", ["C6H6_GT","PT08_S2_NMHC","T"])
 <progress>
 
 >>> predicted_frame.column_names
-[u'y', u'visitors', u'wkends', u'seasonality', u'incidentRate', u'holidayFlag', u'postHolidayFlag', u'min_temp', u'predicted_y']
+[u'CO_GT', u'C6H6_GT', u'PT08_S2_NMHC', u'T', u'predicted_y']
 
->>> predicted_frame.inspect(columns=("y","predicted_y"))
-[#]  y      predicted_y
+>>> predicted_frame.inspect(columns=("CO_GT","predicted_y"))
+[#]  CO_GT  predicted_y
 =========================
-[0]  100.0  104.813706372
-[1]   98.0  104.126348745
-[2]  102.0  102.225824121
-[3]   98.0  102.499768211
-[4]  112.0   102.05793437
-[5]   99.0  102.208895499
-[6]   99.0  102.087246933
-[7]   87.0  102.144009625
-[8]  103.0  102.107219659
-[9]  115.0  102.126621339
+[0]    3.9  1.47994006475
+[1]    3.7  6.77881520875
+[2]    6.6  6.16894546356
+[3]    4.4  7.45349002663
+[4]    3.5  8.85479025637
+[5]    5.4  6.58078264909
+[6]    2.7  6.26275769839
+[7]    1.9  4.71901417682
+[8]    1.6  3.77627384099
+[9]    1.7  1.91766708341
+
 
 >>> model.publish()
 <progress>
@@ -126,5 +158,5 @@ The 'score' value contains an array of predicted y values.
 
 <skip>
 >>> r.text
-u'{"data":[{"y":[100.0,98.0,102.0],"x_values":[465.0,453.0,472.0,1.0,1.0,1.0,0.006562479,0.00643123,0.006693729,24.0,24.0,25.0],"score":[104.813706372, 104.126348745, 102.225824121]}]}'
+u'{"data":[{"y":[3.9,3.7,6.6],"x_values":[19.3,18.2,32.6,1277.0,1246.0,1610.0,15.1,14.4,12.9],"score":[1.47994006475, 6.77881520875, 6.16894546356]}]}'
 </skip>
diff --git a/integration-tests/tests/model_arimax_test.py b/integration-tests/tests/model_arimax_test.py
@@ -28,61 +28,6 @@
 ta.connect()
 
 class ModelArimaxTest(unittest.TestCase):
-    def test_arimax_with_lag(self):
-        print "define csv file"
-        schema = [("y", ta.float64),("visitors", ta.float64),("wkends", ta.float64),("seasonality", ta.float64),("incidentRate", ta.float64), ("holidayFlag", ta.float64),("postHolidayFlag", ta.float64),("mintemp", ta.float64)]
-        csv = ta.CsvFile("/datasets/arx_train.csv", schema=schema, skip_header_lines=1)
-
-        print "create training frame"
-        train_frame = ta.Frame(csv)
-
-        print "Initializing a ArimaxModel object"
-        arimax = ta.ArimaxModel()
-
-        print "Training the model on the Frame"
-        coefficients = arimax.train(train_frame, "y", ["visitors","wkends","seasonality","incidentRate","holidayFlag","postHolidayFlag","mintemp"], 1, 1, 1, 1, False)
-
-        expected_coefficients = [{u'ar': [-0.017383421475889338],
-                                  u'c': 0.12592884652020608,
-                                  u'ma': [-0.9175172681125372],
-                                  u'xreg': [0.026948785665512696,
-                                            -0.27509641527217865,
-                                            -30.04399435207178,
-                                            0.23517811763216095,
-                                            6.6167555198084225,
-                                            0.8706683776904101,
-                                            0.20954216832984773]}]
-
-        self.assertEqual(coefficients, expected_coefficients)
-
-
-        print "create test frame"
-        csv2 = ta.CsvFile("/datasets/arx_test.csv", schema=schema, skip_header_lines=1)
-        test_frame = ta.Frame(csv2)
-
-        print "Predicting on the Frame"
-        p = arimax.predict(test_frame, "y", ["visitors","wkends","seasonality","incidentRate","holidayFlag","postHolidayFlag","mintemp"])
-        self.assertEqual(p.column_names, ["y","visitors","wkends","seasonality","incidentRate","holidayFlag","postHolidayFlag","mintemp","predicted_y"])
-
-        expected_results = [[[107.08170082770327],
-                             [106.14721021492875],
-                             [105.66075720064653],
-                             [104.95697359162283],
-                             [104.88559443691666],
-                             [105.85093277068711],
-                             [106.1696229024144],
-                             [106.70909616071428],
-                             [105.35885193790156],
-                             [105.71779481717215],
-                             [107.09473701831531],
-                             [106.14901905655587],
-                             [107.12955639032205],
-                             [107.4479823114264],
-                             [107.56837582595121]]]
-
-        self.assertEqual(expected_results, p.take(p.row_count, 1, "predicted_y"))
-
-
     def test_arimax_air_quality(self):
         # Data from Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science.
         print "Define csv file"