In [71]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, year, month, dayofmonth, hour, weekofyear
from pyspark.ml.feature import StringIndexer, VectorAssembler
from pyspark.ml.regression import LinearRegression, RandomForestRegressor, LinearRegressionModel
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml import Pipeline


In [72]:
# Task 1: Explore the Data and Develop the Model in Spark Shell

# 1.1 Read Dataset in Spark`
spark = SparkSession.builder.appName("Bike Sharing Demand Forecast").getOrCreate()
train_df = spark.read.csv('hdfs://nameservice1/user/vikbrleey38pedu/train.csv', header=True, inferSchema=True)
test_df = spark.read.csv('hdfs://nameservice1/user/vikbrleey38pedu/test.csv', header=True, inferSchema=True)

In [73]:
# 1.2 Get Summary of Data and Variable Types
train_df.describe().show()
train_df.printSchema()

+-------+------------------+-------------------+------------------+------------------+------------------+-----------------+------------------+------------------+-----------------+------------------+------------------+
|summary|            season|            holiday|        workingday|           weather|              temp|            atemp|          humidity|         windspeed|           casual|        registered|             count|
+-------+------------------+-------------------+------------------+------------------+------------------+-----------------+------------------+------------------+-----------------+------------------+------------------+
|  count|             10886|              10886|             10886|             10886|             10886|            10886|             10886|             10886|            10886|             10886|             10886|
|   mean|2.5066139996325556|0.02856880396839978|0.6808745177291935| 1.418427337865148|20.230859819952173|23.65508405291192| 61.8

In [74]:
# 1.3 Process Data
indexer_season = StringIndexer(inputCol="season", outputCol="season_indexed")
indexer_weather = StringIndexer(inputCol="weather", outputCol="weather_indexed")
train_df = indexer_season.fit(train_df).transform(train_df)
train_df = indexer_weather.fit(train_df).transform(train_df)
train_df = train_df.drop("season", "weather")
#train_df = train_df.na.fill({"some_column": "default_value"})
train_df = train_df.withColumn("hour", hour(col("datetime")))
train_df = train_df.withColumn("day", dayofmonth(col("datetime")))
train_df = train_df.withColumn("month", month(col("datetime")))
train_df = train_df.withColumn("year", year(col("datetime")))
train_df = train_df.drop("datetime")


In [75]:
# 1.4 Data Exploration
train_df.groupBy("hour").agg({"count": "mean"}).show()

+----+------------------+
|hour|        avg(count)|
+----+------------------+
|  12|256.50877192982455|
|  22| 133.5767543859649|
|   1|33.859030837004404|
|  13| 257.7872807017544|
|   6| 76.25934065934067|
|  16|316.37280701754383|
|   3|11.757505773672056|
|  20|228.51754385964912|
|   5| 19.76769911504425|
|  19| 315.2785087719298|
|  15| 254.2982456140351|
|   9|221.78021978021977|
|  17|  468.765350877193|
|   4| 6.407239819004525|
|   8| 362.7692307692308|
|  23| 89.50877192982456|
|   7|213.11648351648353|
|  10|175.09230769230768|
|  21|173.37061403508773|
|  11|210.67472527472526|
+----+------------------+
only showing top 20 rows



In [28]:
train_df.show()

+-------+----------+-----+------+--------+---------+------+----------+-----+--------------+---------------+----+---+-----+----+
|holiday|workingday| temp| atemp|humidity|windspeed|casual|registered|count|season_indexed|weather_indexed|hour|day|month|year|
+-------+----------+-----+------+--------+---------+------+----------+-----+--------------+---------------+----+---+-----+----+
|      0|         0| 9.84|14.395|      81|      0.0|     3|        13|   16|           3.0|            0.0|   0|  1|    1|2011|
|      0|         0| 9.02|13.635|      80|      0.0|     8|        32|   40|           3.0|            0.0|   1|  1|    1|2011|
|      0|         0| 9.02|13.635|      80|      0.0|     5|        27|   32|           3.0|            0.0|   2|  1|    1|2011|
|      0|         0| 9.84|14.395|      75|      0.0|     3|        10|   13|           3.0|            0.0|   3|  1|    1|2011|
|      0|         0| 9.84|14.395|      75|      0.0|     0|         1|    1|           3.0|            0

In [76]:
# Task 2: Model Development
feature_cols = train_df.columns
feature_cols.remove('count')
assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
train_data = assembler.transform(train_df)
train_data, test_data = train_data.randomSplit([0.8, 0.2])
lr = LinearRegression(featuresCol='features', labelCol='count')
lr_model = lr.fit(train_data)
predictions = lr_model.transform(test_data)
evaluator = RegressionEvaluator(labelCol="count", predictionCol="prediction", metricName="rmse")
rmse = evaluator.evaluate(predictions)
lr_model.write().overwrite().save("hdfs://nameservice1/user/vikbrleey38pedu/model")

In [77]:
from pyspark.sql import SparkSession
from pyspark.ml.regression import LinearRegressionModel

# Initialize Spark session
spark = SparkSession.builder.appName("Bike Sharing Demand Prediction").getOrCreate()

# Load the persisted model from HDFS
model = LinearRegressionModel.load("hdfs://nameservice1/user/vikbrleey38pedu/model")


In [78]:
model

LinearRegression_ccb9d0b99470

In [79]:
# Load input data (assuming it's in CSV format, replace 'path_to_input_data.csv' with your actual path)
input_df = spark.read.csv('hdfs://nameservice1/user/vikbrleey38pedu/test.csv', header=True, inferSchema=True)
input_df.show(1)

+-------------------+------+-------+----------+-------+-----+------+--------+---------+
|           datetime|season|holiday|workingday|weather| temp| atemp|humidity|windspeed|
+-------------------+------+-------+----------+-------+-----+------+--------+---------+
|2011-01-20 00:00:00|     1|      0|         1|      1|10.66|11.365|      56|  26.0027|
+-------------------+------+-------+----------+-------+-----+------+--------+---------+
only showing top 1 row



In [80]:
# Apply the same transformations as done during training
indexer_season = StringIndexer(inputCol="season", outputCol="season_indexed")
indexer_weather = StringIndexer(inputCol="weather", outputCol="weather_indexed")
input_df = indexer_season.fit(input_df).transform(input_df)
input_df = indexer_weather.fit(input_df).transform(input_df)
input_df = input_df.drop("season", "weather")
#input_df = input_df.na.fill({"some_column": "default_value"})
input_df = input_df.withColumn("hour", hour(col("datetime")))
input_df = input_df.withColumn("day", dayofmonth(col("datetime")))
input_df = input_df.withColumn("month", month(col("datetime")))
input_df = input_df.withColumn("year", year(col("datetime")))
input_df = input_df.drop("datetime")

In [94]:
input_df.show()

+-------+----------+-----+------+--------+---------+--------------+---------------+----+---+-----+----+--------------------+
|holiday|workingday| temp| atemp|humidity|windspeed|season_indexed|weather_indexed|hour|day|month|year|            features|
+-------+----------+-----+------+--------+---------+--------------+---------------+----+---+-----+----+--------------------+
|      0|         1|10.66|11.365|      56|  26.0027|           2.0|            0.0|   0| 20|    1|2011|[0.0,1.0,10.66,11...|
|      0|         1|10.66|13.635|      56|      0.0|           2.0|            0.0|   1| 20|    1|2011|[0.0,1.0,10.66,13...|
|      0|         1|10.66|13.635|      56|      0.0|           2.0|            0.0|   2| 20|    1|2011|[0.0,1.0,10.66,13...|
|      0|         1|10.66| 12.88|      56|  11.0014|           2.0|            0.0|   3| 20|    1|2011|[0.0,1.0,10.66,12...|
|      0|         1|10.66| 12.88|      56|  11.0014|           2.0|            0.0|   4| 20|    1|2011|[0.0,1.0,10.66,12...|


In [110]:
# Combine all features into a single vector column
feature_columns = ['holiday', 'workingday', 'temp', 'atemp', 'humidity', 'windspeed', 
                   'hour', 'day', 'month', 'year', 'season_indexed', 'weather_indexed']
assembler = VectorAssembler(inputCols=feature_columns, outputCol="features")
input_df = assembler.transform(input_df)

IllegalArgumentException: 'Output column features already exists.'

In [112]:
# Use the model to make predictions
predictions = model.transform(input_df)
predictions.printSchema()

# Assuming 'df' is your original DataFrame
selected_df = predictions.select("prediction")

# Show the first few rows of the selected DataFrame
selected_df.show()

#selected_df.limit(10).show()




# Define the path in HDFS where you want to save the predictions
#hdfs_path = "hdfs://nameservice1/user/vikbrleey38pedu/output"


# Write the predictions DataFrame to HDFS in Parquet format
#predictions.write.json(hdfs_path, mode="overwrite")




root
 |-- holiday: integer (nullable = true)
 |-- workingday: integer (nullable = true)
 |-- temp: double (nullable = true)
 |-- atemp: double (nullable = true)
 |-- humidity: integer (nullable = true)
 |-- windspeed: double (nullable = true)
 |-- season_indexed: double (nullable = false)
 |-- weather_indexed: double (nullable = false)
 |-- hour: integer (nullable = true)
 |-- day: integer (nullable = true)
 |-- month: integer (nullable = true)
 |-- year: integer (nullable = true)
 |-- features: vector (nullable = true)
 |-- prediction: double (nullable = false)



Py4JJavaError: An error occurred while calling o5382.showString.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 180.0 failed 1 times, most recent failure: Lost task 0.0 in stage 180.0 (TID 521, localhost, executor driver): org.apache.spark.SparkException: Failed to execute user defined function($anonfun$1: (struct<type:tinyint,size:int,indices:array<int>,values:array<double>>) => double)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:619)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:121)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IllegalArgumentException: requirement failed: BLAS.dot(x: Vector, y:Vector) was given Vectors with non-matching sizes: x.size = 12, y.size = 14
	at scala.Predef$.require(Predef.scala:224)
	at org.apache.spark.ml.linalg.BLAS$.dot(BLAS.scala:104)
	at org.apache.spark.ml.regression.LinearRegressionModel.predict(LinearRegression.scala:707)
	at org.apache.spark.ml.regression.LinearRegressionModel.predict(LinearRegression.scala:644)
	at org.apache.spark.ml.PredictionModel$$anonfun$1.apply(Predictor.scala:215)
	at org.apache.spark.ml.PredictionModel$$anonfun$1.apply(Predictor.scala:214)
	... 21 more

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1887)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1875)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1874)
	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1874)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2108)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2057)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2046)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
	at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:365)
	at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
	at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3384)
	at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2545)
	at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2545)
	at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3365)
	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
	at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364)
	at org.apache.spark.sql.Dataset.head(Dataset.scala:2545)
	at org.apache.spark.sql.Dataset.take(Dataset.scala:2759)
	at org.apache.spark.sql.Dataset.getRows(Dataset.scala:255)
	at org.apache.spark.sql.Dataset.showString(Dataset.scala:292)
	at sun.reflect.GeneratedMethodAccessor167.invoke(Unknown Source)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.SparkException: Failed to execute user defined function($anonfun$1: (struct<type:tinyint,size:int,indices:array<int>,values:array<double>>) => double)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:619)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
	at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:121)
	at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more
Caused by: java.lang.IllegalArgumentException: requirement failed: BLAS.dot(x: Vector, y:Vector) was given Vectors with non-matching sizes: x.size = 12, y.size = 14
	at scala.Predef$.require(Predef.scala:224)
	at org.apache.spark.ml.linalg.BLAS$.dot(BLAS.scala:104)
	at org.apache.spark.ml.regression.LinearRegressionModel.predict(LinearRegression.scala:707)
	at org.apache.spark.ml.regression.LinearRegressionModel.predict(LinearRegression.scala:644)
	at org.apache.spark.ml.PredictionModel$$anonfun$1.apply(Predictor.scala:215)
	at org.apache.spark.ml.PredictionModel$$anonfun$1.apply(Predictor.scala:214)
	... 21 more


In [57]:
# Define MySQL database connection properties
url = "jdbc:mysql://mysql://ip-10-1-1-204.ap-south-1.compute.internal/vikbrleey38pedu"
properties = {
    "user": "vikbrleey38pedu",
    "password": "PurpleMonkey64$",
    "driver": "/home/vikbrleey38pedu/mysql-connector-java-8.0.11.jar"
}

In [58]:
# Table to store predictions
table_name = "bike_demand_predictions"

# Save predictions to MySQL
predictions.write \
    .jdbc(url=url, table=table_name, mode="append", properties=properties)

Py4JJavaError: An error occurred while calling o2350.jdbc.
: java.lang.ClassNotFoundException: /home/vikbrleey38pedu/mysql-connector-java-8.0.11.jar
	at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
	at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
	at org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry$.register(DriverRegistry.scala:45)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions$$anonfun$5.apply(JDBCOptions.scala:99)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions$$anonfun$5.apply(JDBCOptions.scala:99)
	at scala.Option.foreach(Option.scala:257)
	at org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions.<init>(JDBCOptions.scala:99)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite.<init>(JDBCOptions.scala:193)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcOptionsInWrite.<init>(JDBCOptions.scala:197)
	at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:45)
	at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68)
	at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:86)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
	at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
	at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
	at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
	at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
	at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668)
	at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:668)
	at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
	at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:668)
	at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:276)
	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:270)
	at org.apache.spark.sql.DataFrameWriter.jdbc(DataFrameWriter.scala:506)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.GatewayConnection.run(GatewayConnection.java:238)
	at java.lang.Thread.run(Thread.java:748)
