In [None]:
import time

from pyspark.sql import types
from pyspark.sql.functions import col
import ts.flint
from ts.flint import FlintContext
from ts.flint import summarizers

In [None]:
# Build the Flint entry point on top of the existing Spark SQL context.
# NOTE(review): `sqlContext` is not defined in any cell of this notebook —
# presumably it is injected by the PySpark kernel/shell; confirm before
# running on a fresh kernel.
fc = FlintContext(sqlContext)

In [None]:
def date_parser(fmt):
    """Build a column UDF that parses date values into epoch nanoseconds.

    Parameters
    ----------
    fmt : str
        ``strptime`` format applied to the stringified column value,
        e.g. ``'%Y%m%d'`` or ``'%Y-%m-%d %H:%M:%S'``.

    Returns
    -------
    The ``parse`` function decorated with ``ts.flint.udf(types.LongType())``;
    call it with a column to obtain the parsed timestamp column.

    Notes
    -----
    The parsed datetime is naive and is interpreted in the local time zone of
    the process that evaluates the UDF (the behaviour of ``time.mktime``).
    A value that does not match ``fmt`` raises ``ValueError`` from
    ``strptime``.
    """
    @ts.flint.udf(types.LongType())
    def parse(x):
        # str() so that non-string values (e.g. a DATE column read as an
        # integer such as 20170101) can still be parsed with `fmt`.
        dt = types.datetime.datetime.strptime(str(x), fmt)
        # Compute the epoch arithmetically instead of via strftime("%s%f"):
        # "%s" is a non-standard, platform-specific directive, and gluing
        # seconds and microseconds together as text yields a wrong value
        # when the seconds part is negative and microseconds are non-zero.
        epoch_seconds = int(time.mktime(dt.timetuple()))
        epoch_micros = epoch_seconds * 1000000 + dt.microsecond
        # microseconds -> nanoseconds
        return epoch_micros * 1000
    return parse

In [None]:
# Read each CSV with a header row and inferred column types, then derive a
# 'time' column from DATE using the format that matches that file.
weather = (
    sqlContext.read
    .options(header=True, inferSchema=True)
    .csv('weather.csv')
    .withColumn('time', date_parser('%Y%m%d')(col('DATE')))
)
spy = (
    sqlContext.read
    .options(header=True, inferSchema=True)
    .csv('spy.csv')
    .withColumn('time', date_parser('%Y-%m-%d %H:%M:%S')(col('DATE')))
)

In [None]:
# Hand both Spark DataFrames to Flint. is_sorted=False is passed for each,
# since nothing in this notebook orders the rows by 'time' beforehand.
weather_df, spy_df = (
    fc.read.dataframe(frame, is_sorted=False) for frame in (weather, spy)
)

In [None]:
# Left-join the weather rows onto the SPY rows with a tolerance of "3d".
joined = spy_df.leftJoin(weather_df, tolerance="3d")
# Add 'change' = Close - Open, computed from the joined frame's own columns.
joined = joined.withColumn(
    'change',
    joined['Close'] - joined['Open'],
)

In [None]:
# Regress 'change' on the PRCP and SNOW columns and show the summary as a
# pandas DataFrame (the last expression is the cell's displayed output).
(
    joined
    .summarize(summarizers.linear_regression('change', ['PRCP', 'SNOW']))
    .toPandas()
)