# Loading a CSV file from Amazon S3 into the Iguazio file system

In [None]:
!pip install v3io_frames

In [None]:
# Provide your user password without storing it in the notebook:
# an already-set V3IO_PASSWORD is kept as is, otherwise you are prompted for it
# (the typed value is not echoed and never ends up in the saved file).
import os
from getpass import getpass

if not os.environ.get('V3IO_PASSWORD'):
    os.environ['V3IO_PASSWORD'] = getpass('V3IO password: ')

In [13]:
import pandas as pd
import v3io_frames as v3f
import os
# Placeholder table name; it is reassigned before the key-value write below
tablename = 'bank'
# Create the frames client for the v3io-framesd service, authenticating with
# the password stored in the V3IO_PASSWORD environment variable
client = v3f.Client('v3io-framesd:8081', password=os.environ.get('V3IO_PASSWORD'))

## Import sample file from S3

In [None]:
%%sh
# Stop at the first failing command instead of carrying on with a partial setup
set -e
# -p: do not complain when the directory already exists (the cell can be re-run)
mkdir -p /v3io/bigdata/examples/
# -f: fail on an HTTP error instead of saving the error page as the CSV
# -L: follow redirects; -o: write the download to the given file
curl -fL -o /v3io/bigdata/examples/stocks_example.csv "deutsche-boerse-xetra-pds.s3.amazonaws.com/2018-03-26/2018-03-26_BINS_XETR07.csv"

## Read the file using pandas

In [14]:
# Load the CSV that was downloaded from S3 into a pandas DataFrame
# and preview its first rows
csv_path = '/v3io/bigdata/examples/stocks_example.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,ISIN,Mnemonic,SecurityDesc,SecurityType,Currency,SecurityID,Date,Time,StartPrice,MaxPrice,MinPrice,EndPrice,TradedVolume,NumberOfTrades
0,AT0000A0E9W5,SANT,S+T AG (Z.REG.MK.Z.)O.N.,Common stock,EUR,2504159,2018-03-26,07:00,20.56,20.56,20.56,20.56,1115,5
1,DE000A0WMPJ6,AIXA,AIXTRON SE NA O.N.,Common stock,EUR,2504428,2018-03-26,07:00,17.035,17.08,16.92,16.98,2892,11
2,DE000A0Z2XN6,RIB,RIB SOFTWARE SE NA EO 1,Common stock,EUR,2504436,2018-03-26,07:00,24.02,24.18,23.94,24.12,5721,34
3,DE000A0Z2ZZ5,FNTN,FREENET AG NA O.N.,Common stock,EUR,2504438,2018-03-26,07:00,24.72,24.72,24.7,24.72,315,2
4,DE000A1EWWW0,ADS,ADIDAS AG NA O.N.,Common stock,EUR,2504471,2018-03-26,07:00,196.35,196.4,195.6,195.9,5616,27


## Write the data into the Iguazio database as a key-value table using v3io frames

In [16]:
# Target table path for the write below
# (the directory listing further down shows it under /v3io/bigdata/examples/)
tablename = 'examples/stocks_example_tab'
# Send the DataFrame loaded earlier to the frames 'kv' backend; `out` keeps
# whatever client.write returns and is not read by any other visible cell
out = client.write('kv', tablename, df)

## Read and write the file using Spark DF

In [None]:
from pyspark.sql import SparkSession

# Reuse the active Spark session if one exists, otherwise create it
spark = SparkSession.builder.appName("Iguazio Integration demo").getOrCreate()

# Read the sample stocks_example.csv file into a Spark DataFrame, and let Spark infer the schema of the CSV file.
# .csv() selects the CSV data source by itself, so no .format(...) call belongs on this read.
myDF = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .csv("v3io://bigdata/examples/stocks_example.csv")
)

# Write the DataFrame data to the examples/stocks_tab_by_spark table under the "bigdata" container
# and define the "ISIN" column as the key.
# NOTE(review): if an ISIN value repeats across rows, rows sharing a key presumably
# overwrite one another in the table - confirm this is the intended key.
myDF.write.format("io.iguaz.v3io.spark.sql.kv").mode("append").option("key", "ISIN").save("v3io://bigdata/examples/stocks_tab_by_spark/")

## Read an Iguazio table and write it back as a CSV

In [None]:
# Load the table written by the Spark cell and keep only the rows whose TradedVolume exceeds 20000
myDF2 = spark.read.format("io.iguaz.v3io.spark.sql.kv").load("v3io://bigdata/examples/stocks_tab_by_spark").where("TradedVolume>20000")

# coalesce(1) collapses the result into one partition so the output is stored as a
# single part file (Spark creates the target path as a directory).
# mode("overwrite") keeps the cell re-runnable: with the default save mode the
# write raises an error once the target path already exists.
myDF2.coalesce(1).write.mode("overwrite").csv('v3io://bigdata/examples/stocks_high_volume.csv')


## Viewing files 

In [17]:
# List the entries currently stored under the examples directory
!ls -l /v3io/bigdata/examples/

total 0
-rw-rw-r--. 1 50 users   999016 Dec 19 21:24 stocks.csv
-rw-rw-r--. 1 50 users   999016 Dec 19 15:39 stocks_example.csv
drwxrwxrwx. 2 50 nogroup      0 Dec 20 09:48 stocks_example_tab
drwxrwsrwx. 2 50 nogroup      0 Dec 19 21:25 stocks_tab
drwxrwsr-x. 2 50 nogroup      0 Dec 19 21:25 stocks_tab.parquet


## Remove files

In [None]:
!rm -rf /v3io/bigdata/examples/stock*