#### Internal Stage: PUT/GET Files

In [6]:
## Create Snowpark Dataframe by directly reading a table
sf_df = snpark_conn.table("SNOWFLAKE_SAMPLE_DATA.TPCH_SF10.SUPPLIER").filter(col("S_SUPPKEY") < 200)
sf_df = sf_df.select('S_NAME','S_ACCTBAL','S_SUPPKEY')
sf_df.show(3)

--------------------------------------------------
|"S_NAME"            |"S_ACCTBAL"  |"S_SUPPKEY"  |
--------------------------------------------------
|Supplier#000000001  |5755.94      |1            |
|Supplier#000000002  |4032.68      |2            |
|Supplier#000000003  |4192.40      |3            |
--------------------------------------------------



In [12]:
##### Create Internal Named Stage
snpark_conn.sql("CREATE OR REPLACE STAGE MY_STAGE file_format = (type = 'CSV' FIELD_DELIMITER = ',' SKIP_HEADER = 1)").collect()


[Row(status='Stage area MY_STAGE successfully created.')]

In [13]:
## Write Snowpark DataFrame to Internal named stage as CSV file
csv_file_path = f"@my_stage/supplier_v2.csv"
_ = sf_df.write.copy_into_location(csv_file_path, file_format_type="csv", header=False, overwrite=True, single=True)

In [15]:
#Verify file in internal state
snpark_conn.sql("LIST @MY_STAGE").collect()

[Row(name='my_stage/supplier_v2.csv', size=1648, md5='73fe4a1fd8438bfcf594537bfe859748', last_modified='Thu, 14 Dec 2023 23:56:43 GMT')]

In [8]:
## Write Snowpark DataFrame to Internal named stage as Parquet file
par_file_path = f"@my_stage/supplier_v2.parquet"
_ = sf_df.write.copy_into_location(par_file_path, file_format_type="parquet", header=True, overwrite=True, single=True)

##### Verify file in internal state from Snowflake UI
LIST @my_stage pattern='.*_v2.*';

In [9]:
## Get file from internal stage to local file system
get_result = snpark_conn.file.get("@my_stage/supplier_v2.csv","file://C:\\temp\\supplier_v2.csv")

In [10]:
## Create Dataframe from csv file stored in internal stage
from snowflake.snowpark.functions import asc, desc, avg, sum, col, lit
from snowflake.snowpark.types import DataType, StructType, StructField, IntegerType, StringType, FloatType, LongType, DecimalType

df_schema = StructType([
                        StructField("S_NAME", StringType()),
                        StructField("S_ACCTBAL", DecimalType()),
                        StructField("S_SUPPKEY", LongType())
                    ])
df_reader = snpark_conn.read.schema(df_schema)
df_reader = df_reader.option("field_delimiter", ",")
df_csv = df_reader.csv("@my_stage/supplier_v2.csv")
df_csv.show(3)

--------------------------------------------------
|"S_NAME"            |"S_ACCTBAL"  |"S_SUPPKEY"  |
--------------------------------------------------
|Supplier#000000001  |5756         |1            |
|Supplier#000000002  |4033         |2            |
|Supplier#000000003  |4192         |3            |
--------------------------------------------------



#### Create simple UDF

In [16]:
##################################################################
## Define the function for the UDF

def multiply_by_three(input_int_py: int):
  return input_int_py*3

##################################################################
## Register UDF in Snowflake

### Add packages and data types
from snowflake.snowpark.types import IntegerType

### Upload UDF to Snowflake
snpark_conn.udf.register(
    func = multiply_by_three
  , return_type = IntegerType()
  , input_types = [IntegerType()]
  , is_permanent = True
  , name = 'SNOWPARK_MULTIPLY_INTEGER_BY_THREE'
  , replace = True
  , stage_location = '@my_stage'
)

<snowflake.snowpark.udf.UserDefinedFunction at 0x276530f9b50>

In [19]:
snpark_conn.sql("SELECT SNOWPARK_MULTIPLY_INTEGER_BY_THREE(9)").show()

-------------------------------------------
|"SNOWPARK_MULTIPLY_INTEGER_BY_THREE(9)"  |
-------------------------------------------
|27                                       |
-------------------------------------------



#### Import Supported Third-Party Libraries

In [23]:
##################################################################
## Define the function for the UDF

### Import the required modules 
from faker import Faker

### Define main function which generates a fake name
def generate_fake_name():
  fake = Faker()
  return fake.name()

##################################################################
## Register UDF in Snowflake

### Add packages and data types
from snowflake.snowpark.types import StringType
snpark_conn.add_packages('faker')

### Upload UDF to Snowflake
snpark_conn.udf.register(
    func = generate_fake_name
  , return_type = StringType()
  , input_types = []
  , is_permanent = True
  , name = 'SNOWPARK_GENERATE_FAKE_NAME'
  , replace = True
  , stage_location = '@my_stage'
)

##Installation Steps
#1. Did not install 'faker' package on local environment.

##Warning / Call to Function failed
#Package 'faker' is not installed in the local environment. Your UDF might not 
#work when the package is installed on the server but not on your local environment.

#2. This time version is different in local env. vs server

##Warning / Call to Function completed successfully
#The version of package 'faker' in the local environment is 21.0.0, which does not 
#fit the criteria for the requirement 'faker'. Your UDF might not work when the package version is different between the server and your local environment.


The version of package 'faker' in the local environment is 21.0.0, which does not fit the criteria for the requirement 'faker'. Your UDF might not work when the package version is different between the server and your local environment.


<snowflake.snowpark.udf.UserDefinedFunction at 0x276547c8910>

In [24]:
snpark_conn.sql("SELECT SNOWPARK_GENERATE_FAKE_NAME()").show()

-----------------------------------
|"SNOWPARK_GENERATE_FAKE_NAME()"  |
-----------------------------------
|Bill Olson                       |
-----------------------------------

