In [2]:
from snowflake.snowpark import Session
# `connection_parameters` is not defined in this cell: it must already exist in
# the kernel, built as described in the session-creation instructions.
session = (
    Session.builder
    .configs(connection_parameters)
    .create()
)

In [3]:
# DataFrames are combined with the `join` method. This is the left-hand input
# used by the join examples below: two rows with columns "key" and "value1".
lhs_df = session.create_dataframe(
    [
        [1, "snow"],
        [2, "flake"],
    ],
    schema=["key", "value1"],
)
lhs_df.show()

--------------------
|"KEY"  |"VALUE1"  |
--------------------
|1      |snow      |
|2      |flake     |
--------------------



In [4]:
# Right-hand input for the join examples: three rows with columns "key" and
# "value2". Key 3 has no counterpart in `lhs_df`.
rhs_df = session.create_dataframe(
    [
        [1, "AWS"],
        [2, "Azure"],
        [3, "GCP"],
    ],
    schema=["key", "value2"],
)
rhs_df.show()

--------------------
|"KEY"  |"VALUE2"  |
--------------------
|1      |AWS       |
|2      |Azure     |
|3      |GCP       |
--------------------



In [5]:
# Join on an explicit column-equality expression. Both inputs carry a "key"
# column, so the result keeps both of them under prefixed names (the prefixes
# differ from one run to the next).
df_joined = lhs_df.join(
    rhs_df,
    lhs_df.col("key") == rhs_df.col("key"),
)
df_joined.show()

-----------------------------------------------------
|"l_zdof_KEY"  |"VALUE1"  |"r_368e_KEY"  |"VALUE2"  |
-----------------------------------------------------
|1             |snow      |1             |AWS       |
|2             |flake     |2             |Azure     |
-----------------------------------------------------



In [6]:
# Same join as above, followed by a projection: keep the left-hand key under the
# plain name "key" together with the two value columns, so the duplicated key
# column does not appear in the result.
joined_df = (
    lhs_df
    .join(rhs_df, lhs_df.col("key") == rhs_df.col("key"))
    .select(lhs_df["key"].as_("key"), "value1", "value2")
)
joined_df.show()

-------------------------------
|"KEY"  |"VALUE1"  |"VALUE2"  |
-------------------------------
|1      |snow      |AWS       |
|2      |flake     |Azure     |
-------------------------------



In [7]:
# When the join column has the same name in both DataFrames, pass a list of
# column names instead of an expression; the result then has a single "KEY"
# column.
(
    lhs_df
    .join(rhs_df, ["key"])
    .show()
)


-------------------------------
|"KEY"  |"VALUE1"  |"VALUE2"  |
-------------------------------
|1      |snow      |AWS       |
|2      |flake     |Azure     |
-------------------------------



In [11]:
# Several join conditions can be combined with the `&` operator. Each comparison
# is wrapped in parentheses because `&` binds more tightly than `==` and `<` in
# Python.
df = lhs_df.join(
    rhs_df,
    (lhs_df["key"] == rhs_df["key"]) & (lhs_df["key"] < 2),
)
df.show()

-----------------------------------------------------
|"l_awka_KEY"  |"VALUE1"  |"r_02hy_KEY"  |"VALUE2"  |
-----------------------------------------------------
|1             |snow      |1             |AWS       |
-----------------------------------------------------



In [12]:
# The same two-part join condition, followed by a projection that keeps the
# left-hand key as "key" plus both value columns.
df = (
    lhs_df
    .join(
        rhs_df,
        (lhs_df["key"] == rhs_df["key"]) & (lhs_df["key"] < 2),
    )
    .select(lhs_df["key"].as_("key"), "value1", "value2")
)
df.show()

-------------------------------
|"KEY"  |"VALUE1"  |"VALUE2"  |
-------------------------------
|1      |snow      |AWS       |
-------------------------------



In [13]:
# Self-join: join `lhs_df` against a copy of itself made with copy.copy().
# The non-key column exists on both sides, so it shows up twice in the result
# under prefixed names.
from copy import copy

df_lhs_copied = copy(lhs_df)
joined_df = lhs_df.join(
    df_lhs_copied,
    ["key"],
)
joined_df.show()

---------------------------------------------
|"KEY"  |"l_udf8_VALUE1"  |"r_3tlm_VALUE1"  |
---------------------------------------------
|1      |snow             |snow             |
|2      |flake            |flake            |
---------------------------------------------



In [14]:
# A DataFrame cannot be joined with a second reference to itself:
# `copied_lhs_df` below is the very same object as `lhs_df`, not a copy, and
# Snowpark rejects the join because the column references cannot be resolved.
#
# The error is the point of this cell, so it is caught and printed instead of
# being left to propagate; an uncaught exception would stop "Run All" here and
# the remaining cells would never execute.
from snowflake.snowpark.exceptions import SnowparkJoinException

copied_lhs_df = lhs_df  # plain assignment: another name for the same DataFrame
try:
    lhs_df.join(copied_lhs_df, ["key"]).show()
except SnowparkJoinException as exc:
    # Show the exception type together with its message.
    print(f"{type(exc).__name__}: {exc}")

SnowparkJoinException: (1103): You cannot join a DataFrame with itself because the column references cannot be resolved correctly. Instead, create a copy of the DataFrame with copy.copy(), and join the DataFrame with this copy.

In [16]:
# Give the overlapping columns of the self-join explicit names: the left-hand
# "value1" keeps its name and the copy's "value1" becomes "value2". `alias` and
# `as_` are both used here to rename a column.
joined_df = (
    lhs_df
    .join(df_lhs_copied, ["key"])
    .select(
        "key",
        lhs_df.col("value1").alias("value1"),
        df_lhs_copied["value1"].as_("value2"),
    )
)
joined_df.show()

-------------------------------
|"KEY"  |"VALUE1"  |"VALUE2"  |
-------------------------------
|1      |snow      |snow      |
|2      |flake     |flake     |
-------------------------------



In [18]:
# Alternatively, let the join name the overlapping columns itself by supplying
# a suffix for each side; "value1" then appears once with "_left" and once with
# "_right" appended.
joined_df = lhs_df.join(
    df_lhs_copied,
    ["key"],
    lsuffix="_left",
    rsuffix="_right",
)
joined_df.show()


------------------------------------------
|"KEY"  |"VALUE1_LEFT"  |"VALUE1_RIGHT"  |
------------------------------------------
|1      |snow           |snow            |
|2      |flake          |flake           |
------------------------------------------



In [None]:
# Close the Snowpark session created at the top of the notebook now that the
# examples are finished.
session.close()