# In [0]:
import pytest
from pyspark.sql.functions import col, lit

@pytest.fixture
def sample_data(spark_session):
    """Provide a two-row DataFrame with Name, Age and Profession columns.

    Every value is populated (no ``None`` entries), so tests can rely on
    two fully filled rows.

    NOTE(review): ``spark_session`` is not defined in this chunk; presumably
    it is a fixture supplied by a plugin or conftest -- confirm.
    """
    rows = [
        ("John", 30, "Engineer"),
        ("Alice", 25, "Doctor"),
    ]
    return spark_session.createDataFrame(rows, ["Name", "Age", "Profession"])

def test_read_csv_remove_row(spark_session):
    """Check that ``read_csv_remove_row`` returns exactly one row for ``test.csv``."""
    # NOTE(review): "test.csv" is a relative path, so where it resolves
    # depends on how the tests are launched and on the Spark filesystem
    # configuration -- confirm the fixture file is reachable in CI.
    loaded = ReadWrite.read_csv_remove_row(spark_session, "test.csv")
    row_total = loaded.count()
    assert row_total == 1

def test_schema_apply_change_col(spark_session, sample_data):
    """Check that ``schema_apply_change_col`` yields the renamed column set.

    The sample frame's own schema is passed back in together with an
    old-name -> new-name mapping; the resulting column names are compared
    without regard to order. ``spark_session`` is requested but not used
    directly in the body.
    """
    renames = {"Name": "Full_Name", "Age": "Years", "Profession": "Job"}
    renamed = ReadWrite.schema_apply_change_col(
        sample_data, sample_data.schema, renames
    )
    # Sort both sides so column ordering cannot affect the comparison.
    expected_columns = sorted(["Full_Name", "Years", "Job"])
    actual_columns = sorted(renamed.columns)
    assert actual_columns == expected_columns

def test_result_dfs(sample_data):
    """Check the pair of frames returned by ``result_dfs`` for fully populated input.

    The sample rows contain no missing values, and the test expects the
    first returned frame to be empty and the second to hold both rows.

    NOTE(review): presumably ``result_dfs`` splits rows on nulls in the
    merge columns -- confirm against the ``ReadWrite`` implementation.
    """
    stamped = sample_data.withColumn("load_date", lit("2024-03-20"))
    frames = ReadWrite.result_dfs(["Name", "Age"], stamped)
    null_part, filled_part = frames
    assert null_part.count() == 0
    assert filled_part.count() == 2

def test_add_load_date(sample_data):
    """Check that ``add_load_date`` adds a ``load_date`` column set on every row.

    The column must exist, and the number of rows whose ``load_date``
    equals the schedule date must match the input row count.
    """
    schedule_date = "2024-03-20"
    stamped = ReadWrite.add_load_date(sample_data, schedule_date)
    assert "load_date" in stamped.columns

    # Rows carrying the expected date; all input rows should qualify.
    matching = stamped.filter(col("load_date") == lit(schedule_date))
    assert matching.count() == sample_data.count()

def test_get_schema_rename_clm():
    """Check that an unknown table name raises with the expected message."""
    # NOTE(review): ``Exception`` is deliberately kept broad because the
    # concrete exception type raised by ``get_schema_rename_clm`` is not
    # visible here -- narrow it once the type is confirmed.
    with pytest.raises(Exception) as raised:
        ReadWrite.get_schema_rename_clm("non_existing_table")
    message = str(raised.value)
    assert "Given Table name not found" in message
