In [1]:
import unittest
import os
import csv
from pyspark.sql import SparkSession

In [2]:
class TestDados(unittest.TestCase):
    def setUp(self):
        self.spark = SparkSession.builder \
            .appName("Extract and Load Test") \
            .master("spark://spark-master:7077") \
            .config("spark.jars", "/opt/bitnami/spark/jars/mysql-connector-j-8.0.33.jar") \
            .config("spark.log.level", "OFF") \
            .getOrCreate()
        self.jdbc_url = "jdbc:mysql://mysql:3306/desafio?useUnicode=true&characterEncoding=UTF-8&useLegacyDatetimeCode=false&serverTimezone=UTC"
        self.jdbc_properties = {
            "user": "sparkuser",
            "password": "sparkpass",
            "driver": "com.mysql.cj.jdbc.Driver",
            "useUnicode": "true",
            "characterEncoding": "UTF-8"
        }

    def tearDown(self):
        if self.spark is not None:
            self.spark.stop()
            self.spark = None
           
    def test_associado_existe(self):
        query = "(SELECT 1 FROM information_schema.tables WHERE table_schema = 'desafio' AND table_name = 'associado') AS t"
        df = self.spark.read.jdbc(self.jdbc_url, query, properties=self.jdbc_properties)
        self.assertEqual(df.count(), 1)

    def test_conta_existe(self):
        query = "(SELECT 1 FROM information_schema.tables WHERE table_schema = 'desafio' AND table_name = 'conta') AS t"
        df = self.spark.read.jdbc(self.jdbc_url, query, properties=self.jdbc_properties)
        self.assertEqual(df.count(), 1)

    def test_cartao_existe(self):
        query = "(SELECT 1 FROM information_schema.tables WHERE table_schema = 'desafio' AND table_name = 'cartao') AS t"
        df = self.spark.read.jdbc(self.jdbc_url, query, properties=self.jdbc_properties)
        self.assertEqual(df.count(), 1)

    def test_movimento_existe(self):
        query = "(SELECT 1 FROM information_schema.tables WHERE table_schema = 'desafio' AND table_name = 'movimento') AS t"
        df = self.spark.read.jdbc(self.jdbc_url, query, properties=self.jdbc_properties)
        self.assertEqual(df.count(), 1)

    def test_associado_dados(self):
        df = self.spark.read.jdbc(self.jdbc_url, "associado", properties=self.jdbc_properties)
        self.assertGreaterEqual(df.count(), 5)

    def test_conta_dados(self):
        df = self.spark.read.jdbc(self.jdbc_url, "conta", properties=self.jdbc_properties)
        self.assertGreaterEqual(df.count(), 5)

    def test_cartao_dados(self):
        df = self.spark.read.jdbc(self.jdbc_url, "cartao", properties=self.jdbc_properties)
        self.assertGreaterEqual(df.count(), 5)

    def test_movimento_dados(self):
        df = self.spark.read.jdbc(self.jdbc_url, "movimento", properties=self.jdbc_properties)
        self.assertGreaterEqual(df.count(), 5)

    def test_associado_colunas(self):
        df = self.spark.read.jdbc(self.jdbc_url, "associado", properties=self.jdbc_properties)
        colunas_esperadas = ["id", "nome", "sobrenome", "idade", "email"]
        self.assertEqual(sorted(df.columns), sorted(colunas_esperadas))

    def test_conta_colunas(self):
        df = self.spark.read.jdbc(self.jdbc_url, "conta", properties=self.jdbc_properties)
        colunas_esperadas = ["id", "tipo_conta", "data_criacao", "id_associado"]
        self.assertEqual(sorted(df.columns), sorted(colunas_esperadas))

    def test_cartao_colunas(self):
        df = self.spark.read.jdbc(self.jdbc_url, "cartao", properties=self.jdbc_properties)
        colunas_esperadas = ["id", "num_cartao", "nom_impresso", "id_associado", "id_conta"]
        self.assertEqual(sorted(df.columns), sorted(colunas_esperadas))

    def test_movimento_colunas(self):
        df = self.spark.read.jdbc(self.jdbc_url, "movimento", properties=self.jdbc_properties)
        colunas_esperadas = ["id", "vlr_transacao", "des_transacao", "data_movimento", "id_cartao"]
        self.assertEqual(sorted(df.columns), sorted(colunas_esperadas))

In [3]:
# Collect every test_* method of TestDados into a suite and execute it with
# the plain-text runner. The bare final expression lets the notebook display
# the resulting TextTestResult.
loader, runner = unittest.TestLoader(), unittest.TextTestRunner()
suite = loader.loadTestsFromTestCase(TestDados)
runner.run(suite)

25/03/29 15:28:53 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
Setting Spark log level to "OFF".
.Setting Spark log level to "OFF".
.Setting Spark log level to "OFF".                                              
.Setting Spark log level to "OFF".                                              
.Setting Spark log level to "OFF".
.Setting Spark log level to "OFF".                                              
.Setting Spark log level to "OFF".                                              
.Setting Spark log level to "OFF".
.Setting Spark log level to "OFF".                                              
.Setting Spark log level to "OFF".                                              
.Setting Spark log level to "OFF".
.Setting Spark log level to "OFF".                                 

<unittest.runner.TextTestResult run=12 errors=0 failures=0>