In [21]:
import os

from pyspark.sql import SparkSession
from pyspark.sql.types import *

In [22]:
# Obtain the notebook's Spark session via the builder's getOrCreate(),
# using the builder's default settings (no app name or config is set here).
spark = (
    SparkSession
    .builder
    .getOrCreate()
)

In [23]:
# Root of the workspace that holds the notebook's input data.
# The location can be overridden per machine through the PEOPLE_DATA_PATH
# environment variable; when it is unset or empty, the original workspace
# path is used so existing setups keep working unchanged.
# Kept as a plain string because later cells build paths by concatenation.
data_path = os.environ.get('PEOPLE_DATA_PATH') or '/home/antoniojose/python_workspace'

In [24]:
# Explicit schema for the people CSV: seven nullable columns, with ID read as
# an integer and every other column (including Telefone and Aniversario)
# kept as a string.
schema = (
    StructType()
    .add("ID", IntegerType(), True)
    .add("Nome", StringType(), True)
    .add("Email", StringType(), True)
    .add("Telefone", StringType(), True)
    .add("Cidade", StringType(), True)
    .add("Aniversario", StringType(), True)
    .add("Sexo", StringType(), True)
)

In [25]:
# Full path of the people CSV, located under the workspace's data directory.
file_path_csv = data_path + '/data/pessoas.csv'

# Load the CSV with the explicit schema, treating the first line as a header.
people_csv = (
    spark.read
    .schema(schema)
    .csv(file_path_csv, header=True)
)

In [26]:
# Register the DataFrame as the temporary view 'people' so the SQL queries
# below can reference it by name (an existing view of that name is replaced).
people_csv.createOrReplaceTempView(name='people')

In [27]:
# Preview the loaded data as a text table (show()'s defaults, written out:
# at most 20 rows, long cell values truncated).
people_csv.show(n=20, truncate=True)

+---+-----------------+------------------+---------+--------------+-----------+---------+
| ID|             Nome|             Email| Telefone|        Cidade|Aniversario|     Sexo|
+---+-----------------+------------------+---------+--------------+-----------+---------+
|  1|       João Silva|    joao@email.com|123456789|     São Paulo| 01/01/1990|Masculino|
|  2|   Maria Oliveira|   maria@email.com|987654321|Rio de Janeiro| 15/05/1985| Feminino|
|  3|     Carlos Souza|  carlos@email.com|555555555|Belo Horizonte| 10/11/1982|Masculino|
|  4|       Ana Santos|     ana@email.com|999888777|      Salvador| 20/03/1995| Feminino|
|  5|      Pedro Costa|   pedro@email.com|111222333|      Brasília| 05/09/1978|Masculino|
|  6| Fernanda Pereira|fernanda@email.com|777888999|        Recife| 25/07/1989| Feminino|
|  7|  Thiago Oliveira|  thiago@email.com|444333222|      Curitiba| 30/12/1980|Masculino|
|  8|    Gabriela Lima|gabriela@email.com|666777888|     Fortaleza| 12/06/1992| Feminino|
|  9|Ricar

In [28]:
# Select every column and row from the 'people' view through Spark SQL.
select_all = spark.sql('SELECT * FROM people')

# Print the result as a text table (show()'s defaults, written out).
select_all.show(n=20, truncate=True)

+---+-----------------+------------------+---------+--------------+-----------+---------+
| ID|             Nome|             Email| Telefone|        Cidade|Aniversario|     Sexo|
+---+-----------------+------------------+---------+--------------+-----------+---------+
|  1|       João Silva|    joao@email.com|123456789|     São Paulo| 01/01/1990|Masculino|
|  2|   Maria Oliveira|   maria@email.com|987654321|Rio de Janeiro| 15/05/1985| Feminino|
|  3|     Carlos Souza|  carlos@email.com|555555555|Belo Horizonte| 10/11/1982|Masculino|
|  4|       Ana Santos|     ana@email.com|999888777|      Salvador| 20/03/1995| Feminino|
|  5|      Pedro Costa|   pedro@email.com|111222333|      Brasília| 05/09/1978|Masculino|
|  6| Fernanda Pereira|fernanda@email.com|777888999|        Recife| 25/07/1989| Feminino|
|  7|  Thiago Oliveira|  thiago@email.com|444333222|      Curitiba| 30/12/1980|Masculino|
|  8|    Gabriela Lima|gabriela@email.com|666777888|     Fortaleza| 12/06/1992| Feminino|
|  9|Ricar

In [29]:
# Same full selection from the 'people' view, sorted by Nome ascending.
select_all_ordering = spark.sql('SELECT * FROM people ORDER BY Nome ASC')

# Print the sorted result as a text table (show()'s defaults, written out).
select_all_ordering.show(n=20, truncate=True)

+---+-----------------+------------------+---------+--------------+-----------+---------+
| ID|             Nome|             Email| Telefone|        Cidade|Aniversario|     Sexo|
+---+-----------------+------------------+---------+--------------+-----------+---------+
|  4|       Ana Santos|     ana@email.com|999888777|      Salvador| 20/03/1995| Feminino|
|  3|     Carlos Souza|  carlos@email.com|555555555|Belo Horizonte| 10/11/1982|Masculino|
|  6| Fernanda Pereira|fernanda@email.com|777888999|        Recife| 25/07/1989| Feminino|
|  8|    Gabriela Lima|gabriela@email.com|666777888|     Fortaleza| 12/06/1992| Feminino|
|  1|       João Silva|    joao@email.com|123456789|     São Paulo| 01/01/1990|Masculino|
|  2|   Maria Oliveira|   maria@email.com|987654321|Rio de Janeiro| 15/05/1985| Feminino|
| 10|   Marina Almeida|  marina@email.com|333444555|        Manaus| 18/10/1983| Feminino|
|  5|      Pedro Costa|   pedro@email.com|111222333|      Brasília| 05/09/1978|Masculino|
|  9|Ricar

In [30]:
# Rows of the 'people' view whose Sexo is 'Masculino', sorted by Nome ascending.
# The string literal uses standard SQL single quotes: a double-quoted token is
# parsed as a column identifier when spark.sql.ansi.doubleQuotedIdentifiers is
# enabled, which would make the original query fail on such configurations.
select_all_male = spark.sql(
    "SELECT * FROM people WHERE Sexo = 'Masculino' ORDER BY Nome ASC"
)
select_all_male.show()

+---+-----------------+-----------------+---------+--------------+-----------+---------+
| ID|             Nome|            Email| Telefone|        Cidade|Aniversario|     Sexo|
+---+-----------------+-----------------+---------+--------------+-----------+---------+
|  3|     Carlos Souza| carlos@email.com|555555555|Belo Horizonte| 10/11/1982|Masculino|
|  1|       João Silva|   joao@email.com|123456789|     São Paulo| 01/01/1990|Masculino|
|  5|      Pedro Costa|  pedro@email.com|111222333|      Brasília| 05/09/1978|Masculino|
|  9|Ricardo Fernandes|ricardo@email.com|222111444|  Porto Alegre| 08/04/1987|Masculino|
|  7|  Thiago Oliveira| thiago@email.com|444333222|      Curitiba| 30/12/1980|Masculino|
+---+-----------------+-----------------+---------+--------------+-----------+---------+



In [31]:
# Rows of the 'people' view whose Sexo is 'Feminino', sorted by Nome ascending.
# The string literal uses standard SQL single quotes: a double-quoted token is
# parsed as a column identifier when spark.sql.ansi.doubleQuotedIdentifiers is
# enabled, which would make the original query fail on such configurations.
select_all_woman = spark.sql(
    "SELECT * FROM people WHERE Sexo = 'Feminino' ORDER BY Nome ASC"
)
select_all_woman.show()

+---+----------------+------------------+---------+--------------+-----------+--------+
| ID|            Nome|             Email| Telefone|        Cidade|Aniversario|    Sexo|
+---+----------------+------------------+---------+--------------+-----------+--------+
|  4|      Ana Santos|     ana@email.com|999888777|      Salvador| 20/03/1995|Feminino|
|  6|Fernanda Pereira|fernanda@email.com|777888999|        Recife| 25/07/1989|Feminino|
|  8|   Gabriela Lima|gabriela@email.com|666777888|     Fortaleza| 12/06/1992|Feminino|
|  2|  Maria Oliveira|   maria@email.com|987654321|Rio de Janeiro| 15/05/1985|Feminino|
| 10|  Marina Almeida|  marina@email.com|333444555|        Manaus| 18/10/1983|Feminino|
+---+----------------+------------------+---------+--------------+-----------+--------+

