
## Overview

Este notebook contém o tratamento da tabela de Empresas Inidôneas e Suspensas (CEIS), mantida pela CGU. O objetivo é verificar se há gastos no cartão corporativo efetuados com alguma empresa sancionada. Para limitar o escopo, só serão mantidos os registros de Pessoas Jurídicas.

In [0]:
# Location and format of the CEIS input file.
file_location = "/FileStore/tables/CEIS.csv"
file_type = "csv"

# CSV parsing options: schema inference is disabled, the first row is used
# as the header, fields are separated by ";" and the file is read as latin1.
infer_schema = "false"
first_row_is_header = "true"
delimiter = ";"
encode = 'latin1'

# These reader options only apply to CSV input; other file types ignore them.
df = (
    spark.read.format(file_type)
    .options(
        inferSchema=infer_schema,
        header=first_row_is_header,
        sep=delimiter,
        encoding=encode,
    )
    .load(file_location)
)

# Render the loaded DataFrame in the notebook output for a quick visual check.
display(df)

In [0]:
# Keep only the rows whose "TIPO DE PESSOA" value is "J"; the analysis is
# limited to legal entities (Pessoas Jurídicas).
df = df.where(df["TIPO DE PESSOA"] == "J")

In [0]:
# Drop the columns that are not needed for the analysis.
# NOTE(review): the spelling "ABRAGÊNCIA" presumably mirrors the header in the
# source CSV -- confirm against the file before "correcting" it.
cols_to_drop = [
    "CADASTRO",
    "NOME INFORMADO PELO ÓRGÃO SANCIONADOR",
    "NOME FANTASIA - CADASTRO RECEITA",
    "DETALHAMENTO",
    "DATA DO TRÂNSITO EM JULGADO",
    "ABRAGÊNCIA DEFINIDA EM DECISÃO JUDICIAL",
]
df = df.drop(*cols_to_drop)

In [0]:
# Short snake_case names for the remaining columns. toDF assigns them by
# position, so this list must have exactly one entry per remaining column,
# in the DataFrame's current column order.
# NOTE(review): "CADASTRO" is listed among the dropped columns, yet the first
# name here is "cadastro" -- confirm which source column it ends up labelling.
new_col_names = [
    "cadastro",
    "tipo_pessoa",
    "cnpj_sanc",
    "nome_sanc",
    "razao_social",
    "n_processo",
    "categ_sancao",
    "dt_in_san",
    "dt_fin_san",
    "dt_pub",
    "pub",
    "org_sancionador",
    "uf_orgao",
    "esfera_org",
    "fund_legal",
]

df = df.toDF(*new_col_names)

In [0]:
# Create a view or table

# temp_table_name = "CEIS_csv"

# df.createOrReplaceTempView(temp_table_name)

In [0]:
# Persist the cleaned DataFrame as a saved table instead of a notebook-scoped
# temp view, so that the SQL cells and other notebooks/users can query it and
# it remains available across cluster restarts.
permanent_table_name = "CEIS"

# Overwrite mode replaces any existing table of the same name, so the cell can
# be re-run. No explicit format is set on the writer.
table_writer = df.write.mode("overwrite")
table_writer.saveAsTable(permanent_table_name)

In [0]:
%sql
-- Preview the first 10 rows of the saved ceis table.
SELECT *
FROM ceis
LIMIT 10

In [0]:
%sql
-- Preview the first 5 rows of fato_cpgf (presumably the corporate-card
-- transactions fact table; it is defined outside this notebook).
SELECT *
FROM fato_cpgf
LIMIT 5

In [0]:
%sql
-- Every fato_cpgf transaction whose cnpj_cpf_fav equals the cnpj_sanc of a
-- row in ceis, with no restriction on the transaction date.
SELECT
  a.razao_social,
  a.n_processo,
  a.dt_in_san,
  a.dt_fin_san,
  a.categ_sancao,
  b.dt_trans,
  b.vlr_trans
FROM ceis AS a
INNER JOIN fato_cpgf AS b
  ON a.cnpj_sanc = b.cnpj_cpf_fav

In [0]:
%sql
-- Same join as the previous cell, restricted to transactions dated within the
-- sanction period (start and end dates inclusive).
-- NOTE(review): the CEIS CSV is loaded with inferSchema = "false", so
-- dt_in_san / dt_fin_san are strings; date() only yields a usable value if
-- the text is in a format the string-to-date cast accepts -- confirm.
-- NOTE(review): rows with a missing or unparsable dt_fin_san do not pass the
-- BETWEEN, so open-ended sanctions may be left out -- confirm this is intended.
SELECT
  a.razao_social,
  a.n_processo,
  a.dt_in_san,
  a.dt_fin_san,
  a.categ_sancao,
  b.dt_trans,
  b.vlr_trans
FROM ceis AS a
INNER JOIN fato_cpgf AS b
  ON a.cnpj_sanc = b.cnpj_cpf_fav
WHERE
  date(b.dt_trans) BETWEEN date(a.dt_in_san) AND date(a.dt_fin_san)