## Overview

This notebook will show you how to create and query a table or DataFrame that you uploaded to DBFS. [DBFS](https://docs.databricks.com/user-guide/dbfs-databricks-file-system.html) is a Databricks File System that allows you to store data for querying inside of Databricks. This notebook assumes that you have a file already inside of DBFS that you would like to read from.

This notebook is written in **Python** so the default cell type is Python. However, you can use different languages by using the `%LANGUAGE` syntax. Python, Scala, SQL, and R are all supported.

In [0]:
# File location and type
file_location = "/FileStore/tables/employees_earning.csv"
file_type = "csv"

# CSV options
infer_schema = "false"
first_row_is_header = "false"
delimiter = ","

# The applied options are for CSV files. For other file types, these will be ignored.
df = spark.read.format(file_type) \
  .option("inferSchema", infer_schema) \
  .option("header", first_row_is_header) \
  .option("sep", delimiter) \
  .load(file_location)

display(df)

_c0,_c1,_c2,_c3,_c4,_c5,_c6,_c7,_c8,_c9,_c10,_c11,_c12,_c13
"Abadi,Kidani A",,Assessing Department,Property Officer,"$46,291.98",,$300.00,,,,,"$46,591.98",2118,M
"Abasciano,Joseph",,Boston Police Department,Police Officer,"$6,933.66",,$850.00,$205.92,"$74,331.86",,"$15,258.44","$97,579.88",2132,M
"Abban,Christopher John",,Boston Fire Department,Fire Fighter,"$103,442.22",,$550.00,"$15,884.53",,"$4,746.50",,"$124,623.25",2132,M
"Abbasi,Sophia",,Green Academy,Manager (C) (non-ac),"$18,249.83",,,,,,,"$18,249.83",2148,M
"Abbate-Vaughn,Jorgelina",,BPS Ellis Elementary,Teacher,"$84,410.28",,"$1,250.00",,,,,"$85,660.28",2481,M
"Abberton,James P",,Public Works Department,Maint Mech (Carpenter)##,"$41,449.16",,$81.00,"$8,807.47",,,,"$50,337.63",2127,M
"Abbott,Erin Elizabeth",,Hurley K-8,Teacher,"$80,413.68",,,,,,,"$80,413.68",2081,M
"Abbott,John R.",,BPS Snowden International Hi,Teacher,"$99,264.10",,$656.04,,,,,"$99,920.14",2445,M
"Abbruzzese,Angela",,BPS Clap Elementary,Lunch Hour Monitors,"$5,000.90",,,,,,,"$5,000.90",2125,M
"Abbruzzese,Donna",,BPS Clap Elementary,Lunch Hour Monitors,$621.90,,,,,,,$621.90,2125,M


In [0]:
# Create a view or table

temp_table_name = "employees_earning"

df.createOrReplaceTempView(temp_table_name)

In [0]:
%sql

/* Query the created temp table in a SQL cell */

select * from employees_earning;

_c0,_c1,_c2,_c3,_c4,_c5,_c6,_c7,_c8,_c9,_c10,_c11,_c12,_c13
"Abadi,Kidani A",,Assessing Department,Property Officer,"$46,291.98",,$300.00,,,,,"$46,591.98",2118,M
"Abasciano,Joseph",,Boston Police Department,Police Officer,"$6,933.66",,$850.00,$205.92,"$74,331.86",,"$15,258.44","$97,579.88",2132,M
"Abban,Christopher John",,Boston Fire Department,Fire Fighter,"$103,442.22",,$550.00,"$15,884.53",,"$4,746.50",,"$124,623.25",2132,M
"Abbasi,Sophia",,Green Academy,Manager (C) (non-ac),"$18,249.83",,,,,,,"$18,249.83",2148,M
"Abbate-Vaughn,Jorgelina",,BPS Ellis Elementary,Teacher,"$84,410.28",,"$1,250.00",,,,,"$85,660.28",2481,M
"Abberton,James P",,Public Works Department,Maint Mech (Carpenter)##,"$41,449.16",,$81.00,"$8,807.47",,,,"$50,337.63",2127,M
"Abbott,Erin Elizabeth",,Hurley K-8,Teacher,"$80,413.68",,,,,,,"$80,413.68",2081,M
"Abbott,John R.",,BPS Snowden International Hi,Teacher,"$99,264.10",,$656.04,,,,,"$99,920.14",2445,M
"Abbruzzese,Angela",,BPS Clap Elementary,Lunch Hour Monitors,"$5,000.90",,,,,,,"$5,000.90",2125,M
"Abbruzzese,Donna",,BPS Clap Elementary,Lunch Hour Monitors,$621.90,,,,,,,$621.90,2125,M


In [0]:
%sql

select count(_c3) as Gender_F_M
from employees_earning
group by _c13;

Gender_F_M
5844
16202


In [0]:
%sql

select _c2 as Departemnt, _c3 as profession,count(_c13) as Females
from employees_earning
where _c13 = 'F'
group by _c2,_c3;


Departemnt,profession,Females
Cemetery Division,Grave Digger,5
BPS Boston Arts Academy,Community Field Coordinator,2
Boston Fire Department,Chemist,1
Elderly Commission,Dep Commissioner of Operations,1
Environment Department,Commissioner,1
Tech Boston Academy,Paraprofessional,1
Curley K-8,Part-Time Cafeteria Attendant,1
Innovation Department,Managing Partner,1
BPS Clap Elementary,Teacher,6
Public Facilities Department,Clerk of Works (PFD),2


In [0]:
%sql

select _c2 as Departemnt, _c3 as profession,count(_c13) as Males
from employees_earning
where _c13 = 'M'
group by _c2,_c3;

Departemnt,profession,Males
Cemetery Division,Grave Digger,12
Dpt of Innovation & Technology,Data Librarian,1
Curley K-8,Part-Time Cafeteria Attendant,7
BPS Burke High,Library Paraprofessional,2
ASD Office Of Labor Relation,Asst Corp Counsel III,7
BPS Lee Elementary,Prin Clerk/School Sec 19,1
BPS Boston Arts Academy,Community Field Coordinator,4
BPS South Boston HS - Excel,Director (Basas 10B),1
BPS Boston Arts Academy,Coordinator (C),1
Frederick Pilot Middle,Principal Middle,1


In [0]:
%sql

SELECT  _c3 as profession,COUNT(_c12)  as Postal
FROM employees_earning
GROUP BY _c3;




profession,Postal
Fire Captain Administration,9
FF (EMS Coordinator),1
Service Writer,2
Dir of Human Resources (BPD),1
Dog Offr(AnimalControlOffcr)##,6
Supn-Park Maint,6
Supv Of Contracts (PWD),1
Reasearch & Development Anl,2
Admin Anl (AsArchivCity/Clrk),1
Research Services Team Leader,1


In [0]:
display(df)

_c0,_c1,_c2,_c3,_c4,_c5,_c6,_c7,_c8,_c9,_c10,_c11,_c12,_c13
"Abadi,Kidani A",,Assessing Department,Property Officer,"$46,291.98",,$300.00,,,,,"$46,591.98",2118,M
"Abasciano,Joseph",,Boston Police Department,Police Officer,"$6,933.66",,$850.00,$205.92,"$74,331.86",,"$15,258.44","$97,579.88",2132,M
"Abban,Christopher John",,Boston Fire Department,Fire Fighter,"$103,442.22",,$550.00,"$15,884.53",,"$4,746.50",,"$124,623.25",2132,M
"Abbasi,Sophia",,Green Academy,Manager (C) (non-ac),"$18,249.83",,,,,,,"$18,249.83",2148,M
"Abbate-Vaughn,Jorgelina",,BPS Ellis Elementary,Teacher,"$84,410.28",,"$1,250.00",,,,,"$85,660.28",2481,M
"Abberton,James P",,Public Works Department,Maint Mech (Carpenter)##,"$41,449.16",,$81.00,"$8,807.47",,,,"$50,337.63",2127,M
"Abbott,Erin Elizabeth",,Hurley K-8,Teacher,"$80,413.68",,,,,,,"$80,413.68",2081,M
"Abbott,John R.",,BPS Snowden International Hi,Teacher,"$99,264.10",,$656.04,,,,,"$99,920.14",2445,M
"Abbruzzese,Angela",,BPS Clap Elementary,Lunch Hour Monitors,"$5,000.90",,,,,,,"$5,000.90",2125,M
"Abbruzzese,Donna",,BPS Clap Elementary,Lunch Hour Monitors,$621.90,,,,,,,$621.90,2125,M


In [0]:
df.show(3)

In [0]:
my_schema = df.schema

In [0]:
df.createOrReplaceTempView("employees_earning")

In [0]:
sqlWay = spark.sql('''
 
SELECT _c3, count(1) 
FROM employees_earning 
GROUP BY _c3
 
 
''')



In [0]:
dataFrameWay = df\
              .groupBy("_c3")\
              .count()

In [0]:
spark.sql("SELECT max(_c12) from employees_earning Group by _c3").take(3)

In [0]:
import pyspark.sql.functions as spark_func
import pyspark.sql.types as spark_types
 
df.select(spark_func.max("_c12")).show(1)

In [0]:
import pyspark.sql.functions as F

In [0]:
df.show(5,truncate=False)

In [0]:


df.groupBy('_c3').agg(F.count('_c13').alias('Gender')).limit(5)\
.show()
 
  

In [0]:


df.groupBy('_c13').agg(F.count('_c2').alias('Gender')).limit(5)\
.show()
 
  

In [0]:
df.groupBy('_c3').agg(F.count('_c13')).limit(10)\
.where(F.col('_c13')=='F').show()

#flightData2015.groupBy('DEST_COUNTRY_NAME').agg(F.sum('count').alias('sum')).orderBy(F.col('sum').desc()).limit(5)\
#.where(F.col('sum')<9000).show()
