# Python Environment To Demonstrate Transform & Load on Oracle DW

#### Important Note: Kindly run the cells one by one. Some cells give output, some don't.

In [None]:
### SET UP THE PYTHON QUERYING ENVIRONMENT ###

## Run this cell only once. Expect no output ##

!pip install cx_Oracle
import getpass
import os
import warnings as wn

import cx_Oracle as cx
import pandas as pd

# Silence every Python warning so the cell outputs stay uncluttered.
wn.filterwarnings('ignore')

# Connection settings come from the environment so that no credential is stored
# in the notebook; the password is prompted for when ORACLE_PASSWORD is unset.
db_user = os.environ.get("ORACLE_USER", "SYS")
db_dsn = os.environ.get("ORACLE_DSN", "orcl")
db_password = os.environ.get("ORACLE_PASSWORD") or getpass.getpass(
    "Password for {}@{}: ".format(db_user, db_dsn)
)

connect_args = {"user": db_user, "password": db_password, "dsn": db_dsn}
if db_user.upper() == "SYS":
    # "SYS as SYSDBA" is SQL*Plus syntax; cx_Oracle expects the privilege
    # through the `mode` argument instead of inside the user name.
    # NOTE(review): consider a dedicated schema owner rather than SYS for this demo.
    connect_args["mode"] = cx.SYSDBA

# `conn` and `cursor` are shared by every cell below.
conn = cx.connect(**connect_args)
cursor = conn.cursor()

# By running this cell, connection is made from this Python environment to an Oracle database instance already on the machine or network.

### Transform Phase

In [None]:
### EXAMINE THE dw_water_quality STAGING TABLE THAT HAS BEEN PREPARED ###

## You can run this cell more than once, but don't re-run this cell (and subsequent ones) after the LOAD PHASE. Expect a dataframe ##

# Pull the whole staging table.
# The statement has no trailing ';': cx_Oracle sends the text to Oracle verbatim
# and the terminator is rejected (typically ORA-00911: invalid character).
query = "SELECT * FROM dw_water_quality"

df = pd.read_sql_query(query, conn)
df

# 2348 records. 7 fields.

In [None]:
### CHECK THE samplesamplingPointlabel COLUMN FOR DISTINCT VALUES ###

## You can run this cell more than once. Expect a dataframe ##

# List every distinct sampling-point label present in the staging table.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT DISTINCT samplesamplingPointlabel
    FROM dw_water_quality
"""

df = pd.read_sql_query(query, conn)
df

# 81 distinct locations.

In [None]:
### CHECK THE samplesamplingPointlabel COLUMN FOR NULL VALUES ###

## You can run this cell more than once. Expect an empty dataframe ##

# Return the rows whose sampling-point label is missing.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT * FROM dw_water_quality
    WHERE samplesamplingPointlabel IS NULL
"""

df = pd.read_sql_query(query, conn)
df

# No null values.

In [None]:
### CHECK THE samplesamplingPointlabel COLUMN FOR INCONSISTENT VALUES ###

## You can run this cell more than once. Expect an empty dataframe ##

# Look for sampling-point labels that are not among the table's own distinct labels.
# NOTE(review): the subquery reads the same column of the same table, so no row
# can satisfy NOT IN — the result is empty by construction and this does not
# detect spelling/case variants. Consider comparing against a reference list.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT * FROM dw_water_quality
    WHERE samplesamplingPointlabel NOT IN
        (
        SELECT DISTINCT samplesamplingPointlabel
            FROM dw_water_quality
        )
"""

df = pd.read_sql_query(query, conn)
df

# No inconsistent records.

In [None]:
### CHECK THE samplesamplingPointlabel COLUMN FOR OUTLIERS ###

## You can run this cell more than once. Expect a dataframe ##

# Count the records per sampling point, rarest first, to surface outliers.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT samplesamplingPointlabel, COUNT(samplesamplingPointlabel) cnt
    FROM dw_water_quality
GROUP BY samplesamplingPointlabel
ORDER BY cnt
"""

df = pd.read_sql_query(query, conn)
df

# Up to 20 distinct locations appear fewer than three times.

In [None]:
### CHECK THE samplesampleDateTime COLUMN FOR DISTINCT VALUES ###

## You can run this cell more than once. Expect a dataframe ##

# List every distinct sample timestamp in the staging table.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT DISTINCT samplesampleDateTime
    FROM dw_water_quality
"""

df = pd.read_sql_query(query, conn)
df

# 1077 distinct timestamps.

In [None]:
### CHECK THE samplesampleDateTime COLUMN FOR NULL VALUES ###

## You can run this cell more than once. Expect an empty dataframe ##

# Return the rows whose sample timestamp is missing.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT * FROM dw_water_quality
    WHERE samplesampleDateTime IS NULL
"""

df = pd.read_sql_query(query, conn)
df

# No null records.

In [None]:
### CHECK THE determinandlabel AND determinanddefinition COLUMNS FOR DISTINCT VALUES ###

## You can run this cell more than once. Expect a dataframe ##

# List every distinct (determinand label, determinand definition) pair.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT DISTINCT determinandlabel, determinanddefinition
    FROM dw_water_quality
"""

df = pd.read_sql_query(query, conn)
df

# 173 distinct sensor types.

In [None]:
### CHECK THE determinandlabel AND determinanddefinition COLUMNS FOR NULL VALUES ###

## You can run this cell more than once. Expect an empty dataframe ##

# Return the rows where either the determinand label or its definition is missing.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT * FROM dw_water_quality
    WHERE determinandlabel IS NULL
    OR determinanddefinition IS NULL
"""

df = pd.read_sql_query(query, conn)
df

# No null records.

In [None]:
### CHECK THE determinandlabel COLUMN FOR INCONSISTENT VALUES ###

## You can run this cell more than once. Expect an empty dataframe ##

# Look for determinand labels that are not among the table's own distinct labels.
# NOTE(review): the subquery reads the same column of the same table, so no row
# can satisfy NOT IN — the result is empty by construction and this does not
# detect spelling/case variants. Consider comparing against a reference list.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT * FROM dw_water_quality
    WHERE determinandlabel NOT IN
        (
        SELECT DISTINCT determinandlabel
            FROM dw_water_quality
        )
"""

df = pd.read_sql_query(query, conn)
df

# No inconsistent records.

In [None]:
### CHECK THE determinandlabel COLUMN FOR OUTLIERS ###

## You can run this cell more than once. Expect a dataframe ##

# Count the records per determinand label, rarest first, to surface outliers.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT determinandlabel, COUNT(determinandlabel) cnt
    FROM dw_water_quality
GROUP BY determinandlabel
ORDER BY cnt
"""

df = pd.read_sql_query(query, conn)
df

# Up to 95 sensors out of 173 appear fewer than three times. Click to the left of the dataframe to scroll.

In [None]:
### CHECK THE determinandunitlabel COLUMN FOR DISTINCT VALUES ###

## You can run this cell more than once. Expect a dataframe ##

# List every distinct measurement unit label in the staging table.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT DISTINCT determinandunitlabel
    FROM dw_water_quality
"""

df = pd.read_sql_query(query, conn)
df

# 12 unique units.

In [None]:
### CHECK THE determinandunitlabel COLUMN FOR NULL VALUES ###

## You can run this cell more than once. Expect an empty dataframe ##

# Return the rows whose unit label is missing.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT * FROM dw_water_quality
    WHERE determinandunitlabel IS NULL
"""

df = pd.read_sql_query(query, conn)
df

# No null records.

In [None]:
### CHECK THE determinandunitlabel COLUMN FOR INCONSISTENT VALUES ###

## You can run this cell more than once. Expect an empty dataframe ##

# Look for unit labels that are not among the table's own distinct unit labels.
# NOTE(review): the subquery reads the same column of the same table, so no row
# can satisfy NOT IN — the result is empty by construction and this does not
# detect spelling/case variants. Consider comparing against a reference list.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT * FROM dw_water_quality
    WHERE determinandunitlabel NOT IN
        (
        SELECT DISTINCT determinandunitlabel
            FROM dw_water_quality
        )
"""

df = pd.read_sql_query(query, conn)
df

# No inconsistent records.

In [None]:
### EXAMINE THE determinandunitlabel COLUMN IN THE STAGING TABLE ###

## You can run this cell more than once. Expect a dataframe ##

# Count the records per unit label, rarest first.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT determinandunitlabel, COUNT(determinandunitlabel) cnt
    FROM dw_water_quality
GROUP BY determinandunitlabel
ORDER BY cnt
"""

df = pd.read_sql_query(query, conn)
df

# 5 out of 12 units appear in fewer than 50 records.

In [None]:
### CHECK THE determinandunitlabel COLUMN FOR TOTAL OUTLIERS ###

## You can run this cell more than once. Expect a dataframe ##

# Fetch the rows whose unit is one of the three non-physical labels
# ('unitless', 'text', 'coded').
# A single IN list replaces the three-way UNION of full-table scans; the rows
# returned are the same as long as the table holds no exact duplicate rows
# (UNION would have collapsed those). Row order is not guaranteed either way.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT * FROM dw_water_quality
    WHERE determinandunitlabel IN ('unitless', 'text', 'coded')
"""

df = pd.read_sql_query(query, conn)
df

# These 49 records are to be deleted because they seem not to be important considering 
# their corresponding values in the determinanddefinition column.

In [None]:
### CHECK THE determinanddefinition COLUMN INFO OF determinandunitlabel COLUMN ENTRIES THAT LOOK ODD ###

## You can run this cell more than once. Expect a dataframe ##

# Show the distinct determinand definitions behind the rows whose unit is
# 'unitless', 'text' or 'coded'.
# One IN list replaces the UNION of three subqueries; DISTINCT on the single
# output column makes the result identical.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT DISTINCT determinanddefinition
    FROM dw_water_quality
    WHERE determinandunitlabel IN ('unitless', 'text', 'coded')
"""

df = pd.read_sql_query(query, conn)
df

# The result summary below shows the reason why these 49 records seem not to be important.

In [None]:
### CHECK THE result COLUMN FOR DISTINCT VALUES ###

## You can run this cell more than once. Expect a dataframe ##

# List every distinct value of the result column in ascending order.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT DISTINCT result
    FROM dw_water_quality
ORDER BY result
"""

df = pd.read_sql_query(query, conn)
df

# 1190 unique results.

In [None]:
### CHECK THE result COLUMN FOR NULL VALUES ###

## You can run this cell more than once. Expect an empty dataframe ##

# Return the rows whose result value is missing.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT * FROM dw_water_quality
    WHERE result IS NULL
"""

df = pd.read_sql_query(query, conn)
df

# No null records.

In [None]:
### CHECK THE result COLUMN FOR INCONSISTENT VALUES ###

## You can run this cell more than once. Expect an empty dataframe ##

# Look for result values that are not among the table's own distinct results.
# NOTE(review): the subquery reads the same column of the same table, so no row
# can satisfy NOT IN — the result is empty by construction. A range or format
# check would be needed to actually detect inconsistent values.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT * FROM dw_water_quality
    WHERE result NOT IN
        (
        SELECT DISTINCT result
            FROM dw_water_quality
        )
"""

df = pd.read_sql_query(query, conn)
df

# No inconsistent records.

In [None]:
### DATA CLEANING: TO REMOVE UNNECESSARY RECORDS ###

## Run this cell only once. Expect no output ##

# Remove the records whose unit is 'unitless', 'text' or 'coded'.
# The filter is applied directly to the unit column instead of collecting ids
# through a three-way UNION subquery; this targets exactly the intended rows
# even if ids were ever non-unique.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
try:
    cursor.execute("""
DELETE FROM dw_water_quality
    WHERE determinandunitlabel IN ('unitless', 'text', 'coded')
""")
except cx.DatabaseError:
    # Leave no half-applied change pending on the shared connection.
    conn.rollback()
    raise

conn.commit()

In [None]:
### EXAMINE THE RESIDUAL STAGING TABLE SO FAR ###

## You can run this cell more than once. Expect a dataframe ##

# Re-read the staging table after the clean-up delete.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = "SELECT * FROM dw_water_quality"

df = pd.read_sql_query(query, conn)
df

# 49 out of 2348 records were deleted. 2299 records remaining.

### Load Phase

In [None]:
### TO CREATE DIMENSION TABLES AND FACT TABLE AND LOAD THEM ###

## Run this cell only once. Expect no output ##


# Rename the staging columns to the names used by the warehouse model.
# cx_Oracle runs exactly one statement per execute() call and rejects ';'
# terminators, so each ALTER TABLE is issued on its own.
column_renames = (
    ("samplesamplingPointlabel", "measurementLocation"),
    ("samplesampleDateTime", "measurementDateTime"),
    ("determinandlabel", "sensorType"),
    ("determinanddefinition", "sensorTypeDefinition"),
    ("result", "measurement"),
    ("determinandunitlabel", "measurementUnit"),
)

for old_name, new_name in column_renames:
    # Identifiers cannot be bind variables; both names come from the fixed
    # tuple above, never from user input.
    cursor.execute(
        "ALTER TABLE dw_water_quality RENAME COLUMN {} TO {}".format(old_name, new_name)
    )

conn.commit()



# Add the calendar attributes later derived from measurementDateTime:
# measurementYear and measurementWeek (INTEGER), measurementMonth (VARCHAR2).
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
cursor.execute("""
ALTER TABLE dw_water_quality ADD (
    measurementYear INTEGER,
    measurementWeek INTEGER,
    measurementMonth VARCHAR2(255)
    )
""")

conn.commit()



# Derive year, ISO week number and month name from the leading 'YYYY-MM-DD'
# part of measurementDateTime, in one pass over the staging table.
#
# Changes from the previous three-statement version:
#   * one UPDATE instead of three statements in a single execute(), which
#     cx_Oracle does not support (one statement per call, no ';');
#   * the no-op "WHERE ID = ID" filter is gone, so every row is populated;
#   * 'FMMONTH' stores 'MAY' rather than the blank-padded 'MAY      ';
#   * the week is converted explicitly with TO_NUMBER for the INTEGER column.
# (The year could equally be read as SUBSTR(measurementDateTime, 1, 4);
#  positions 6-7 hold the month, not the year.)
# NOTE(review): 'IW' is the ISO week while the year is the calendar year, so
# dates around New Year can pair week 52/53/1 with the "other" year.
try:
    cursor.execute("""
UPDATE dw_water_quality
SET measurementYear =
        EXTRACT(YEAR FROM TO_DATE(SUBSTR(measurementDateTime, 1, 10), 'YYYY-MM-DD')),
    measurementWeek =
        TO_NUMBER(TO_CHAR(TO_DATE(SUBSTR(measurementDateTime, 1, 10), 'YYYY-MM-DD'), 'IW')),
    measurementMonth =
        TO_CHAR(TO_DATE(SUBSTR(measurementDateTime, 1, 10), 'YYYY-MM-DD'), 'FMMONTH')
""")
except cx.DatabaseError:
    # Leave no half-applied change pending on the shared connection.
    conn.rollback()
    raise

conn.commit()



# (Re)create the 7 dimension tables and the fact table.
# Every statement is executed on its own because cx_Oracle accepts a single
# statement per execute() call and no ';' terminator.

# Children first so that foreign keys never block a DROP.
tables_to_drop = (
    "factMeasurementsTable",
    "dimLocationTable",
    "dimSensorTable",
    "dimUnitTable",
    "dimTimeTable",
    "dimYearTable",
    "dimWeekTable",
    "dimMonthTable",
)

for table_name in tables_to_drop:
    try:
        cursor.execute("DROP TABLE {}".format(table_name))
    except cx.DatabaseError as exc:
        (error,) = exc.args
        # ORA-00942 (table or view does not exist) is expected on the first
        # run; anything else is a real failure.
        if error.code != 942:
            raise

# Parents before children: dimTimeTable references year/week/month, and the
# fact table references location/time/sensor/unit.
create_statements = (
    """
CREATE TABLE dimLocationTable (
    locationID INTEGER GENERATED ALWAYS AS IDENTITY,
    measurementLocation VARCHAR2(255) NOT NULL,
    CONSTRAINT pk_locationID PRIMARY KEY (locationID)
    )
""",
    """
CREATE TABLE dimSensorTable (
    sensorID INTEGER GENERATED ALWAYS AS IDENTITY,
    sensorType VARCHAR2(255) NOT NULL,
    sensorTypeDefinition VARCHAR2(255) NOT NULL,
    CONSTRAINT pk_sensorID PRIMARY KEY (sensorID)
    )
""",
    """
CREATE TABLE dimUnitTable (
    unitID INTEGER GENERATED ALWAYS AS IDENTITY,
    measurementUnit VARCHAR2(255) NOT NULL,
    CONSTRAINT pk_unitID PRIMARY KEY (unitID)
    )
""",
    """
CREATE TABLE dimYearTable (
    yearID INTEGER GENERATED ALWAYS AS IDENTITY,
    measurementYear INTEGER NOT NULL,
    CONSTRAINT pk_yearID PRIMARY KEY (yearID)
    )
""",
    """
CREATE TABLE dimWeekTable (
    weekID INTEGER GENERATED ALWAYS AS IDENTITY,
    measurementWeek INTEGER NOT NULL,
    CONSTRAINT pk_weekID PRIMARY KEY (weekID)
    )
""",
    """
CREATE TABLE dimMonthTable (
    monthID INTEGER GENERATED ALWAYS AS IDENTITY,
    measurementMonth VARCHAR2(15) NOT NULL,
    CONSTRAINT pk_monthID PRIMARY KEY (monthID)
    )
""",
    """
CREATE TABLE dimTimeTable (
    dateTimeID INTEGER GENERATED ALWAYS AS IDENTITY,
    measurementDateTime VARCHAR2(255) NOT NULL,
    yearID INTEGER REFERENCES dimYearTable(yearID),
    weekID INTEGER REFERENCES dimWeekTable(weekID),
    monthID INTEGER REFERENCES dimMonthTable(monthID),
    CONSTRAINT pk_dateTimeID PRIMARY KEY (dateTimeID)
    )
""",
    """
CREATE TABLE factMeasurementsTable (
    factID INTEGER GENERATED ALWAYS AS IDENTITY,
    locationID INTEGER REFERENCES dimLocationTable(locationID),
    dateTimeID INTEGER REFERENCES dimTimeTable(dateTimeID),
    sensorID INTEGER REFERENCES dimSensorTable(sensorID),
    unitID INTEGER REFERENCES dimUnitTable(unitID),
    measurement FLOAT NOT NULL,
    CONSTRAINT pk_factID PRIMARY KEY (factID)
    )
""",
)

for ddl in create_statements:
    cursor.execute(ddl)

conn.commit()



# Add one INTEGER column per dimension to the staging table; they receive the
# surrogate keys generated by the 7 dimension tables.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
cursor.execute("""
ALTER TABLE dw_water_quality ADD (
    locationID INTEGER,
    dateTimeID INTEGER,
    sensorID INTEGER,
    unitID INTEGER,
    yearID INTEGER,
    monthID INTEGER,
    weekID INTEGER
    )
""")

conn.commit()



# Load the 7 dimension tables, copy each generated key back to the staging
# table, then load the fact table.
# The statements run one at a time (cx_Oracle executes a single statement per
# call and accepts no ';') inside one transaction: everything is committed
# together, or rolled back if any step fails.
load_statements = (
    # --- location ---
    """
INSERT INTO dimLocationTable(measurementLocation)
    SELECT DISTINCT measurementLocation
        FROM dw_water_quality
    ORDER BY measurementLocation
""",
    """
MERGE INTO dw_water_quality wq
USING dimLocationTable d
ON (wq.measurementLocation = d.measurementLocation)
WHEN MATCHED THEN UPDATE
SET wq.locationID = d.locationID
""",
    # --- unit ---
    """
INSERT INTO dimUnitTable(measurementUnit)
    SELECT DISTINCT measurementUnit
        FROM dw_water_quality
    ORDER BY measurementUnit
""",
    """
MERGE INTO dw_water_quality wq
USING dimUnitTable d
ON (wq.measurementUnit = d.measurementUnit)
WHEN MATCHED THEN UPDATE
SET wq.unitID = d.unitID
""",
    # --- sensor ---
    """
INSERT INTO dimSensorTable(sensorType, sensorTypeDefinition)
    SELECT DISTINCT sensorType, sensorTypeDefinition
        FROM dw_water_quality
    ORDER BY sensorType
""",
    # The dimension holds distinct (type, definition) pairs, so both columns
    # form the match key; matching on type alone would be ambiguous if one
    # type ever carried two definitions.
    """
MERGE INTO dw_water_quality wq
USING dimSensorTable d
ON (wq.sensorType = d.sensorType
    AND wq.sensorTypeDefinition = d.sensorTypeDefinition)
WHEN MATCHED THEN UPDATE
SET wq.sensorID = d.sensorID
""",
    # --- year ---
    """
INSERT INTO dimYearTable(measurementYear)
    SELECT DISTINCT measurementYear
        FROM dw_water_quality
    ORDER BY measurementYear
""",
    """
MERGE INTO dw_water_quality wq
USING dimYearTable d
ON (wq.measurementYear = d.measurementYear)
WHEN MATCHED THEN UPDATE
SET wq.yearID = d.yearID
""",
    # --- week ---
    """
INSERT INTO dimWeekTable(measurementWeek)
    SELECT DISTINCT measurementWeek
        FROM dw_water_quality
    ORDER BY measurementWeek
""",
    """
MERGE INTO dw_water_quality wq
USING dimWeekTable d
ON (wq.measurementWeek = d.measurementWeek)
WHEN MATCHED THEN UPDATE
SET wq.weekID = d.weekID
""",
    # --- month: inserted in calendar order so monthID sorts chronologically ---
    """
INSERT INTO dimMonthTable(measurementMonth)
    SELECT s.measurementMonth FROM
        (
        SELECT DISTINCT
            measurementMonth,
            EXTRACT(MONTH FROM TO_DATE(SUBSTR(measurementDateTime, 1, 10), 'YYYY-MM-DD')) c
        FROM dw_water_quality
        ) s
    ORDER BY s.c ASC
""",
    """
MERGE INTO dw_water_quality wq
USING dimMonthTable d
ON (wq.measurementMonth = d.measurementMonth)
WHEN MATCHED THEN UPDATE
SET wq.monthID = d.monthID
""",
    # --- time: needs the year/week/month keys merged above ---
    """
INSERT INTO dimTimeTable(measurementDateTime, yearID, weekID, monthID)
    SELECT DISTINCT measurementDateTime, yearID, weekID, monthID
        FROM dw_water_quality
    ORDER BY yearID ASC
""",
    """
MERGE INTO dw_water_quality wq
USING dimTimeTable d
ON (wq.measurementDateTime = d.measurementDateTime
    AND wq.yearID = d.yearID
    AND wq.monthID = d.monthID
    AND wq.weekID = d.weekID)
WHEN MATCHED THEN UPDATE
SET wq.dateTimeID = d.dateTimeID
""",
    # --- fact: the staging rows already carry every surrogate key, so the
    # dimensions need not be joined again. The IS NOT NULL filter keeps the
    # inner-join behaviour of skipping rows that failed to match a dimension.
    """
INSERT INTO factMeasurementsTable (
    locationID, dateTimeID, sensorID, unitID, measurement
    )
    SELECT locationID, dateTimeID, sensorID, unitID, measurement
    FROM dw_water_quality
    WHERE locationID IS NOT NULL
        AND dateTimeID IS NOT NULL
        AND sensorID IS NOT NULL
        AND unitID IS NOT NULL
""",
)

try:
    for statement in load_statements:
        cursor.execute(statement)
except cx.DatabaseError:
    # Do not leave a half-loaded warehouse pending on the shared connection.
    conn.rollback()
    raise
else:
    conn.commit()

##### The journey so far: 7 dimension tables (including the Time table), and the fact table

In [None]:
### THE factMeasurementsTable ###

## You can run this cell more than once. Expect a dataframe ##

# Display the loaded fact table.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = "SELECT * FROM factMeasurementsTable"

df = pd.read_sql_query(query, conn)
df

In [None]:
### THE dimLocationTable ###

## You can run this cell more than once. Expect a dataframe ##

# Display the location dimension.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = "SELECT * FROM dimLocationTable"

df = pd.read_sql_query(query, conn)
df

In [None]:
### THE dimSensorTable ###

## You can run this cell more than once. Expect a dataframe ##

# Display the sensor dimension.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = "SELECT * FROM dimSensorTable"

df = pd.read_sql_query(query, conn)
df

In [None]:
### THE dimUnitTable ###

## You can run this cell more than once. Expect a dataframe ##

# Display the unit dimension.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = "SELECT * FROM dimUnitTable"

df = pd.read_sql_query(query, conn)
df

In [None]:
### THE dimTimeTable ###

## You can run this cell more than once. Expect a dataframe ##

# Display the time dimension.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = "SELECT * FROM dimTimeTable"

df = pd.read_sql_query(query, conn)
df

In [None]:
### THE dimYearTable ###

## You can run this cell more than once. Expect a dataframe ##

# Display the year dimension.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = "SELECT * FROM dimYearTable"

df = pd.read_sql_query(query, conn)
df

In [None]:
### THE dimWeekTable ###

## You can run this cell more than once. Expect a dataframe ##

# Display the week dimension.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = "SELECT * FROM dimWeekTable"

df = pd.read_sql_query(query, conn)
df

In [None]:
### THE dimMonthTable ###

## You can run this cell more than once. Expect a dataframe ##

# Display the month dimension.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = "SELECT * FROM dimMonthTable"

df = pd.read_sql_query(query, conn)
df

### Querying our Data Warehouse Implementation To Get Information

In [None]:
### The list of water sensors measured by type by month ###

## You can run this cell more than once. Expect a dataframe ##

# Count the measurements per sensor type and month, in calendar order.
# Grouping by monthID as well as the month name lets the query sort on monthID
# directly, replacing the previous wrap-and-rejoin against dimMonthTable (same
# rows, given one dimMonthTable row per month name).
# NOTE(review): the numberOfSensors column is COUNT(measurement), i.e. a count
# of measurements rather than of distinct sensors; the alias is kept unchanged.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT
    mt.measurementMonth,
    st.sensorType,
    COUNT(fm.measurement) numberOfSensors
FROM
    dimSensorTable st
    INNER JOIN factMeasurementsTable fm ON st.sensorID = fm.sensorID
    INNER JOIN dimTimeTable tt ON fm.dateTimeID = tt.dateTimeID
    INNER JOIN dimMonthTable mt ON tt.monthID = mt.monthID
GROUP BY
    mt.monthID, mt.measurementMonth, st.sensorType
ORDER BY
    mt.monthID, st.sensorType
"""

df = pd.read_sql_query(query, conn)
df

In [None]:
### The number of sensor measurements collected by type by week ###

## You can run this cell more than once. Expect a dataframe ##

# Count the measurements per sensor type and week number.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT
    wt.measurementWeek,
    st.sensorType,
    COUNT(fm.measurement) numberOfMeasurements
FROM
    dimSensorTable st
    INNER JOIN factMeasurementsTable fm ON st.sensorID = fm.sensorID
    INNER JOIN dimTimeTable tt ON fm.dateTimeID = tt.dateTimeID
    INNER JOIN dimWeekTable wt ON tt.weekID = wt.weekID
GROUP BY
    st.sensorType, wt.measurementWeek
ORDER BY
    wt.measurementWeek, st.sensorType
"""

df = pd.read_sql_query(query, conn)
df

In [None]:
### The number of measurements made by location by month ###

## You can run this cell more than once. Expect a dataframe ##

# Count the measurements per location and month, in calendar order.
# Grouping by monthID as well as the month name lets the query sort on monthID
# directly, replacing the previous wrap-and-rejoin against dimMonthTable (same
# rows, given one dimMonthTable row per month name).
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT
    mt.measurementMonth,
    lt.measurementLocation,
    COUNT(fm.measurement) numberOfMeasurements
FROM
    dimLocationTable lt
    INNER JOIN factMeasurementsTable fm ON lt.locationID = fm.locationID
    INNER JOIN dimTimeTable tt ON fm.dateTimeID = tt.dateTimeID
    INNER JOIN dimMonthTable mt ON tt.monthID = mt.monthID
GROUP BY
    mt.monthID, mt.measurementMonth, lt.measurementLocation
ORDER BY
    mt.monthID, lt.measurementLocation
"""

df = pd.read_sql_query(query, conn)
df

In [None]:
### The average number of measurements covered for pH by year ###

## You can run this cell more than once. Expect a dataframe ##

# Count the pH measurements recorded in each year.
# NOTE(review): the cell title speaks of an "average number", but the query
# returns a plain COUNT per year — confirm which figure is wanted.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT
    yt.measurementYear,
    COUNT(fm.measurement) numberOfMeasurements
FROM
    dimSensorTable st
    INNER JOIN factMeasurementsTable fm ON st.sensorID = fm.sensorID
    INNER JOIN dimTimeTable tt ON fm.dateTimeID = tt.dateTimeID
    INNER JOIN dimYearTable yt ON tt.yearID = yt.yearID
WHERE
    st.sensorType = 'pH'
GROUP BY
    yt.measurementYear
ORDER BY
    yt.measurementYear
"""

df = pd.read_sql_query(query, conn)
df

In [None]:
### The average value of nitrate measurements by locations by year ###

## You can run this cell more than once. Expect a dataframe ##

# Average the 'Nitrate-N' measurements per location and year, rounded to 2 decimals.
# No trailing ';' — cx_Oracle forwards the text as-is and Oracle rejects the terminator.
query = """
SELECT
    yt.measurementYear,
    lt.measurementLocation,
    ROUND(AVG(fm.measurement), 2) averageValuesOfNitrate
FROM
    dimLocationTable lt
    INNER JOIN factMeasurementsTable fm ON lt.locationID = fm.locationID
    INNER JOIN dimSensorTable st ON fm.sensorID = st.sensorID
    INNER JOIN dimTimeTable tt ON fm.dateTimeID = tt.dateTimeID
    INNER JOIN dimYearTable yt ON tt.yearID = yt.yearID
WHERE
    st.sensorType = 'Nitrate-N'
GROUP BY
    lt.measurementLocation, yt.measurementYear
ORDER BY
    yt.measurementYear, lt.measurementLocation
"""

df = pd.read_sql_query(query, conn)
df

In [None]:
### Endeavour to run this cell in order to close the connection to the database.

# Closes the shared `conn` opened in the set-up cell; any cell that queries
# through `conn` or `cursor` needs the set-up cell re-run after this.
conn.close()

##### End of demonstration. Thank you for staying with me on this!