# PI Tag Pulls - UNDER DEVELOPMENT
This workbook pulls data for the desired PI tags and writes an output table in either CSV or Excel format.  Reasons for the program:
* Process engineers prefer Excel Workbooks,
* Excel connection to PI is slow,
* Excel crashes with Big data pulls.
* Current methods use Alteryx with an output that is slow and not formatted very well.

 Process
 1. The desired PI tags are first listed in an Excel workbook,
 2. The program extracts the PI tags and uses PIconnect to connect to PI AF,
 3. Data is extracted and placed into a flat file format
 4. Data is output into another excel workbook/worksheet or csv file.  

Created by: TW  
Created on: 2025-05-07  
Environment: python_20240807

## Libraries

In [12]:

#connect to PI Server
import  PIconnect as PI
from  PIconnect.PIConsts import SummaryType

#manage time
import pytz
from datetime import datetime

#standard libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#other libraries
import re
import pathlib
import xlsxwriter
import openpyxl

## Definitions

#### Extract PI Tags from Excel
Read an Excel spreadsheet that lists the tag name and PI tag into a dataframe. Note that process engineers love their lists of stuff in Excel spreadsheets.  Not efficient, but that is the easiest solution for them.

In [None]:
### Add logging and unit tests
###Add output of removed records
def fn_PI_tags_from_excel(file_name: str) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Read the PI tag list from an Excel workbook and split it into kept and removed rows.

    The tags are read from the sheet named "data_dictionary".  Rows that
    contain any missing value, and rows that exactly repeat an earlier row,
    are taken out of the cleaned list and reported separately so the caller
    can see what was discarded.

    Args:
        file_name (str): the name of the Excel workbook with the PI tags listed.  If it is not in the same directory, then this is the file path for the Excel workbook.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: ``(df_PI_tags_cln, df_PI_tags_removed)``.
        The first dataframe holds the cleaned tag rows; the second holds every
        row that was dropped (incomplete rows and duplicates).  Both have a
        fresh 0..n-1 index and the same columns as the worksheet.

    Raises:
        TypeError: if the workbook or the "data_dictionary" sheet cannot be
            read.  The underlying error is chained as ``__cause__``.
    """
    # Check for excel format
    try:
        df_PI_tags = pd.read_excel(file_name, sheet_name="data_dictionary")
    except Exception as err:
        # Keep the TypeError callers may already rely on, but preserve the
        # real cause (missing file, missing sheet, missing engine, ...).
        raise TypeError("fn_PI_tags_from_excel: This must be an Excel file.  Check file name and path.") from err

    #clean PI tags
    df_PI_tags_cln = df_PI_tags.dropna(axis=0, how="any").drop_duplicates(keep="first")

    #create a df of removed records
    # Both dropna and drop_duplicates keep the original row labels, so any
    # label missing from the cleaned frame is a removed record.  (A merge on
    # the column values cannot find duplicates: a dropped duplicate still
    # matches its surviving copy.)
    removed_mask = ~df_PI_tags.index.isin(df_PI_tags_cln.index)
    df_PI_tags_removed = df_PI_tags.loc[removed_mask]

    #Reset Indexes
    df_PI_tags_cln = df_PI_tags_cln.reset_index(drop=True)
    df_PI_tags_removed = df_PI_tags_removed.reset_index(drop=True)

    return df_PI_tags_cln, df_PI_tags_removed



#### Generate dataframe with requested data

In [3]:
###  Needs logging and unit testing
###### add sum_type in second row or onto tag name
######## verify function  types

def fn_create_results_dataframe(df: pd.DataFrame,
                           start_time: str | datetime = "-30d",
                           end_time: str | datetime = "00:00:00",
                           interval: str = "15m"):
                           #sum_type: str = 'AVERAGE' #sum type from spreadsheet instead of input

    """This function takes in a dataframe that contains a column with a 'tag name' and a second column with the 'tag'.  It creates a new dataframe that where the tag name becomes the name of the columns.  The tag is the path to the PI attribute which is used to look up the requested data.

    Args:
        df (pd.DataFrame): a dataframe that contain the 'tag name' in the first column and the (PI) 'tag' in the second column.

        start_time ([str, datetime], optional): The start time for the data collection. Uses PI System Time abbreviations. Defaults to "-30d".

        end_time ([str, datetime], optional): The end time for the data collection. Uses PI System Time abbreviations. Defaults to "00:00".

        interval (str, optional): The  time interval between each record. Uses PI System Time abbreviations. Defaults to "15m".

        sum_type (str, optional): The PI SummaryTypes. Some common values are: "AVERAGE", "COUNT", "MAXIMUM", "MINIMUM", "RANGE", "STD_DEV", and "TOTAL". Defaults to 'AVERAGE'.

    Returns:
        pd.DataFrame: A dataframe that contains columns of data.  The column headers are the tag names provided and the data in each column is based upon the function parameter request.

    Notes:
        index is listed as local Denver timezone
    """

    # # verify parameters

    #### HASHED Sum_type as it is taken from  Excel Spreadsheet
    # lst_sum_type = ["ALL", "ALL_FOR_NON_NUMERIC", "AVERAGE", "COUNT", "MAXIMUM", "MINIMUM", 'NONE', "PERCENT_GOOD", "POP_STD_DEV", "RANGE", "STD_DEV", "TOTAL", "TOTAL_WITH_UOM"]

    # try:
    #     lst_sum_type.index(sum_type)
    # except:
    #     raise TypeError("SummaryType is not from provided list.")

    #create result dataframe
    df_results = pd.DataFrame()

    #iterate through the tags
    for i in df.index:
        lbl = df.iloc[i,0] #tag names to pd column titles
        tag = str(df.iloc[i,1]) # make ta a string for python readability
        tag_split = tag.rsplit(sep="|") #split the PI element and attribute
        sum_type = df.iloc[i,2]

        #collect the data from PI and adds to results dataframe
        tag_data = db.descendant(tag_split[-2]).attributes[tag_split[-1]].summaries(start_time, end_time, interval, SummaryType.AVERAGE)

        df_results[lbl] = tag_data

    #Adjust to Denver timezone
    df_results = df_results.tz_convert('America/Denver')
    df_results.index = df_results.index.strftime('%Y-%m-%d %H:%M:%S')

    return df_results




#### Create Excel or CSV Output

In [None]:
#clean me up
#### Needs logging and unit testing
#### needs parameter checking
# need to have parameters added and not hard coded
def fn_create_output_table(df: pd.DataFrame, file_name: str, output_type: str = "excel", sheet_name: str = "PI_data_output"):
    """Write a dataframe, including its index, to a csv file or an Excel worksheet.

    For Excel output an existing workbook is opened in append mode and the
    target sheet is replaced if it is already there, leaving other sheets
    untouched.  If the workbook does not exist yet it is created.

    Args:
        df (pd.DataFrame): A dataframe that contains the data to be output.

        file_name (str): The name of the output file.  If it is not in the same directory, then this is the file path for the output file.

        output_type (str, optional): The type of output file.  Options are "csv" or "excel". Defaults to "excel".

        sheet_name (str, optional): The worksheet to write when ``output_type`` is "excel".  Ignored for csv. Defaults to "PI_data_output".

    Returns:
        None: This function does not return anything.

    Raises:
        TypeError: if ``output_type`` is neither "csv" nor "excel".
    """
    #check for valid output type
    if output_type == "csv":
        df.to_csv(file_name, index=True)
    elif output_type == "excel":
        # Append mode needs an existing workbook, and if_sheet_exists is only
        # accepted in append mode, so choose the writer options per case.
        if pathlib.Path(file_name).exists():
            writer_options = {"mode": "a", "if_sheet_exists": "replace"}
        else:
            writer_options = {"mode": "w"}

        with pd.ExcelWriter(file_name, engine='openpyxl', **writer_options) as writer:
            df.to_excel(writer, sheet_name=sheet_name, index=True)
    else:
        raise TypeError("fn_create_output: Output type must be 'csv' or 'excel'.")

## Main function

In [18]:
#Needs to be functionalized,
#needs output to excel or a csv.
#needs unit testing and logging.

#inputs
filename = "PI_tag_list.xlsx"

# make connection to the PI database
# Every request that uses `db` stays inside the `with` block so the database
# is only used while its connection context is open.
with PI.PIAFDatabase() as db:
    print(f"Connected to {db.server_name}")

    #create data frames of PI_tags: (cleaned tags, removed records)
    df_PI_tags = fn_PI_tags_from_excel(file_name=filename)
    df_PI_tags_cln, df_PI_tags_removed = df_PI_tags

    # PI request and result table creation
    df_results = fn_create_results_dataframe(df=df_PI_tags_cln)

#save results to excel/csv
fn_create_output_table(df=df_results, file_name="PI_data.xlsx", output_type="excel")





Connected to APPLEPI_AF


In [19]:
# Show the results table; a notebook cell displays the value of its last expression.
df_results


Unnamed: 0,SSE Total Phosphorus,SSE Soluble Phosphorus,SSE pH
2025-04-07 00:00:00,0.32,0.08,7.2
2025-04-07 00:15:00,0.32,0.08,7.2
2025-04-07 00:30:00,0.32,0.08,7.2
2025-04-07 00:45:00,0.32,0.08,7.2
2025-04-07 01:00:00,0.32,0.08,7.2
...,...,...,...
2025-05-06 22:45:00,0.19,0.08,7.1
2025-05-06 23:00:00,0.19,0.08,7.1
2025-05-06 23:15:00,0.19,0.08,7.1
2025-05-06 23:30:00,0.19,0.08,7.1
