# State Tax Information
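* Purpose: Display tax information for a given state and year based on user input.
* Input: CSV data from multiple files (one per fiscal year, plus a tax item code lookup)
* Output: Tax data requested by the user (the selected state's values for the chosen year, listed by item description)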
* Author: Yani Hadzhiev
* Date: November 16, 2023

In [None]:
# Import pandas as alias pd
import pandas as pd
# Import numpy as alias np
import numpy as np

# Load each fiscal year's table and tag every row with the year it belongs to,
# so rows remain distinguishable after the tables are stacked together.
fy2019 = pd.read_csv('FY2019.csv').assign(year=2019)
fy2020 = pd.read_csv('FY2020.csv').assign(year=2020)
fy2021 = pd.read_csv('FY2021.csv').assign(year=2021)
fy2022 = pd.read_csv('FY2022-STC-Detailed-Table.csv').assign(year=2022)
# Lookup table that pairs each item code with its description
taxCodes = pd.read_csv('taxitemcodesanddescriptions.csv')

# Attach the tax code descriptions to every yearly table.
# The yearly tables hold the code in 'item'; the lookup table holds it in 'Item Code'.
joinKeys = {'left_on': 'item', 'right_on': 'Item Code'}
fy2019Merged = fy2019.merge(taxCodes, **joinKeys)
fy2020Merged = fy2020.merge(taxCodes, **joinKeys)
fy2021Merged = fy2021.merge(taxCodes, **joinKeys)
fy2022Merged = fy2022.merge(taxCodes, **joinKeys)

# Stack the yearly tables on top of each other (row-wise, original row labels kept)
yearlyFrames = [fy2019Merged, fy2020Merged, fy2021Merged, fy2022Merged]
stackedData = pd.concat(yearlyFrames)
# Replace missing values with 0 and index by Description for easier printing later
mergedData = stackedData.fillna(0).set_index('Description')

# Get user input for state abbreviation.
# Surrounding whitespace is stripped (the same way the year entry is handled)
# so that an entry such as " ny " is accepted instead of being rejected.
stateInput = input("Enter state abbreviation (e.g., NY) to see the tax information: ").strip().upper()
# Input validation: keep asking until the entry names a column of mergedData
while stateInput not in mergedData.columns:
    print('Please enter a valid abbreviation')
    stateInput = input("Enter state abbreviation (e.g., NY): ").strip().upper()

# Once state abbreviation is valid, proceed to year
yearInput = input("Enter the year (2019, 2020, 2021, or 2022): ").strip()
# Input validation: keep asking until the entry is one of the loaded years.
# The comparison is done on text so that non-numeric entries cannot raise.
while yearInput not in ('2019', '2020', '2021', '2022'):
    print('Currently, only the data for 2019, 2020, 2021, and 2022 are available, please enter again: ')
    yearInput = input("Enter the year (2019, 2020, 2021, or 2022): ").strip()
# Convert to int so it can be compared against the numeric 'year' column
yearInput = int(yearInput)

# Narrow the combined table down to what the user asked for.
# Rows whose 'year' equals the year the user selected
isSelectedYear = mergedData['year'].eq(yearInput)
# Rows where the selected state's column holds a value (i.e. is not null).
# NOTE: missing values were already replaced with 0 when mergedData was built,
# so this condition is expected to keep every row, zeros included.
hasStateValue = ~mergedData[stateInput].isnull()

# Apply both row conditions and keep only the selected state's column;
# the list around stateInput keeps the result a DataFrame rather than a Series.
finalData = mergedData.loc[isSelectedYear & hasStateValue, [stateInput]]

# Display the result
print(finalData)