In [None]:
import os
from datetime import datetime, timedelta  # For working with dates
import matplotlib.dates as mdates

import pandas as pd  # For working with DataFrames
from dotenv import load_dotenv
from matplotlib.ticker import FuncFormatter
from openpyxl import load_workbook
from openpyxl.styles import NamedStyle, PatternFill, Border, Side
from openpyxl.utils import get_column_letter
from sqlalchemy import create_engine  # For crea
import calendar
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter

##################### LOADING IMPORTANT DATA ######################
# Load environment variables (DB credentials, procedure name) from the .env file
env_file_path = 'D:/Projects/.env'
load_dotenv(env_file_path)
# Name of the Excel report file
output_file_path = 'ZAKAZ.xlsx'
# Load the lookup sheets of 'promotion.xlsx' into DataFrames
promotion_path = r'D:\Projects\promotion.xlsx'
region_df = pd.read_excel(promotion_path, sheet_name='Region')
aksiya_df = pd.read_excel(promotion_path, sheet_name='Aksiya')
paket_df = pd.read_excel(promotion_path, sheet_name='Paket')
types_df = pd.read_excel(promotion_path, sheet_name='TYPES')

##################### ACCESS ENV VARIABLES ######################
db_server = os.getenv("DB_SERVER")
db_database = os.getenv("DB_DATABASE_ASKGLOBAL")
db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")
db_port = os.getenv("DB_PORT")
db_driver_name = os.getenv("DB_DRIVER_NAME")

##################### PROCEDURE NAME ######################
# Stored procedure to execute, taken from the MONTHLY environment variable
procedure_name = os.getenv("MONTHLY")

##################### DATE RANGE - CURRENT MONTH ######################
# Read the clock once: two separate datetime.now() calls could straddle a
# month/year rollover and yield a month and a year from different instants.
_now = datetime.now()
CURRENT_MONTH = _now.month
CURRENT_YEAR = _now.year
_, days_in_month = calendar.monthrange(CURRENT_YEAR, CURRENT_MONTH)

# First and last day of the current month, formatted as YYYYMMDD strings
date_begin = datetime(CURRENT_YEAR, CURRENT_MONTH, 1).strftime('%Y%m%d')
date_end = datetime(CURRENT_YEAR, CURRENT_MONTH, days_in_month).strftime('%Y%m%d')
##################### CONNECTION STRING AND SQL QUERY ######################
# Construct the connection string
# NOTE(review): the credentials are interpolated verbatim; a password containing
# URL-reserved characters (e.g. '@', '/') would presumably need escaping - confirm.
conn_str = f"mssql+pyodbc://{db_user}:{db_password}@{db_server}:{db_port}/{db_database}?driver={db_driver_name}"
engine = create_engine(conn_str)

# The two '?' placeholders are bound to (date_begin, date_end) at execution time;
# only the procedure name (from the environment) is interpolated into the text.
sql_query: str = f"""
DECLARE @DateBegin DATE = ?;
DECLARE @DateEnd DATE = ?;

EXEC {procedure_name}
@DataBegin = @DateBegin,
@DataEnd = @DateEnd;
"""

#####################  EXECUTION  ######################
df = pd.read_sql_query(sql_query, engine, params=(date_begin, date_end))

In [None]:
# Basic row filter: parse the entry timestamp, then keep only the rows that
# satisfy every condition below.
df['DataEntered'] = pd.to_datetime(df['DataEntered'])

entered = df['DataEntered']
# Entered during the current calendar month of the current year
in_current_period = entered.dt.month.eq(CURRENT_MONTH) & entered.dt.year.eq(CURRENT_YEAR)
# Document name is one of the two accepted values
is_accepted_document = df['DocName'].isin(['Оптовая реализация', 'Финансовая скидка'])
# Rows belonging to this invoice manager are left out
is_excluded_manager = df['InvoiceManager'].isin(['Бочкарева Альвина'])
# OutPrice of at least 20,000
meets_min_price = df['OutPrice'].ge(20_000)

df = df[in_current_period & is_accepted_document & ~is_excluded_manager & meets_min_price]
df.info()

In [None]:
# Work on an independent (deep) copy so the filtered frame `df` stays untouched
result_df = df.copy(deep=True)

In [None]:
def format_large_numbers(value, *pos):
    """Format a number compactly with a K / M / B suffix.

    The signature accepts extra positional arguments so the function can be
    passed directly to ``matplotlib.ticker.FuncFormatter``, which calls it as
    ``func(value, position)``; the extra arguments are ignored.

    Args:
        value: The number to format.
        *pos: Ignored.

    Returns:
        ``value`` scaled to one decimal place with a 'B' (>= 1e9), 'M' (>= 1e6)
        or 'K' (>= 1e3) suffix, or the value truncated to an integer string
        when its magnitude is below 1000. The sign is preserved.
    """
    # Compare the magnitude so negative values are abbreviated as well
    # (e.g. -1500 -> '-1.5K'); the sign is carried by the division below.
    magnitude = abs(value)
    for threshold, suffix in ((1e9, 'B'), (1e6, 'M'), (1e3, 'K')):
        if magnitude >= threshold:
            return f'{value / threshold:.1f}{suffix}'
    # Magnitude below 1000: drop any fractional part
    return str(int(value))


def abbreviate_good_name(name, max_words=3):
    """Shorten a name to its first ``max_words`` whitespace-separated words.

    Args:
        name: The string to shorten.
        max_words: Maximum number of words to keep (default 3).

    Returns:
        ``name`` unchanged when it has at most ``max_words`` words; otherwise
        the first ``max_words`` words joined by single spaces, followed by
        '...'.
    """
    tokens = name.split()
    if len(tokens) > max_words:
        kept = tokens[:max_words]
        return ' '.join(kept) + '...'
    # Short enough: hand back the original string, whitespace included
    return name



In [None]:
# Attach the Region of each row's ClientManager; the left join keeps every sales row.
result_df = pd.merge(result_df, region_df[['ClientMan', 'Region']], left_on='ClientManager', right_on='ClientMan',
                     how='left')

# INN values are matched numerically; anything that cannot be parsed becomes NaN.
result_df['inn_temp'] = pd.to_numeric(result_df['INN'], errors='coerce')
types_df['INN_temp'] = pd.to_numeric(types_df['INN'], errors='coerce')

# pandas.merge matches NaN keys with each other, so a lookup row without a usable
# INN would be joined onto every sales row whose INN is missing/unparseable.
# Drop such lookup rows so those sales rows simply stay unmatched.
type_lookup = types_df[['INN_temp', 'TYPE', 'RegionType']].dropna(subset=['INN_temp'])
result_df = pd.merge(result_df, type_lookup, left_on='inn_temp', right_on='INN_temp',
                     how='left')

# Rows with no entry in the TYPES sheet get the default type 'ROZ'
result_df.fillna({'TYPE': 'ROZ'}, inplace=True)

# For 'ROZ' rows the RegionType is taken from the manager's Region
result_df.loc[result_df['TYPE'] == 'ROZ', 'RegionType'] = result_df['Region']

# OXVAT: number of rows in result_df that share the row's INN
result_df['OXVAT'] = result_df['INN'].map(result_df['INN'].value_counts())

categorical_columns = ['Office', 'DocName', 'GoodId', 'Good', 'Producer', 'INN', 'Client', 'City', 'ClientType',
                       'InvoiceManager', 'ClientManager', 'Store', 'StoreDep', 'DownPayment', 'PaymentTerm', 'Region',
                       'RegionType', 'TYPE']

# Store the label-like columns as pandas 'category' dtype
result_df[categorical_columns] = result_df[categorical_columns].astype('category')

In [None]:
# Helper columns created for the merges (their names end in '_temp')
temp_columns = [name for name in result_df.columns if name.endswith('_temp')]

# Columns that are not used after this point
unused_columns = [
    'ClientMan', 'Region', 'YY', 'MM', 'Data',
    'SerialNo', 'City', 'ClientType', 'Store', 'StoreDep',
    'InClient', 'Number',
    'Postavshik',
]

columns_to_drop = temp_columns + unused_columns

# Remove all of them from result_df in place
result_df.drop(columns=columns_to_drop, inplace=True)

In [None]:
# Number of best-selling goods shown in the chart
TOP_GOODS_COUNT = 10

# Total 'OutKolich' per good over the whole loaded period
goods_totals = result_df.groupby('Good', observed=False)['OutKolich'].sum().reset_index()

# Sort descending and keep the FIRST rows, i.e. the goods with the largest totals
# (taking the tail of a descending sort would select the smallest ones).
top_goods = goods_totals.sort_values(by='OutKolich', ascending=False).head(TOP_GOODS_COUNT)

# Restrict to the selected goods and forget every other category of 'Good'.
# Otherwise the observed=False groupby below would emit an all-zero series for
# every good in the dataset, and each of them would be drawn and annotated.
top_sales = result_df[result_df['Good'].isin(top_goods['Good'])].copy()
top_sales['Good'] = top_sales['Good'].cat.remove_unused_categories()

# Daily totals per selected good; observed=False keeps days without sales as 0
daily_totals = (
    top_sales
    .groupby(['Good', top_sales['DataEntered'].dt.date], observed=False)['OutKolich']
    .sum()
    .reset_index()
)

# Create the figure and axis objects
fig, ax = plt.subplots(figsize=(12, 8))

# One line per selected good
for good, group in daily_totals.groupby('Good', observed=True):
    line = ax.plot(group['DataEntered'], group['OutKolich'], label=good, marker='o')[0]

    # Annotate each point with its value, alternating sides to reduce overlap
    for i, (date, quantity) in enumerate(zip(group['DataEntered'], group['OutKolich'])):
        ax.text(date, quantity, f'{format_large_numbers(quantity)}', color=line.get_color(), fontsize=14,
                ha='left' if i % 2 == 0 else 'right', va='bottom')

plt.xticks(rotation=45, ha='right')  # Rotate x-axis labels for better readability
# One major tick per day, labelled as day-month
ax.xaxis.set_major_locator(mdates.DayLocator())
ax.xaxis.set_major_formatter(DateFormatter('%d-%b'))

# Abbreviate y-axis tick values with K / M / B suffixes
ax.yaxis.set_major_formatter(FuncFormatter(format_large_numbers))

# Customize other plot properties
ax.set_title(f'Daily Total Quantity of Top {TOP_GOODS_COUNT} Goods')
ax.set_xlabel('Date')
ax.set_ylabel('Total Quantity (OutKolich)')
ax.legend()
ax.grid(True)

# Show the plot
plt.show()