In [1]:
import pandas as pd
import numpy as np
import re
import math

In [2]:
MSAD_PERIODS = [3, 5, 10, 25, 50]

In [3]:
df = pd.read_excel('../src/stock_data.xlsx', sheet_name='Si')

In [6]:
def prepare_columns(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Подготовка датасета. Переименование столбцов.
    '''

    df = df.rename(columns={c: re.sub("[<>]", "", c) for c in df.columns})
    df["DATE"] = pd.to_datetime(df["DATE"], format="%y%m%d")
    df["TIME"] = pd.to_datetime(df["TIME"], format="%H%M%S").dt.time
    df["DATETIME"] = pd.to_datetime(
        df["DATE"].astype(str) + ' ' + df["TIME"].astype(str)
    )

    df.drop(
        [
            "DATE",
            "TIME",
            "DATETIME_KEY",
            "TICKER",
            "PER"
        ],
        axis=1,
        inplace=True
    )

    for col in [
        "OPEN",
        "CLOSE",
        "HIGH",
        "LOW",
        "VOL"
    ]:
        df[col] = df[col].astype('float32')

    return df

def calc_candle_color(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Функция рассчитывает цвет свечи.
    - Close > Open - зелёный
    - Close < Open - красный
    Если Close == Open, то цвет определяется по последней цветной свече.
    '''

    df["CANDLE_COLOR"] = np.where(
        df["CLOSE"] > df["OPEN"],
        "green",
        np.where(
            df["CLOSE"] < df["OPEN"],
            "red",
            None
        )
    )
    df = df.ffill()
    return df


def add_pivot(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Рассчитывает кривую Pivot ((High + Low + Close) / 3).
    '''

    df['PIVOT'] = (df['HIGH'] + df['LOW'] + df['CLOSE']) / 3
    return df


def add_movings(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Добавляет скользящие средние к датафрейму.
    Скользящие средние считаются по среднему и стандартному отклонению.
    '''

    for per in MSAD_PERIODS:
        df[f'MSAD_{per}_mean'] = df['PIVOT'].rolling(per).mean()
        df[f'MSAD_{per}_std'] = df['PIVOT'].rolling(per).std()
        df[f'VOL_MA_{per}_mean'] = df['VOL'].rolling(per).mean()
        df[f'VOL_MA_{per}_std'] = df['VOL'].rolling(per).std()

    return df

def add_fractals(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Функция считает фракталы Up и Down.
    Условия фрактала Up, вар.1:
        - Candle[1] High > Candle[0] High
        - Candle[1] High > Candle[2] High
    Условия фрактала Up, вар.2:
        Candle[1] High == Candle[0] High
        Candle[2] High < Candle[1] High
        Candle[2] Close < Candle[2] Open (красная свеча).
    Условия фрактала Down, вар.1:
        Candle[1] Low < Candle[0] Low
        Candle[1] Low < Candle[2] Low
    Условия фрактала Down, вар.2:
        Candle[1] Low == Candle[0] Low
        Candle[1] Low < Candle[2] Low
        Candle[2] Close > Candle[2] Open (зелёная свеча).
    '''

    df["IS_FRACTAL_DOWN"] = np.where(
        (
            df["LOW"] < df.shift()["LOW"] and
            df["LOW"] < df.shift(-1)["LOW"]
        ) or
        (
            df["LOW"] == df.shift()["LOW"] and
            df["LOW"] < df.shift(-1)["LOW"] and
            df.shift(-1)["CANDLE_COLOR"] == "green"
        ),
        1,
        0
    )
    df["IS_FRACTAL_UP"] = np.where(
        (
            df["HIGH"] > df.shift()["HIGH"] and
            df["HIGH"] > df.shift(-1)["HIGH"]
        ) or
        (
            df["HIGH"] == df.shift()["HIGH"] and
            df["HIGH"] > df.shift(-1)["HIGH"] and
            df.shift(-1)["CANDLE_COLOR"] == "red"
        )
    )
    return df

def calc_end_correction(df: pd.DataFrame) -> pd.DataFrame:
    '''
    Функция рассчитывает конечную коррекцию в абсолютных значениях
    и в %. Верхняя тень - для зелёных свечей, нижняя тень - для красных свечей.
    Если Open == Close (дожи) - тогда конечная коррекция
    определяется цветом предыдущей свечи.
    '''

    df["END_CORRECTION"] = np.where(
        df["CANDLE_COLOR"] == "green",
        df["HIGH"] - df["CLOSE"],
        df["CLOSE"] - df["LOW"]
    )
    df["END_CORRECTION_PERC"] = (df["END_CORRECTION"] /
                                 math.abs(df["CLOSE"] - df["OPEN"]))
    return df


In [7]:
d1 = prepare_columns(df)
d1.head()

Unnamed: 0,OPEN,HIGH,LOW,CLOSE,VOL,DATETIME
0,70105.0,70500.0,70105.0,70378.0,17203.0,2023-01-03 09:00:00
1,70383.0,70392.0,70205.0,70286.0,5659.0,2023-01-03 09:05:00
2,70282.0,70360.0,70163.0,70296.0,4963.0,2023-01-03 09:10:00
3,70298.0,70350.0,70233.0,70257.0,3186.0,2023-01-03 09:15:00
4,70258.0,70313.0,70100.0,70122.0,4688.0,2023-01-03 09:20:00


In [8]:
d1 = calc_candle_color(d1)

In [20]:
d1 = add_pivot(d1)

In [21]:
d1 = add_movings(d1)

In [24]:
d1

Unnamed: 0,OPEN,HIGH,LOW,CLOSE,VOL,DATETIME,CANDLE_COLOR,PIVOT,MSAD_3_mean,MSAD_3_std,...,VOL_MA_10_mean,VOL_MA_10_std,MSAD_25_mean,MSAD_25_std,VOL_MA_25_mean,VOL_MA_25_std,MSAD_50_mean,MSAD_50_std,VOL_MA_50_mean,VOL_MA_50_std
0,70105.0,70500.0,70105.0,70378.0,17203.0,2023-01-03 09:00:00,green,70327.664062,,,...,,,,,,,,,,
1,70383.0,70392.0,70205.0,70286.0,5659.0,2023-01-03 09:05:00,red,70294.335938,,,...,,,,,,,,,,
2,70282.0,70360.0,70163.0,70296.0,4963.0,2023-01-03 09:10:00,green,70273.000000,70298.333333,27.550396,...,,,,,,,,,,
3,70298.0,70350.0,70233.0,70257.0,3186.0,2023-01-03 09:15:00,red,70280.000000,70282.445312,10.876131,...,,,,,,,,,,
4,70258.0,70313.0,70100.0,70122.0,4688.0,2023-01-03 09:20:00,red,70178.335938,70243.778646,56.783017,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65538,87101.0,87119.0,87101.0,87119.0,153.0,2024-06-28 23:25:00,green,87113.000000,87103.445312,8.301519,...,184.2,91.200390,87185.973437,61.318723,277.28,211.961184,87198.759844,53.424954,404.16,385.335918
65539,87118.0,87120.0,87100.0,87119.0,485.0,2024-06-28 23:30:00,green,87113.000000,87108.000000,8.660254,...,209.3,131.891412,87179.586875,60.218288,293.12,212.101257,87197.106563,54.784468,383.40,350.186190
65540,87120.0,87121.0,87100.0,87121.0,144.0,2024-06-28 23:35:00,green,87114.000000,87113.333333,0.577350,...,203.6,133.511714,87173.906875,59.401853,288.24,214.144865,87195.466563,56.031433,358.06,318.738996
65541,87121.0,87160.0,87109.0,87124.0,342.0,2024-06-28 23:40:00,green,87131.000000,87119.333333,10.115994,...,205.0,135.025923,87169.093438,57.717861,289.48,214.371780,87193.993281,56.748527,340.08,292.164466


In [37]:
d2 = d1.copy()
# d2 = d2.shift(-1)

In [39]:
d2.shift(-1)["PIVOT"]

0        70294.335938
1        70273.000000
2        70280.000000
3        70178.335938
4        70119.335938
             ...     
65538    87113.000000
65539    87114.000000
65540    87131.000000
65541    87138.664062
65542             NaN
Name: PIVOT, Length: 65543, dtype: float32

In [15]:
d1.iloc[149]

OPEN                        71940.0
HIGH                        71960.0
LOW                         71938.0
CLOSE                       71941.0
VOL                           427.0
DATETIME        2023-01-03 21:45:00
CANDLE_COLOR                  green
Name: 149, dtype: object

In [16]:
d1.loc[d1["OPEN"] > d1["CLOSE"]]

Unnamed: 0,OPEN,HIGH,LOW,CLOSE,VOL,DATETIME,CANDLE_COLOR
1,70383.0,70392.0,70205.0,70286.0,5659.0,2023-01-03 09:05:00,red
3,70298.0,70350.0,70233.0,70257.0,3186.0,2023-01-03 09:15:00,red
4,70258.0,70313.0,70100.0,70122.0,4688.0,2023-01-03 09:20:00,red
5,70120.0,70178.0,70085.0,70095.0,4355.0,2023-01-03 09:25:00,red
8,70330.0,70370.0,70192.0,70231.0,3348.0,2023-01-03 09:40:00,red
...,...,...,...,...,...,...,...
65530,87151.0,87154.0,87120.0,87120.0,201.0,2024-06-28 22:45:00,red
65531,87118.0,87148.0,87113.0,87117.0,328.0,2024-06-28 22:50:00,red
65534,87124.0,87125.0,87110.0,87110.0,77.0,2024-06-28 23:05:00,red
65535,87111.0,87117.0,87095.0,87095.0,319.0,2024-06-28 23:10:00,red


In [None]:
df = add_pivot(df)
df = add_movings(df)
df = add_fractals(df)
df = calc_end_correction(df)

In [None]:
df.shape

In [None]:
df.head(10)

In [None]:
df['END_CORRECTION'].value_counts()

In [48]:
def check(x: str) -> str:
    '''
    Checks card validity.
    '''

    try:
        if "-" in x:
            x1 = x.split("-")
            res = ""
            for i in x1:
                if len(i) != 4:
                    return "Invalid"
                res += i
        else:
            res = int(x)
    except Exception:
        return "Invalid"

    res = str(res)
    x0 = res[0] not in (4, 5, 6)
    x_len = len(res) == 16

    if not x0 or not x_len:
        return "Invalid"
    
    # check numbers in row
    cnt = 1
    for i in range(1, x_len):
        if res[i] == res[i-1]:
            cnt += 1
        else:
            cnt = 1
        if cnt >= 4:
            return "Invalid"
    return "Valid"

    
    

print("Test 1 ", check('4253625879615786') == 'Valid')
print("Test 2 ", check('4424424424442444') == 'Valid')
print("Test 3 ", check('5122-2368-7954-3214') == 'Valid')

print("Test 4 ", check('42536258796157867') == 'Invalid')      #17 digits in card number → Invalid 
print("Test 5 ", check('4424444424442444')   == 'Invalid')     #Consecutive digits are repeating 4 or more times → Invalid
print("Test 6 ", check('5122-2368-7954 - 3214') == 'Invalid')  #Separators other than '-' are used → Invalid
print("Test 7 ", check('44244x4424442444') == 'Invalid')       #Contains non digit characters → Invalid
print("Test 8 ", check('0525362587961578')  == 'Invalid')      #Doesn't start with 4, 5 or 6 → Invalid


print("Test 9 ", check('4123456789123456') == 'Valid')
print("Test 10 ", check('5123-4567-8912-3456') == 'Valid') 
print("Test 11 ", check('61234-567-8912-3456') == 'Invalid')  #Invalid, because the card number is not divided into equal groups of 4.
print("Test 12 ", check('4123356789123456') == 'Valid')
print("Test 13 ", check('5133-3367-8912-3456') == 'Invalid')  #Invalid, consecutive digits 3333 is repeating  times.
print("Test 14 ", check('5123 - 3567 - 8912 - 3456') == 'Invalid')  #Invalid, because space '  ' and - are used as separators.

Test 1  True
Test 2  True
Test 3  True
Test 4  True
Test 5  False
Test 6  True
Test 7  True
Test 8  True
Test 9  True
Test 10  True
Test 11  True
Test 12  True
Test 13  False
Test 14  True
