In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import XGBRegressor
from sklearn.impute import KNNImputer
from sklearn.preprocessing import MinMaxScaler, StandardScaler, OrdinalEncoder, LabelEncoder, RobustScaler
from sklearn.feature_selection import mutual_info_regression
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from matplotlib.colors import Normalize
from catboost import CatBoostRegressor
import plotly.graph_objects as go

# Load Data
df = pd.read_csv("C:/Users/coimb/Documents/EBAC/csv_data_dol_og.csv", sep=";")  # Load dataset
df.columns = df.columns.map(str)  # Ensure column names are strings
df = df.reset_index(drop=True)  # Reset index

# Convert Data Column
df["Data"] = pd.to_datetime(df["Data"], format="%d/%m/%Y %H:%M:%S.%f")
df["Valor"] = pd.to_numeric(df["Valor"].str.replace(".", "").str.replace(",", "."), errors='coerce')  # Convert "Valor" column to numeric

# Debug: Check Data
df.info()

# Set datetime index
df.set_index("Data", inplace=True)

# Resample Data to 1-minute Intervals and Calculate OHLC
ohlc = df["Valor"].resample("15T").ohlc()

# Drop NaN values
ohlc.dropna(inplace=True)

# Flatten MultiIndex Columns
ohlc.columns = ['open', 'high', 'low', 'close']

# Debug: Print Data After Resampling
print("Data After Resampling:\n", ohlc.head())

# Plot Candlestick Chart
fig = go.Figure(data=[go.Candlestick(
    x=ohlc.index,
    open=ohlc['open'],
    high=ohlc['high'],
    low=ohlc['low'],
    close=ohlc['close'],
    
)])

# Customize Layout
fig.update_layout(
    title="Candlestick Chart - Plotly",
    xaxis_title="Time",
    yaxis_title="Price",
    xaxis_rangeslider_visible=False,
    width=1024,
    height=768
)

# Show Plot
fig.show()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29002 entries, 0 to 29001
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Data        29002 non-null  datetime64[ns]
 1   Compradora  29002 non-null  object        
 2   Valor       29002 non-null  float64       
 3   Quantidade  29002 non-null  float64       
 4   Vendedora   29002 non-null  object        
 5   Agressor    29002 non-null  object        
dtypes: datetime64[ns](1), float64(2), object(3)
memory usage: 1.3+ MB
Data After Resampling:
                        open    high     low   close
Data                                               
2024-02-06 09:00:00  4989.0  4989.5  4982.0  4984.0
2024-02-06 09:15:00  4984.5  4988.0  4983.0  4987.5
2024-02-06 09:30:00  4987.0  4988.0  4981.5  4985.0
2024-02-06 09:45:00  4986.0  4987.0  4982.5  4984.0
2024-02-06 10:00:00  4983.5  4985.0  4976.5  4978.0


In [76]:
print((ohlc.low[10] >ohlc.high[12]))

True



Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



In [85]:

for i in range(0, len(ohlc)-1):
    if i<len(ohlc)-2:
        print(ohlc.low.iloc[i]  > ohlc.high.iloc[i+2],"fgv down")
        print(ohlc.high.iloc[i] < ohlc.low.iloc[i+2],"fgv up")

False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
True fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
True fgv down
False fgv up
True fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
True fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv up
False fgv down
False fgv

df was divided in boxes of 15 mins, what if wanted to use the pure scale of the original data