# Code analysis (프로젝트 코드 분석)
## This is for personal study by winterstar67


## Libraries:
1. streamlit
2. pandas_profiling
3. pycaret

### The entire code (전체 코드)

In [None]:
import streamlit as st
import pandas as pd
import os

# Data Profiling 도구 import
import pandas_profiling
from streamlit_pandas_profiling import st_profile_report

# ML 도구들 import
from pycaret.classification import setup, compare_models, pull, save_model


# Build the navigation sidebar. `choice` holds the page selected by the user
# and drives every `if choice == ...` section below.
_banner_url = "https://www.zdnet.com/a/img/resize/28d3f004b94bd200eea41399d2be2fff7505906a/2018/04/13/36c52953-7ab9-4608-a848-71d1d538856e/td-deep-learning.jpg?auto=webp&fit=crop&height=675&width=1200"
_pages = ["Upload", "Profiling", "ML", "Download"]

# Object notation (`st.sidebar.<element>`) places each element in the sidebar,
# the same as nesting the calls inside `with st.sidebar:`.
st.sidebar.image(_banner_url)
st.sidebar.title("AutoStreamML")
choice = st.sidebar.radio("Navigation", _pages)
st.sidebar.info("This application allows you to build an automated ML pipline using Streamlit, Pandas Profiling and PyCaret.")

# Reload the dataset persisted by an earlier upload, if there is one, so that
# `df` is available to the pages below without uploading again.
_saved_csv = "sourcedata.csv"
if os.path.exists(_saved_csv):
    df = pd.read_csv(_saved_csv, index_col=None)

# Upload page: read the uploaded CSV, persist it for the other pages, and
# preview it.
if choice == "Upload":
    st.title("Upload Your Data for Modelling")
    file = st.file_uploader("Upload Your Dataset")
    if file is not None:
        try:
            df = pd.read_csv(file, index_col=None)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            # An empty or non-CSV upload should produce a readable message,
            # not a traceback, and must not overwrite the saved dataset.
            st.error(f"Could not read the uploaded file as CSV: {exc}")
        else:
            # Persist without the row index so reloading yields the same columns.
            df.to_csv("sourcedata.csv", index=False)
            st.dataframe(df)

# Profiling page: render an automated EDA report for the saved dataset.
if choice == "Profiling":
    st.title("Automated Exproratory Data Analysis")
    # `df` is only bound when "sourcedata.csv" exists (it is loaded from that
    # file at startup), so guard on the file instead of hitting a NameError.
    if os.path.exists("sourcedata.csv"):
        profile_report = df.profile_report()
        st_profile_report(profile_report)
    else:
        st.warning("No dataset found. Please upload one on the Upload page first.")



# ML page: let the user pick a target column, then run PyCaret's automated
# classification workflow and save the best model.
if choice == "ML":
    st.title("Machine learning go ~~~")
    # `df` is only bound when "sourcedata.csv" exists (it is loaded from that
    # file at startup), so guard on the file instead of hitting a NameError.
    if not os.path.exists("sourcedata.csv"):
        st.warning("No dataset found. Please upload one on the Upload page first.")
    else:
        target = st.selectbox("Select Your Target", df.columns)
        if st.button("Train model"):
            # Initialise the experiment, then show the settings table it produced.
            setup(df, target=target)
            setup_df = pull()
            st.info("This is the ML Experiment settings")
            st.dataframe(setup_df)

            # Train and compare the candidate models, then show the comparison table.
            best_model = compare_models()
            compare_df = pull()
            st.info("This is the ML model")
            st.dataframe(compare_df)

            # Display the winning estimator explicitly rather than relying on
            # Streamlit's implicit rendering of a bare expression.
            st.write(best_model)
            save_model(best_model, 'best_model')


# Download page: offer the model file written by the ML page.
if choice == "Download":
    # The file only exists after a model has been trained and saved; without
    # this guard, open() raises FileNotFoundError on a fresh install.
    if os.path.exists("best_model.pkl"):
        with open("best_model.pkl", 'rb') as f:
            st.download_button("Download the Model", f, "trained_model.pkl")
    else:
        st.warning("No trained model found. Please train one on the ML page first.")

#### 1. Import part

In [None]:
import streamlit as st # For machine learning web application 
import pandas as pd  
import os  

# Data Profiling tool import  
import pandas_profiling # For automated EDA (but mayby we need to do data analysis by ourselves for real problem)  
from streamlit_pandas_profiling import st_profile_report # For using pandas_profiling on streamlit  

# ML tool import  
from pycaret.classification import setup, compare_models, pull, save_model # For automated Machine learning workflow

- https://mokeya.tistory.com/147
- https://wikidocs.net/47193
- https://cslife.tistory.com/m/entry/Pandas-Profiling-%ED%8C%90%EB%8B%A4%EC%8A%A4-%ED%94%84%EB%A1%9C%ED%8C%8C%EC%9D%BC%EB%A7%81-Streamlit%EC%97%90%EC%84%9C-%EA%B5%AC%ED%98%84%ED%95%98%EA%B8%B0
- https://dsbook.tistory.com/360


#### 2. Basic settings for the web app

In [None]:
# Build the navigation sidebar on the left side of the web page.
# `st.sidebar.<element>` puts the element in the sidebar, the same as
# nesting the call inside `with st.sidebar:`.
_banner_url = "https://www.zdnet.com/a/img/resize/28d3f004b94bd200eea41399d2be2fff7505906a/2018/04/13/36c52953-7ab9-4608-a848-71d1d538856e/td-deep-learning.jpg?auto=webp&fit=crop&height=675&width=1200"
_pages = ["Upload", "Profiling", "ML", "Download"]

st.sidebar.image(_banner_url)  # insert the banner image
st.sidebar.title("AutoStreamML")  # set the sidebar title

# Create a radio button: a widget in which exactly one option can be selected.
# The selected option is stored in `choice`.
choice = st.sidebar.radio("Navigation", _pages)

# Information message; run the app to see where the text is placed.
st.sidebar.info("This application allows you to build an automated ML pipline using Streamlit, Pandas Profiling and PyCaret.")


#### 3. Upload function

In [None]:
if choice == "Upload":  # the radio button is set to "Upload"
    st.title("Upload Your Data for Modelling")  # page title
    # Display a file uploader widget (Streamlit limits uploads to 200MB by
    # default). It returns the uploaded file, or None if nothing was uploaded.
    file = st.file_uploader("Upload Your Dataset")
    if file is not None:  # a file has been uploaded
        try:
            # Read the upload as CSV and store it in `df`.
            df = pd.read_csv(file, index_col=None)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            # An empty or non-CSV upload should produce a readable message,
            # not a traceback, and must not overwrite the saved dataset.
            st.error(f"Could not read the uploaded file as CSV: {exc}")
        else:
            # Save the data as "sourcedata.csv" (without the row index) so the
            # other pages can reload it.
            df.to_csv("sourcedata.csv", index=False)
            st.dataframe(df)  # display the dataframe

#### 4. Profiling

In [None]:
if choice == "Profiling":  # the radio button is set to "Profiling"
    st.title("Automated Exproratory Data Analysis")  # page title
    # `df` only exists when "sourcedata.csv" has been saved by the Upload page,
    # so check for the file instead of hitting a NameError.
    if os.path.exists("sourcedata.csv"):
        # profile_report() is made available on DataFrame objects by importing
        # pandas_profiling; it builds the profiling report.
        profile_report = df.profile_report()
        # Render the pandas_profiling report inside the Streamlit page.
        st_profile_report(profile_report)
    else:
        st.warning("No dataset found. Please upload one on the Upload page first.")
    

#### 5. ML model training

In [None]:
if choice == "ML":  # the radio button is set to "ML"
    st.title("Machine learning go ~~~")  # page title
    # `df` only exists when "sourcedata.csv" has been saved by the Upload page,
    # so check for the file instead of hitting a NameError.
    if not os.path.exists("sourcedata.csv"):
        st.warning("No dataset found. Please upload one on the Upload page first.")
    else:
        # Create a select box titled "Select Your Target" whose options are the
        # columns of df.
        target = st.selectbox("Select Your Target", df.columns)
        if st.button("Train model"):  # create a button labelled "Train model"
            # setup() has to be called before any other PyCaret function; here
            # it receives the data and the target column.
            setup(df, target=target)
            setup_df = pull()  # pull() returns the latest displayed table as a dataframe
            st.info("This is the ML Experiment settings")
            st.dataframe(setup_df)  # show the experiment settings table

            # Train and evaluate all available estimators using cross-validation.
            # compare_models() returns the best model; the score grid with the
            # averaged cross-validated scores is retrieved with pull().
            best_model = compare_models()
            compare_df = pull()
            st.info("This is the ML model")
            st.dataframe(compare_df)

            # Display the winning estimator explicitly rather than relying on
            # Streamlit's implicit rendering of a bare expression.
            st.write(best_model)
            # Save the trained model in the current working directory as a pickle file.
            save_model(best_model, 'best_model')

#### 6. Model download

In [None]:
if choice == "Download":  # the radio button is set to "Download"
    # The model file only exists after a model has been trained and saved;
    # without this guard, open() raises FileNotFoundError.
    if os.path.exists("best_model.pkl"):
        with open("best_model.pkl", 'rb') as f:
            # Display a download button labelled "Download the Model" that lets
            # the user download "best_model.pkl" under the name "trained_model.pkl".
            st.download_button("Download the Model", f, "trained_model.pkl")
    else:
        st.warning("No trained model found. Please train one on the ML page first.")


`-` streamlit:
- https://zzsza.github.io/mlops/2021/02/07/python-streamlit-dashboard/

`-` pycaret:
- https://pycaret.readthedocs.io/en/stable/api/classification.html#pycaret.classification.compare_models
- https://dsbook.tistory.com/360
- setup function: https://pycaret.gitbook.io/docs/get-started/functions/initialize#setting-up-environment