In [2]:
import glob
import json
import sys
import threading
import time
from pathlib import Path, PureWindowsPath

import joblib
import nest_asyncio
import numpy as np
import pandas as pd
from fastai.tabular.all import df_shrink
from flask import Flask, request, jsonify
from flask_cors import CORS
from sklearn.metrics import classification_report, accuracy_score
from werkzeug.serving import run_simple

In [4]:
def load_constant(file):
    """Load a JSON document stored under the ``commons`` directory.

    Args:
        file: File name relative to ``commons/``.

    Returns:
        The deserialized JSON content, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    print(f"Loading file: {file}...")

    with open(f"commons/{file}", "r") as handle:
        return json.load(handle)
    
def pre_process(file, df):
    """Clean a raw DataFrame so it can be passed to the model.

    Steps, in order: strip whitespace from the column names, rename columns
    with the module-level ``mapper_features`` mapping, drop the module-level
    ``drop_features`` columns, replace +/-infinity with NaN, and downcast the
    dtypes with ``df_shrink``.

    The caller's ``df`` is not modified: all work happens on a private copy.

    Args:
        file: Name of the source file; only used in the log messages.
        df: Raw DataFrame (e.g. as returned by ``pd.read_csv``).

    Returns:
        tuple: ``(processed_df, origin_df)``. ``origin_df`` is the frame after
        the column names were stripped and renamed, but before any column was
        dropped, any infinity replaced, or any dtype shrunk.

    Raises:
        KeyError: If a column listed in ``drop_features`` is not present.
    """
    print(f"Processing file: {file}...")
    print(f"\tDimensions before process: {df.shape}")

    # Work on a private copy so the frame passed in by the caller stays intact.
    origin_df = df.copy()

    print("\tStrip columns name...")
    origin_df.columns = origin_df.columns.str.strip()

    print("\tRename columns...")
    # In-place is safe here: origin_df is our own copy, and renaming only
    # touches the labels.
    origin_df.rename(columns=mapper_features, inplace=True)

    print("\tDrop columns...")
    # drop() returns a new frame, so origin_df keeps every column.
    processed = origin_df.drop(columns=drop_features)

    print("\tReplace 'infinity value' by 'nan'...")
    processed = processed.replace(to_replace=[np.inf, -np.inf], value=np.nan)

    # Rows containing NaN and duplicated rows are kept, and the index is not
    # reset: those steps were commented out in the original notebook.

    print("\tShrink data-frame type...")
    processed = df_shrink(processed)
    print(f"\tDimensions after process: {processed.shape}")

    return processed, origin_df

def predict_process(origin_df, df, file, model):
    """Run the model on a pre-processed frame and persist the results.

    Two files are written, both named after the base name of ``file``:

    * ``predict_result/<name>``: CSV of ``origin_df`` with the predictions
      stored in its ``Label`` column.
    * ``commons/data/<name with '.csv' replaced by '.json'>``: JSON records
      of ``df``.

    Both output directories are created when they do not exist yet.

    Args:
        origin_df: Frame that receives the predictions in its ``Label`` column.
            It is modified in place and returned.
        df: Pre-processed frame; a ``Label`` column, if present, is removed
            before the frame is handed to the model.
        file: Path of the source file. Only its base name is used, and both
            ``\\`` and ``/`` are accepted as separators.
        model: Object exposing ``predict(X)``.

    Returns:
        ``origin_df`` with ``Label`` set to the predictions.
    """
    print(f"Dataframe belong to file: {file}")
    print("Start predicting...")
    # errors='ignore': an unlabeled input has no 'Label' column to remove.
    X = df.drop(columns='Label', errors='ignore')
    y_pred = model.predict(X)

    # Log how many rows were assigned to each predicted class.
    unique_values, counts = np.unique(np.asarray(y_pred), return_counts=True)
    for value, count in zip(unique_values, counts):
        print(f'{value}: {count}')

    print("Saving result...")
    origin_df['Label'] = y_pred

    # PureWindowsPath splits on both '\\' and '/', so the base name is
    # extracted correctly whichever separator the path was built with.
    file_name = PureWindowsPath(str(file)).name

    csv_dir = Path("predict_result")
    csv_dir.mkdir(parents=True, exist_ok=True)
    origin_df.to_csv(csv_dir / file_name, index=False)

    json_dir = Path("commons/data")
    json_dir.mkdir(parents=True, exist_ok=True)
    # NOTE(review): the JSON dump contains `df` (the processed frame, with its
    # original 'Label' if any), not the predictions; confirm this is intended.
    with open(json_dir / file_name.replace('.csv', '.json'), 'w') as json_file:
        json.dump(df.to_dict(orient='records'), json_file, indent=4)

    return origin_df

def load_model(file):
    """Load a persisted model with joblib.

    Args:
        file: Path of the ``.joblib`` file.

    Returns:
        The object returned by ``joblib.load`` (called with ``mmap_mode='r'``).
    """
    # joblib files are pickle-based: only load files from a trusted source.
    print(f"Loading model from {file}...")
    return joblib.load(file, mmap_mode='r')

In [None]:
# Flask application that serves the prediction API; CORS(app) applies
# flask_cors to it with that library's default settings.
app = Flask(__name__)
CORS(app)

# Module-level state: pre_process() reads mapper_features / drop_features as
# globals, and the /api/predict handler passes `model` to predict_process().
mapper_features = load_constant("mapper_for_predicting.json")
drop_features = load_constant("drop_for_predicting.json")
model = load_model('modelset/model-100-20.joblib')

@app.route('/api/files', methods=['GET'])
def list_files():
    """List the CSV files available in the ``share`` directory.

    Returns:
        200 with ``{"files": [...]}`` (paths as returned by ``glob``), or
        500 with ``{"error": <message>}`` if anything fails.
    """
    try:
        csv_paths = glob.glob('share/*.csv')
        body = jsonify({"files": csv_paths})
        return body, 200
    except Exception as err:
        return jsonify({"error": str(err)}), 500
    
@app.route('/api/predict', methods=['POST'])
def predict():
    """Run the prediction pipeline on the CSV file named in the request.

    Expects a JSON body ``{"filepath": <path or glob pattern>}``. The first
    file matching the pattern is read with pandas, cleaned by ``pre_process``
    and handed to ``predict_process`` together with the module-level ``model``.

    Returns:
        200 ``{"message": "successfully"}`` on success;
        400 ``{"error": ...}`` if the body is not a JSON object or has no ``filepath``;
        404 ``{"error": ...}`` if no file matches ``filepath``;
        500 ``{"error": ...}`` for any other failure.
    """
    try:
        # silent=True: a missing or malformed JSON body yields None instead of
        # raising, so it can be reported as a client error below.
        data = request.get_json(silent=True)
        file_path = data.get('filepath') if isinstance(data, dict) else None
        if not file_path:
            return jsonify({"error": "Missing 'filepath' in JSON body"}), 400
        print(file_path)

        # SECURITY NOTE(review): the path comes from the client and is used
        # as-is, so any CSV readable by this process can be loaded. Consider
        # restricting it to the 'share/' directory.
        matches = glob.glob(f"{file_path}")
        if not matches:
            return jsonify({"error": f"File not found: {file_path}"}), 404
        file = matches[0]
        print(file)

        df, origin_df = pre_process(file, pd.read_csv(file))
        predict_process(origin_df, df, file, model)

        return jsonify({'message':'successfully'})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route('/api/result', methods=['POST'])
def read_excel_file():
    """Return one page of a stored prediction result, optionally filtered.

    JSON body fields:
        filepath: Path of the source file; only its base name is used to
            locate ``predict_result/<name>``.
        page: 1-based page number (default 1).
        limit: Number of rows per page (default 10).
        featureName / featureValue: Optional filter keeping the rows whose
            ``featureName`` column contains ``featureValue`` as a literal,
            case-insensitive substring.
        label: Optional filter keeping the rows whose ``Label`` equals it.

    Returns:
        200 with ``page``, ``limit``, ``totalItems``, ``totalPages`` and
        ``data`` (list of row records; NaN/Infinity are sent as ``null``);
        400 for an invalid body, invalid paging values or an unknown feature;
        404 if no stored result exists for the file;
        500 ``{"error": ...}`` for any other failure.
    """
    try:
        # silent=True: a missing or malformed JSON body yields None instead of
        # raising, so it can be reported as a client error.
        req_body = request.get_json(silent=True)
        if not isinstance(req_body, dict) or not req_body.get('filepath'):
            return jsonify({'error': "Missing 'filepath' in JSON body"}), 400

        try:
            page = int(req_body.get('page', 1))
            limit = int(req_body.get('limit', 10))
        except (TypeError, ValueError):
            return jsonify({'error': "'page' and 'limit' must be integers"}), 400
        # limit == 0 would divide by zero below; negative values give
        # meaningless slices.
        if page < 1 or limit < 1:
            return jsonify({'error': "'page' and 'limit' must be >= 1"}), 400

        feature_name = req_body.get('featureName')
        feature_value = req_body.get('featureValue')
        label = req_body.get('label')

        # PureWindowsPath splits on both '\\' and '/', so the base name is
        # found whichever separator the client used.
        file_name = PureWindowsPath(str(req_body['filepath'])).name
        try:
            df = pd.read_csv(f"predict_result/{file_name}")
        except FileNotFoundError:
            return jsonify({'error': f"No prediction result for '{file_name}'"}), 404

        filtered_df = df
        # Compare against the text form so values such as 0 are not skipped
        # and numeric JSON values do not break the string search.
        needle = '' if feature_value is None else str(feature_value)
        if feature_name and needle:
            if feature_name not in df.columns:
                return jsonify({'error': f"Feature '{feature_name}' not found in data"}), 400
            column_text = filtered_df[feature_name].astype(str)
            # regex=False: the value is user input and must match literally
            # ('.' in an IP address is a dot, '(' is not a syntax error).
            filtered_df = filtered_df[
                column_text.str.contains(needle, case=False, na=False, regex=False)
            ]

        if label:
            filtered_df = filtered_df[filtered_df['Label'] == label]

        # Pagination
        total_items = len(filtered_df)
        start = (page - 1) * limit
        end = start + limit
        paginated_df = filtered_df.iloc[start:end]

        print(f"Start: {start}")
        print(f"End: {end}")

        # JSON has no NaN/Infinity literals; emit them as null so that strict
        # parsers (e.g. the browser's JSON.parse) accept the response.
        paginated_df = paginated_df.replace([np.inf, -np.inf], np.nan)
        result = (
            paginated_df.astype(object)
            .where(paginated_df.notna(), None)
            .to_dict(orient='records')
        )

        # Return JSON response
        return jsonify({
            'page': page,
            'limit': limit,
            'totalItems': total_items,
            'totalPages': (total_items + limit - 1) // limit,
            'data': result
        })

    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route('/api/statistic', methods=['POST'])
def get_counts():
    """Return value counts for a fixed set of columns of a stored result.

    Expects a JSON body ``{"filepath": <path>}``; only the base name of the
    path is used to locate ``predict_result/<name>``.

    Returns:
        200 with ``{<column>: {<value>: <count>, ...}, ...}`` for the columns
        listed in ``features`` below;
        400 if the body is not a JSON object or has no ``filepath``;
        404 if no stored result exists for the file;
        500 ``{"error": ...}`` for any other failure (including a column
        missing from the stored result).
    """
    try:
        # silent=True: a missing or malformed JSON body yields None instead of
        # raising, so it can be reported as a client error.
        req_body = request.get_json(silent=True)
        if not isinstance(req_body, dict) or not req_body.get('filepath'):
            return jsonify({"error": "Missing 'filepath' in JSON body"}), 400

        features = ['Source IP', 'Source Port', 'Destination IP', 'Destination Port', 'Protocol', 'Label', 'Timestamp']

        # PureWindowsPath splits on both '\\' and '/', so the base name is
        # found whichever separator the client used.
        file_name = PureWindowsPath(str(req_body['filepath'])).name
        try:
            df = pd.read_csv(f"predict_result/{file_name}")
        except FileNotFoundError:
            return jsonify({"error": f"No prediction result for '{file_name}'"}), 404

        counts = {feature: df[feature].value_counts().to_dict() for feature in features}

        return jsonify(counts)

    except Exception as e:
        return jsonify({"error": str(e)}), 500

# Start the Werkzeug development server on localhost:5000. The call blocks,
# so this cell keeps running until the kernel is interrupted.
run_simple('localhost', 5000, app)

Loading file: mapper_for_predicting.json...
Loading file: drop_for_predicting.json...
Loading model from modelset/model-100-20.joblib...


 * Running on http://localhost:5000
Press CTRL+C to quit
