# Data Science with Data Mining 

##### Data mining is the process of finding useful patterns and information in large amounts of data. It's like looking for hidden treasures in a big pile of information.

#### Python tools and libraries for Data Mining

In [19]:
# Pandas is a powerful and widely used Python library for data manipulation and analysis.
# Scikit-learn is a popular Python library for machine learning.

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Load the Iris dataset bundled with scikit-learn
data = load_iris()
X = data.data  # Feature matrix
y = data.target  # Integer class labels

# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Build a 100-tree random forest. random_state is set here as well: the split above was
# already seeded, but an unseeded forest would still differ from run to run.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)  # Fit on the training split only

# Predict labels for the held-out samples
y_pred = model.predict(X_test)

# Show the true test labels first, then the predicted labels, for a side-by-side comparison
print("Predictions:",y_test, y_pred)




Predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0] [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 1 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 1 1 0 0]


## Calculate metrics to evaluate the model

In [21]:
from sklearn.metrics import accuracy_score, classification_report

# Share of held-out samples whose predicted label equals the true label
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class precision, recall, F1-score and support, rendered as a text table
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45



In [24]:
# PyCaret is an open-source machine learning library that simplifies the process of building models

In [None]:
# Importing necessary libraries
from pycaret.datasets import get_data
from pycaret.classification import *
import pycaret

# Load the dataset (e.g., Iris dataset)
data = get_data('iris')

# Initialize the PyCaret experiment. session_id pins the random seed so the
# train/hold-out split and the model scores are reproducible across runs.
setup(data=data, target='species', session_id=123)

# Compare models and select the best one
best_model = compare_models()

# Create the model (e.g., Random Forest); at this point it is fitted on the training split only
rf_model = create_model('rf')

# Score the hold-out set BEFORE finalizing: finalize_model refits on the whole dataset,
# hold-out rows included, so hold-out predictions from a finalized model are optimistically biased.
predictions = predict_model(rf_model)

# Evaluate the model while the hold-out set is still unseen data
evaluate_model(rf_model)

# Refit on the full dataset (train + hold-out) to obtain the model intended for deployment
rf_model_trained = finalize_model(rf_model)

### Crime Classification using PyCaret

In [38]:
# pip install pycaret
# Columns: location, time_of_day, day_of_week, crime_type (target)

import pandas as pd

# Toy records, one list per column (placeholder values - swap in your actual data).
# The first three columns are categorical features; crime_type is the label to predict.
data = dict(
    location=['Downtown', 'Suburb', 'Downtown', 'Suburb', 'City Center', 'Downtown'],
    time_of_day=['Morning', 'Evening', 'Afternoon', 'Morning', 'Evening', 'Afternoon'],
    day_of_week=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
    crime_type=['Theft', 'Assault', 'Vandalism', 'Theft', 'Assault', 'Vandalism'],
)

# Tabular view of the records: one row per incident, one column per key above
df = pd.DataFrame(data)


In [None]:
# setup() - prepares the data and builds the preprocessing pipeline, e.g. encoding categorical
# variables and imputing missing values; feature scaling is optional (enable it with normalize=True)

from pycaret.classification import setup

# Classification experiment on the crime DataFrame: crime_type is the label,
# and session_id fixes the random seed so repeated runs give the same results
setup(
    data=df,
    target='crime_type',
    session_id=123,
)

In [None]:
# compare_models - function automatically trains and evaluates multiple models using cross-validation 
from pycaret.classification import compare_models

# Train and cross-validate the candidate models, keeping the top-ranked one
best_model = compare_models()

# compare_models ranks the machine learning models (e.g. Logistic Regression, Random Forest)
# by their performance and returns the best one, judged by accuracy or another relevant metric

In [None]:
# Create and train a specific model

from pycaret.classification import create_model

# Train a Random Forest explicitly. NOTE: this does not reuse best_model from compare_models(),
# and it rebinds `model`, which earlier in the notebook held the scikit-learn classifier.
model = create_model('rf')  # 'rf' stands for Random Forest


In [None]:
from pycaret.classification import evaluate_model

# Evaluate the model created above (presumably renders PyCaret's interactive
# diagnostic plots in the notebook - confirm against the PyCaret docs)
evaluate_model(model)


In [None]:
from pycaret.classification import predict_model

# One unseen incident described by the same feature columns used for training;
# crime_type is left out because it is the value being predicted
new_data = pd.DataFrame([
    {'location': 'Downtown', 'time_of_day': 'Morning', 'day_of_week': 'Monday'},
])

# Run the trained model on the new observation and show the resulting frame
predictions = predict_model(model, data=new_data)
print(predictions)


In [None]:
from pycaret.classification import save_model

# Persist the trained model under the name 'crime_prediction_model' so it can be reloaded later
# (presumably written to the working directory with a .pkl extension - verify against PyCaret docs)
save_model(model, 'crime_prediction_model')


##### Summary

In [46]:
#By using PyCaret, you can quickly build and evaluate machine learning models to solve problems in public security

#### SciPy

In [None]:
#SciPy is an open-source Python library used for scientific and technical computing