In [4]:
#import libraries
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd
import numpy as np   
import matplotlib.pyplot as plt

#load the dataset from the csv file using pandas
data = pd.read_csv('creditcard.csv')

data = data.sample(frac=0.1, random_state = 1)


# Only use the 'Amount' and 'V1', ..., 'V28' features
features = ['Amount'] + ['V%d' % number for number in range(1, 29)]

# The target variable which we would like to predict, is the 'Class' variable
target = 'Class'

# Create an X variable (containing the features) and an y variable (containing only the target variable)
X = data[features]
y = data[target]

def normalize(X):
    """
    Make the distribution of the values of each variable similar by subtracting the mean and by dividing by the standard deviation.
    """
    for feature in X.columns:
        X[feature] -= X[feature].mean()
        X[feature] /= X[feature].std()
    return X

# Define the model
model = LogisticRegression()

# Define the splitter for splitting the data in a train set and a test set
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)

# Loop through the splits (only one)
for train_indices, test_indices in splitter.split(X, y):
    # Select the train and test data
    X_train, y_train = X.iloc[train_indices], y.iloc[train_indices]
    X_test, y_test = X.iloc[test_indices], y.iloc[test_indices]
    
    # Normalize the data
    X_train = normalize(X_train)
    X_test = normalize(X_test)
    
    # Fit and predict!
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    n_errors = (y_pred != y_test).sum()
    
    # And finally: show the result
    
    print('{}:{}'.format("Logistic Regression", n_errors))
    
    print(accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))



    

#class field = 0 when not fraudulent
#class field = 1 when fraudulent
#iloc = index location

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Logistic Regression:15
0.9989467031809564
             precision    recall  f1-score   support

          0       1.00      1.00      1.00     14216
          1       0.81      0.52      0.63        25

avg / total       1.00      1.00      1.00     14241

