In [1]:
from time import sleep

# Module-level statement: it is not behind the __name__ guard, so it executes as soon as this code is run.
print("This is my file to demonstrate best practices.")

def process_data(data, delay=3):
    """Return ``data`` with a fixed suffix appended, pausing along the way.

    Args:
        data: Text to process; it is concatenated with a ``str`` suffix.
        delay: Seconds to pause between the start and finish messages.
            Defaults to 3, the value that used to be hard-coded; pass 0 to
            skip the wait (for example in tests).

    Returns:
        ``data`` followed by ``" that has been modified"``.
    """
    print("Beginning data processing...")
    modified_data = data + " that has been modified"
    # Artificial pause; no actual work happens during it.
    sleep(delay)
    print("Data processing finished.")
    return modified_data

def main():
    """Print a sample string, then print the result of processing it."""
    source_text = "My data read from the Web"
    print(source_text)
    # process_data() prints its own progress messages before the result is shown.
    print(process_data(source_text))

# Call main() only when __name__ is "__main__", i.e. not when this code is imported as a module.
if __name__ == "__main__":
    main()

This is my file to demonstrate best practices.
My data read from the Web
Beginning data processing...
Data processing finished.
My data read from the Web that has been modified


In [2]:
from time import sleep

# Module-level statement: it executes as soon as this cell runs, independent of main().
print("This is my file to demonstrate best practices.")

def process_data(data, delay=3):
    """Return ``data`` with a fixed suffix appended, pausing along the way.

    Args:
        data: Text to process; it is concatenated with a ``str`` suffix.
        delay: Seconds to pause between the start and finish messages.
            Defaults to 3, the value that used to be hard-coded; pass 0 to
            skip the wait (for example in tests).

    Returns:
        ``data`` followed by ``" that has been modified"``.
    """
    print("Beginning data processing...")
    modified_data = data + " that has been modified"
    # Artificial pause; no actual work happens during it.
    sleep(delay)
    print("Data processing finished.")
    return modified_data

def main():
    """Print a sample string, then print the result of processing it."""
    source_text = "My data read from the Web"
    print(source_text)
    # process_data() prints its own progress messages before the result is shown.
    print(process_data(source_text))

This is my file to demonstrate best practices.


To help understand how this code will execute, you should first understand how the Python interpreter sets __name__ depending on how the code is being executed.

There are two primary ways that you can instruct the Python interpreter to execute or use code:

1. You can execute the Python file as a script using the command line.
2. You can import the code from one Python file into another file or into the interactive interpreter.
You can read a lot more about these approaches in How to Run Your Python Scripts. No matter which way of running your code you’re using, Python defines a special variable called __name__ that contains a string whose value depends on how the code is being used.

In [3]:
# Report the execution context by printing the value of __name__.
print("This is my file to test Python's execution methods.")
print("The variable __name__ tells me which context this file is running in.")
# repr() keeps the quotes in the output, making it visible that __name__ is a string.
print("The value of __name__ is:", repr(__name__))

This is my file to test Python's execution methods.
The variable __name__ tells me which context this file is running in.
The value of __name__ is: '__main__'


In Python, repr() returns a printable representation of an object; print() then displays it. This example uses repr() to emphasize that the value of __name__ is a string. You can read more about repr() in the Python documentation.

In [5]:
import execution_methods

ModuleNotFoundError: No module named 'execution_methods'

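When the Python interpreter imports code, the value of __name__ is set to be the same as the name of the module that is being imported. The import above failed with ModuleNotFoundError because no execution_methods.py file could be found; save the previous cell's code under that name on the import path and run the import again. You will then see this in the third line of output: __name__ has the value 'execution_methods', which is the name of the .py file that Python is importing from.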

In [20]:
import sys

def load_data(database_url='sqlite:///Disaster_data.db', max_rows=50):
    """Load messages and their category labels from the disaster database.

    Args:
        database_url: SQLAlchemy connection URL. Defaults to the SQLite file
            ``Disaster_data.db`` given as a relative path.
        max_rows: Keep only the first ``max_rows`` rows of the table.
            Defaults to 50, the cap that used to be hard-coded; pass ``None``
            to keep every row.

    Returns:
        A tuple ``(X, Y)``: ``X`` is the ``message`` column and ``Y`` is a
        DataFrame holding the category columns listed below.

    Raises:
        sqlalchemy.exc.OperationalError: If the database has no
            ``Disaster_data`` table.
    """
    import pandas as pd
    from sqlalchemy import create_engine

    category_columns = [
        'related', 'request', 'offer', 'aid_related', 'medical_help',
        'medical_products', 'search_and_rescue', 'security', 'military',
        'child_alone', 'water', 'food', 'shelter', 'clothing', 'money',
        'missing_people', 'refugees', 'death', 'other_aid',
        'infrastructure_related', 'transport', 'buildings', 'electricity',
        'tools', 'hospitals', 'shops', 'aid_centers', 'other_infrastructure',
        'weather_related', 'floods', 'storm', 'fire', 'earthquake', 'cold',
        'other_weather', 'direct_report',
    ]

    engine = create_engine(database_url)
    try:
        df = pd.read_sql("SELECT * FROM Disaster_data", engine)
    finally:
        # Release pooled connections even when the query fails.
        engine.dispose()

    if max_rows is not None:
        df = df.head(max_rows)

    X = df['message']
    Y = df[category_columns]

    return X, Y

def tokenize(text):
    """Split ``text`` into lower-cased, lemmatized tokens without stop words.

    Every character outside ``[a-zA-Z0-9]`` is replaced by a space and the
    text is lower-cased *before* tokenizing. Lower-casing first matters: the
    stop-word comparison is case-sensitive, so a capitalised stop word such
    as "The" would otherwise slip through the filter and only be lower-cased
    afterwards.

    Args:
        text: The raw message string.

    Returns:
        A list of cleaned token strings; empty when nothing survives.
    """
    import re

    from nltk.corpus import stopwords
    from nltk.stem import WordNetLemmatizer
    from nltk.tokenize import word_tokenize

    # Remove punctuation characters and normalize case up front.
    normalized = re.sub(r"[^a-zA-Z0-9]", " ", text).lower()

    # Build the stop-word set once instead of re-reading the list per token.
    stop_words = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()

    return [
        lemmatizer.lemmatize(tok).strip()
        for tok in word_tokenize(normalized)
        if tok not in stop_words
    ]


def build_model():
    """Build the text-classification pipeline wrapped in a randomized search.

    The pipeline turns raw text into token counts (using ``tokenize``),
    applies TF-IDF weighting, and classifies with a ``RandomForestClassifier``
    wrapped in ``MultiOutputClassifier``.

    Returns:
        An unfitted ``RandomizedSearchCV`` over that pipeline.
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
    from sklearn.model_selection import RandomizedSearchCV
    from sklearn.multioutput import MultiOutputClassifier
    from sklearn.pipeline import Pipeline

    pipeline = Pipeline([
        ('vect', CountVectorizer(tokenizer=tokenize)),
        ('tfidf', TfidfTransformer()),
        ('clf', MultiOutputClassifier(RandomForestClassifier()))
    ])

    # NOTE(review): this grid has only 3 x 3 = 9 combinations; confirm how that
    # interacts with RandomizedSearchCV's default number of sampled candidates.
    parameters = {
        'clf__estimator__n_estimators': [20, 30, 40],
        'clf__estimator__min_samples_split': [2, 5, 8],
    }

    model = RandomizedSearchCV(pipeline, param_distributions=parameters, n_jobs=4, verbose=2)

    return model
    

def evaluate_model():
    """Train the model on a train/test split and print a classification report.

    Loads the data with ``load_data``, holds out 20% of the rows for testing,
    fits the search object returned by ``build_model`` on the remainder, and
    prints ``classification_report`` for the held-out predictions.

    Returns:
        None. The report is written to standard output.
    """
    # Imported locally: nothing at module level brings these names into scope,
    # so relying on them being defined elsewhere fails on a fresh kernel.
    from sklearn.metrics import classification_report
    from sklearn.model_selection import train_test_split

    # Train/test split
    X, y = load_data()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Train the model and predict on the held-out rows
    model = build_model()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Take the label names from the label frame itself; ``category_columns``
    # is a local variable of load_data() and is not visible here.
    print(classification_report(y_test, y_pred, target_names=list(y.columns)))


In [22]:
# Run the loader on its own; it queries the Disaster_data table of sqlite:///Disaster_data.db.
load_data()

OperationalError: (sqlite3.OperationalError) no such table: Disaster_data
[SQL: SELECT * FROM Disaster_data]
(Background on this error at: http://sqlalche.me/e/e3q8)