# Data Collection
This notebook is responsible for autonomously collecting Twitch follower, concurrent viewer, and total view data.

## Imports

In [20]:
#SQL connection and queries
import MySQLdb as mdb
#Connection failure exiting
import sys
#Getting current time (after data is scraped) for stream table
from time import gmtime, strftime

## File data
Below the database credentials are read in. These are used for logging into the database, creating the necessary tables (if not already created), and storing data.

In [10]:
#Start from empty placeholders so the names are defined before the file is read
db_user = db_pass = db_name = ""
db_host = "localhost"

#The first three lines of database_credentials.txt are taken, in order, as the
#user, the password, and the database name (surrounding whitespace stripped)
with open("database_credentials.txt") as f:
    db_user, db_pass, db_name = (f.readline().strip() for _ in range(3))

## Database
### Versioning
Below, the database is accessed using the username, password, and database name specified above. Note that if the code below errors out, the rest of the database operations in this file will not produce the expected results (most likely they will not work at all).

In [12]:
#Connect
def mdb_get_version():
    """
    Connects to the database using the credentials obtained above and prints
    the row returned by "SELECT VERSION()".

    If MySQLdb raises an error, the error is printed and the interpreter is
    exited with status 1 via sys.exit. The connection, if one was opened, is
    closed in every case.
    """
    con = None
    try:
        con = mdb.connect(host=db_host, user=db_user, passwd=db_pass, db=db_name)
        cur = con.cursor()
        cur.execute("SELECT VERSION()")
        #fetchone() returns the row as a tuple, so the tuple itself is printed
        data = cur.fetchone()
        print("Database version: {}".format(data))
    except mdb.Error as e:
        #"except ... as e" is valid on Python 2.6+ and Python 3;
        #the older "except mdb.Error, e" form is a syntax error on Python 3
        if len(e.args) >= 2:
            print("Error {}: {}".format(e.args[0], e.args[1]))
        else:
            #Guard against errors raised without an (errno, message) pair
            print("Error: {}".format(e))
        sys.exit(1)
    finally:
        if con is not None:
            con.close()
mdb_get_version()

Database version: ('5.7.21-log',)


### Table Creation
The below functions create the necessary tables for data storage if they do not already exist.

In [35]:
def create_tables():
    """
    Creates the necessary tables (if not already created) for Twitch data storage.

    Three tables are created in order: categories, streamers, and streams.
    streams is created last because it declares foreign keys that reference
    the other two. Every statement uses CREATE TABLE IF NOT EXISTS, so calling
    this again leaves existing tables untouched.

    The connection is closed when the function finishes, whether or not a
    statement raised. Any MySQLdb error propagates to the caller.
    """
    con = mdb.connect(host=db_host, user=db_user, passwd=db_pass, db=db_name)

    #Explicit try/finally rather than "with con:" so the connection is always
    #closed, independent of how the installed MySQLdb treats "with" on a connection
    try:
        cur = con.cursor()

        #Twitch game/creative/irl categories
        sql = """
        CREATE TABLE IF NOT EXISTS categories (
        id INT PRIMARY KEY AUTO_INCREMENT,
        category VARCHAR(255) NOT NULL UNIQUE
        )
        """
        cur.execute(sql)

        #Twitch streamers
        sql = """
        CREATE TABLE IF NOT EXISTS streamers (
        id INT PRIMARY KEY AUTO_INCREMENT,
        streamer VARCHAR(255) NOT NULL UNIQUE
        )
        """
        cur.execute(sql)

        #Individual stream
        #It's important to note here that any game, like Overwatch, will be streamed by multiple streamers.
        #Similarly, a streamer may stream multiple games, or even stream Overwatch at different times.
        #The goal of the learning agents will be to see how streaming variables, like stream time, affect
        #concurrent viewership, if at all.
        #NOTE(review): streamer_id and category_id default to -1, which presumably matches no
        #AUTO_INCREMENT id, so inserts that omit them would be rejected by the foreign keys - confirm intended.
        sql = """
        CREATE TABLE IF NOT EXISTS streams (
        id INT PRIMARY KEY AUTO_INCREMENT,
        current_viewers INT NOT NULL DEFAULT -1,
        created_at DATETIME NOT NULL DEFAULT current_timestamp,
        followers INT NOT NULL DEFAULT -1,
        total_views INT NOT NULL DEFAULT -1,
        streamer_id INT NOT NULL DEFAULT -1,
        FOREIGN KEY (streamer_id)
            REFERENCES streamers(id)
            ON DELETE CASCADE,
        category_id INT NOT NULL DEFAULT -1,
        FOREIGN KEY (category_id)
            REFERENCES categories(id)
            ON DELETE CASCADE
        )
        """
        cur.execute(sql)

        #Commit explicitly now that no context manager does it on success
        con.commit()
    finally:
        con.close()
create_tables()



In [None]:

#Tables map: table name -> CREATE TABLE statement (column lists not filled in yet)
tables = {
    #Streamers, planned columns:
    #    primary autoincrement int key
    #    twitch username
    #    current viewers
    #    **foreign key category id
    #    current time
    #    total followers
    #    total views
    "streamers": """
    CREATE TABLE streamers()
""",
    #Categories, planned columns:
    #    primary autoincrement int key
    #    category name
    "categories": """
    CREATE TABLE categories()
""",
}

In [19]:
#Format the current local time as "YYYY-MM-DD HH:MM:SS". The call is the last
#expression in the cell, so the resulting string is what the cell displays.
#NOTE(review): the Imports cell brings in gmtime while this cell uses localtime -
#confirm which of the two the stream timestamps are meant to use.
from time import localtime, strftime
strftime("%Y-%m-%d %H:%M:%S", localtime())

'2018-02-21 15:55:23'