From 13cdc2f42bfd15397f52e184804da2484893c52b Mon Sep 17 00:00:00 2001
From: Karen Copeland <karen@copelandpa.com>
Date: Wed, 19 Jun 2019 18:23:59 +0200
Subject: [PATCH] Machine Learning Exercise Edits

---
 chierici_practical_part1.ipynb | 1858 ++++++++++++++++++++++++++++++--
 chierici_practical_part2.ipynb |  758 ++++++++++++-
 2 files changed, 2497 insertions(+), 119 deletions(-)
diff --git a/chierici_practical_part1.ipynb b/chierici_practical_part1.ipynb
index 1a9668c..856f034 100644
--- a/chierici_practical_part1.ipynb
+++ b/chierici_practical_part1.ipynb
@@ -55,7 +55,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -73,9 +73,322 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div class=\"bk-root\">\n",
+       "        <a href=\"https://bokeh.pydata.org\" target=\"_blank\" class=\"bk-logo bk-logo-small bk-logo-notebook\"></a>\n",
+       "        <span id=\"1001\">Loading BokehJS ...</span>\n",
+       "    </div>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "(function(root) {\n",
+       "  function now() {\n",
+       "    return new Date();\n",
+       "  }\n",
+       "\n",
+       "  var force = true;\n",
+       "\n",
+       "  if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n",
+       "    root._bokeh_onload_callbacks = [];\n",
+       "    root._bokeh_is_loading = undefined;\n",
+       "  }\n",
+       "\n",
+       "  var JS_MIME_TYPE = 'application/javascript';\n",
+       "  var HTML_MIME_TYPE = 'text/html';\n",
+       "  var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n",
+       "  var CLASS_NAME = 'output_bokeh rendered_html';\n",
+       "\n",
+       "  /**\n",
+       "   * Render data to the DOM node\n",
+       "   */\n",
+       "  function render(props, node) {\n",
+       "    var script = document.createElement(\"script\");\n",
+       "    node.appendChild(script);\n",
+       "  }\n",
+       "\n",
+       "  /**\n",
+       "   * Handle when an output is cleared or removed\n",
+       "   */\n",
+       "  function handleClearOutput(event, handle) {\n",
+       "    var cell = handle.cell;\n",
+       "\n",
+       "    var id = cell.output_area._bokeh_element_id;\n",
+       "    var server_id = cell.output_area._bokeh_server_id;\n",
+       "    // Clean up Bokeh references\n",
+       "    if (id != null && id in Bokeh.index) {\n",
+       "      Bokeh.index[id].model.document.clear();\n",
+       "      delete Bokeh.index[id];\n",
+       "    }\n",
+       "\n",
+       "    if (server_id !== undefined) {\n",
+       "      // Clean up Bokeh references\n",
+       "      var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n",
+       "      cell.notebook.kernel.execute(cmd, {\n",
+       "        iopub: {\n",
+       "          output: function(msg) {\n",
+       "            var id = msg.content.text.trim();\n",
+       "            if (id in Bokeh.index) {\n",
+       "              Bokeh.index[id].model.document.clear();\n",
+       "              delete Bokeh.index[id];\n",
+       "            }\n",
+       "          }\n",
+       "        }\n",
+       "      });\n",
+       "      // Destroy server and session\n",
+       "      var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n",
+       "      cell.notebook.kernel.execute(cmd);\n",
+       "    }\n",
+       "  }\n",
+       "\n",
+       "  /**\n",
+       "   * Handle when a new output is added\n",
+       "   */\n",
+       "  function handleAddOutput(event, handle) {\n",
+       "    var output_area = handle.output_area;\n",
+       "    var output = handle.output;\n",
+       "\n",
+       "    // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n",
+       "    if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
+       "      return\n",
+       "    }\n",
+       "\n",
+       "    var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
+       "\n",
+       "    if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n",
+       "      toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n",
+       "      // store reference to embed id on output_area\n",
+       "      output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
+       "    }\n",
+       "    if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
+       "      var bk_div = document.createElement(\"div\");\n",
+       "      bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
+       "      var script_attrs = bk_div.children[0].attributes;\n",
+       "      for (var i = 0; i < script_attrs.length; i++) {\n",
+       "        toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
+       "      }\n",
+       "      // store reference to server id on output_area\n",
+       "      output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
+       "    }\n",
+       "  }\n",
+       "\n",
+       "  function register_renderer(events, OutputArea) {\n",
+       "\n",
+       "    function append_mime(data, metadata, element) {\n",
+       "      // create a DOM node to render to\n",
+       "      var toinsert = this.create_output_subarea(\n",
+       "        metadata,\n",
+       "        CLASS_NAME,\n",
+       "        EXEC_MIME_TYPE\n",
+       "      );\n",
+       "      this.keyboard_manager.register_events(toinsert);\n",
+       "      // Render to node\n",
+       "      var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
+       "      render(props, toinsert[toinsert.length - 1]);\n",
+       "      element.append(toinsert);\n",
+       "      return toinsert\n",
+       "    }\n",
+       "\n",
+       "    /* Handle when an output is cleared or removed */\n",
+       "    events.on('clear_output.CodeCell', handleClearOutput);\n",
+       "    events.on('delete.Cell', handleClearOutput);\n",
+       "\n",
+       "    /* Handle when a new output is added */\n",
+       "    events.on('output_added.OutputArea', handleAddOutput);\n",
+       "\n",
+       "    /**\n",
+       "     * Register the mime type and append_mime function with output_area\n",
+       "     */\n",
+       "    OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
+       "      /* Is output safe? */\n",
+       "      safe: true,\n",
+       "      /* Index of renderer in `output_area.display_order` */\n",
+       "      index: 0\n",
+       "    });\n",
+       "  }\n",
+       "\n",
+       "  // register the mime type if in Jupyter Notebook environment and previously unregistered\n",
+       "  if (root.Jupyter !== undefined) {\n",
+       "    var events = require('base/js/events');\n",
+       "    var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
+       "\n",
+       "    if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
+       "      register_renderer(events, OutputArea);\n",
+       "    }\n",
+       "  }\n",
+       "\n",
+       "  \n",
+       "  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
+       "    root._bokeh_timeout = Date.now() + 5000;\n",
+       "    root._bokeh_failed_load = false;\n",
+       "  }\n",
+       "\n",
+       "  var NB_LOAD_WARNING = {'data': {'text/html':\n",
+       "     \"<div style='background-color: #fdd'>\\n\"+\n",
+       "     \"<p>\\n\"+\n",
+       "     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
+       "     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
+       "     \"</p>\\n\"+\n",
+       "     \"<ul>\\n\"+\n",
+       "     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n",
+       "     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n",
+       "     \"</ul>\\n\"+\n",
+       "     \"<code>\\n\"+\n",
+       "     \"from bokeh.resources import INLINE\\n\"+\n",
+       "     \"output_notebook(resources=INLINE)\\n\"+\n",
+       "     \"</code>\\n\"+\n",
+       "     \"</div>\"}};\n",
+       "\n",
+       "  function display_loaded() {\n",
+       "    var el = document.getElementById(\"1001\");\n",
+       "    if (el != null) {\n",
+       "      el.textContent = \"BokehJS is loading...\";\n",
+       "    }\n",
+       "    if (root.Bokeh !== undefined) {\n",
+       "      if (el != null) {\n",
+       "        el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n",
+       "      }\n",
+       "    } else if (Date.now() < root._bokeh_timeout) {\n",
+       "      setTimeout(display_loaded, 100)\n",
+       "    }\n",
+       "  }\n",
+       "\n",
+       "\n",
+       "  function run_callbacks() {\n",
+       "    try {\n",
+       "      root._bokeh_onload_callbacks.forEach(function(callback) {\n",
+       "        if (callback != null)\n",
+       "          callback();\n",
+       "      });\n",
+       "    } finally {\n",
+       "      delete root._bokeh_onload_callbacks\n",
+       "    }\n",
+       "    console.debug(\"Bokeh: all callbacks have finished\");\n",
+       "  }\n",
+       "\n",
+       "  function load_libs(css_urls, js_urls, callback) {\n",
+       "    if (css_urls == null) css_urls = [];\n",
+       "    if (js_urls == null) js_urls = [];\n",
+       "\n",
+       "    root._bokeh_onload_callbacks.push(callback);\n",
+       "    if (root._bokeh_is_loading > 0) {\n",
+       "      console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
+       "      return null;\n",
+       "    }\n",
+       "    if (js_urls == null || js_urls.length === 0) {\n",
+       "      run_callbacks();\n",
+       "      return null;\n",
+       "    }\n",
+       "    console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
+       "    root._bokeh_is_loading = css_urls.length + js_urls.length;\n",
+       "\n",
+       "    function on_load() {\n",
+       "      root._bokeh_is_loading--;\n",
+       "      if (root._bokeh_is_loading === 0) {\n",
+       "        console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n",
+       "        run_callbacks()\n",
+       "      }\n",
+       "    }\n",
+       "\n",
+       "    function on_error() {\n",
+       "      console.error(\"failed to load \" + url);\n",
+       "    }\n",
+       "\n",
+       "    for (var i = 0; i < css_urls.length; i++) {\n",
+       "      var url = css_urls[i];\n",
+       "      const element = document.createElement(\"link\");\n",
+       "      element.onload = on_load;\n",
+       "      element.onerror = on_error;\n",
+       "      element.rel = \"stylesheet\";\n",
+       "      element.type = \"text/css\";\n",
+       "      element.href = url;\n",
+       "      console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n",
+       "      document.body.appendChild(element);\n",
+       "    }\n",
+       "\n",
+       "    for (var i = 0; i < js_urls.length; i++) {\n",
+       "      var url = js_urls[i];\n",
+       "      var element = document.createElement('script');\n",
+       "      element.onload = on_load;\n",
+       "      element.onerror = on_error;\n",
+       "      element.async = false;\n",
+       "      element.src = url;\n",
+       "      console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
+       "      document.head.appendChild(element);\n",
+       "    }\n",
+       "  };var element = document.getElementById(\"1001\");\n",
+       "  if (element == null) {\n",
+       "    console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n",
+       "    return false;\n",
+       "  }\n",
+       "\n",
+       "  function inject_raw_css(css) {\n",
+       "    const element = document.createElement(\"style\");\n",
+       "    element.appendChild(document.createTextNode(css));\n",
+       "    document.body.appendChild(element);\n",
+       "  }\n",
+       "\n",
+       "  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.2.0.min.js\"];\n",
+       "  var css_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.css\"];\n",
+       "\n",
+       "  var inline_js = [\n",
+       "    function(Bokeh) {\n",
+       "      Bokeh.set_log_level(\"info\");\n",
+       "    },\n",
+       "    \n",
+       "    function(Bokeh) {\n",
+       "      \n",
+       "    },\n",
+       "    function(Bokeh) {} // ensure no trailing comma for IE\n",
+       "  ];\n",
+       "\n",
+       "  function run_inline_js() {\n",
+       "    \n",
+       "    if ((root.Bokeh !== undefined) || (force === true)) {\n",
+       "      for (var i = 0; i < inline_js.length; i++) {\n",
+       "        inline_js[i].call(root, root.Bokeh);\n",
+       "      }if (force === true) {\n",
+       "        display_loaded();\n",
+       "      }} else if (Date.now() < root._bokeh_timeout) {\n",
+       "      setTimeout(run_inline_js, 100);\n",
+       "    } else if (!root._bokeh_failed_load) {\n",
+       "      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
+       "      root._bokeh_failed_load = true;\n",
+       "    } else if (force !== true) {\n",
+       "      var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n",
+       "      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
+       "    }\n",
+       "\n",
+       "  }\n",
+       "\n",
+       "  if (root._bokeh_is_loading === 0) {\n",
+       "    console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
+       "    run_inline_js();\n",
+       "  } else {\n",
+       "    load_libs(css_urls, js_urls, function() {\n",
+       "      console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n",
+       "      run_inline_js();\n",
+       "    });\n",
+       "  }\n",
+       "}(window));"
+      ],
+      "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n  function now() {\n    return new Date();\n  }\n\n  var force = true;\n\n  if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n    root._bokeh_onload_callbacks = [];\n    root._bokeh_is_loading = undefined;\n  }\n\n  \n\n  \n  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n    root._bokeh_timeout = Date.now() + 5000;\n    root._bokeh_failed_load = false;\n  }\n\n  var NB_LOAD_WARNING = {'data': {'text/html':\n     \"<div style='background-color: #fdd'>\\n\"+\n     \"<p>\\n\"+\n     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n     \"</p>\\n\"+\n     \"<ul>\\n\"+\n     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n     \"</ul>\\n\"+\n     \"<code>\\n\"+\n     \"from bokeh.resources import INLINE\\n\"+\n     \"output_notebook(resources=INLINE)\\n\"+\n     \"</code>\\n\"+\n     \"</div>\"}};\n\n  function display_loaded() {\n    var el = document.getElementById(\"1001\");\n    if (el != null) {\n      el.textContent = \"BokehJS is loading...\";\n    }\n    if (root.Bokeh !== undefined) {\n      if (el != null) {\n        el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n      }\n    } else if (Date.now() < root._bokeh_timeout) {\n      setTimeout(display_loaded, 100)\n    }\n  }\n\n\n  function run_callbacks() {\n    try {\n      root._bokeh_onload_callbacks.forEach(function(callback) {\n        if (callback != null)\n          callback();\n      });\n    } finally {\n      delete root._bokeh_onload_callbacks\n    }\n    console.debug(\"Bokeh: all callbacks have finished\");\n  }\n\n  function load_libs(css_urls, js_urls, callback) {\n    if (css_urls == null) css_urls = [];\n    if (js_urls 
== null) js_urls = [];\n\n    root._bokeh_onload_callbacks.push(callback);\n    if (root._bokeh_is_loading > 0) {\n      console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n      return null;\n    }\n    if (js_urls == null || js_urls.length === 0) {\n      run_callbacks();\n      return null;\n    }\n    console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n    root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n    function on_load() {\n      root._bokeh_is_loading--;\n      if (root._bokeh_is_loading === 0) {\n        console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n        run_callbacks()\n      }\n    }\n\n    function on_error() {\n      console.error(\"failed to load \" + url);\n    }\n\n    for (var i = 0; i < css_urls.length; i++) {\n      var url = css_urls[i];\n      const element = document.createElement(\"link\");\n      element.onload = on_load;\n      element.onerror = on_error;\n      element.rel = \"stylesheet\";\n      element.type = \"text/css\";\n      element.href = url;\n      console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n      document.body.appendChild(element);\n    }\n\n    for (var i = 0; i < js_urls.length; i++) {\n      var url = js_urls[i];\n      var element = document.createElement('script');\n      element.onload = on_load;\n      element.onerror = on_error;\n      element.async = false;\n      element.src = url;\n      console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n      document.head.appendChild(element);\n    }\n  };var element = document.getElementById(\"1001\");\n  if (element == null) {\n    console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. 
\")\n    return false;\n  }\n\n  function inject_raw_css(css) {\n    const element = document.createElement(\"style\");\n    element.appendChild(document.createTextNode(css));\n    document.body.appendChild(element);\n  }\n\n  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.2.0.min.js\"];\n  var css_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.css\"];\n\n  var inline_js = [\n    function(Bokeh) {\n      Bokeh.set_log_level(\"info\");\n    },\n    \n    function(Bokeh) {\n      \n    },\n    function(Bokeh) {} // ensure no trailing comma for IE\n  ];\n\n  function run_inline_js() {\n    \n    if ((root.Bokeh !== undefined) || (force === true)) {\n      for (var i = 0; i < inline_js.length; i++) {\n        inline_js[i].call(root, root.Bokeh);\n      }if (force === true) {\n        display_loaded();\n      }} else if (Date.now() < root._bokeh_timeout) {\n      setTimeout(run_inline_js, 100);\n    } else if (!root._bokeh_failed_load) {\n      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n      root._bokeh_failed_load = true;\n    } else if (force !== true) {\n      var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n    }\n\n  }\n\n  if (root._bokeh_is_loading === 0) {\n    console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n    run_inline_js();\n  } else {\n    load_libs(css_urls, js_urls, function() {\n      console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n      run_inline_js();\n    });\n  }\n}(window));"
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "output_notebook()"
    ]
@@ -92,7 +405,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -106,7 +419,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -132,7 +445,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -161,7 +474,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -202,7 +515,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -223,7 +536,305 @@
     "id": "CoWDeDBl2wHQ",
     "outputId": "5a41d67f-63d7-4365-c170-ee04ef5727c1"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Help on function read_csv in module pandas.io.parsers:\n",
+      "\n",
+      "read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, iterator=False, chunksize=None, compression='infer', thousands=None, decimal=b'.', lineterminator=None, quotechar='\"', quoting=0, doublequote=True, escapechar=None, comment=None, encoding=None, dialect=None, tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None)\n",
+      "    Read a comma-separated values (csv) file into DataFrame.\n",
+      "    \n",
+      "    Also supports optionally iterating or breaking of the file\n",
+      "    into chunks.\n",
+      "    \n",
+      "    Additional help can be found in the online docs for\n",
+      "    `IO Tools <http://pandas.pydata.org/pandas-docs/stable/io.html>`_.\n",
+      "    \n",
+      "    Parameters\n",
+      "    ----------\n",
+      "    filepath_or_buffer : str, path object, or file-like object\n",
+      "        Any valid string path is acceptable. The string could be a URL. Valid\n",
+      "        URL schemes include http, ftp, s3, and file. For file URLs, a host is\n",
+      "        expected. A local file could be: file://localhost/path/to/table.csv.\n",
+      "    \n",
+      "        If you want to pass in a path object, pandas accepts either\n",
+      "        ``pathlib.Path`` or ``py._path.local.LocalPath``.\n",
+      "    \n",
+      "        By file-like object, we refer to objects with a ``read()`` method, such as\n",
+      "        a file handler (e.g. via builtin ``open`` function) or ``StringIO``.\n",
+      "    sep : str, default ','\n",
+      "        Delimiter to use. If sep is None, the C engine cannot automatically detect\n",
+      "        the separator, but the Python parsing engine can, meaning the latter will\n",
+      "        be used and automatically detect the separator by Python's builtin sniffer\n",
+      "        tool, ``csv.Sniffer``. In addition, separators longer than 1 character and\n",
+      "        different from ``'\\s+'`` will be interpreted as regular expressions and\n",
+      "        will also force the use of the Python parsing engine. Note that regex\n",
+      "        delimiters are prone to ignoring quoted data. Regex example: ``'\\r\\t'``.\n",
+      "    delimiter : str, default ``None``\n",
+      "        Alias for sep.\n",
+      "    header : int, list of int, default 'infer'\n",
+      "        Row number(s) to use as the column names, and the start of the\n",
+      "        data.  Default behavior is to infer the column names: if no names\n",
+      "        are passed the behavior is identical to ``header=0`` and column\n",
+      "        names are inferred from the first line of the file, if column\n",
+      "        names are passed explicitly then the behavior is identical to\n",
+      "        ``header=None``. Explicitly pass ``header=0`` to be able to\n",
+      "        replace existing names. The header can be a list of integers that\n",
+      "        specify row locations for a multi-index on the columns\n",
+      "        e.g. [0,1,3]. Intervening rows that are not specified will be\n",
+      "        skipped (e.g. 2 in this example is skipped). Note that this\n",
+      "        parameter ignores commented lines and empty lines if\n",
+      "        ``skip_blank_lines=True``, so ``header=0`` denotes the first line of\n",
+      "        data rather than the first line of the file.\n",
+      "    names : array-like, optional\n",
+      "        List of column names to use. If file contains no header row, then you\n",
+      "        should explicitly pass ``header=None``. Duplicates in this list will cause\n",
+      "        a ``UserWarning`` to be issued.\n",
+      "    index_col : int, sequence or bool, optional\n",
+      "        Column to use as the row labels of the DataFrame. If a sequence is given, a\n",
+      "        MultiIndex is used. If you have a malformed file with delimiters at the end\n",
+      "        of each line, you might consider ``index_col=False`` to force pandas to\n",
+      "        not use the first column as the index (row names).\n",
+      "    usecols : list-like or callable, optional\n",
+      "        Return a subset of the columns. If list-like, all elements must either\n",
+      "        be positional (i.e. integer indices into the document columns) or strings\n",
+      "        that correspond to column names provided either by the user in `names` or\n",
+      "        inferred from the document header row(s). For example, a valid list-like\n",
+      "        `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``.\n",
+      "        Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.\n",
+      "        To instantiate a DataFrame from ``data`` with element order preserved use\n",
+      "        ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns\n",
+      "        in ``['foo', 'bar']`` order or\n",
+      "        ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``\n",
+      "        for ``['bar', 'foo']`` order.\n",
+      "    \n",
+      "        If callable, the callable function will be evaluated against the column\n",
+      "        names, returning names where the callable function evaluates to True. An\n",
+      "        example of a valid callable argument would be ``lambda x: x.upper() in\n",
+      "        ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster\n",
+      "        parsing time and lower memory usage.\n",
+      "    squeeze : bool, default False\n",
+      "        If the parsed data only contains one column then return a Series.\n",
+      "    prefix : str, optional\n",
+      "        Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...\n",
+      "    mangle_dupe_cols : bool, default True\n",
+      "        Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than\n",
+      "        'X'...'X'. Passing in False will cause data to be overwritten if there\n",
+      "        are duplicate names in the columns.\n",
+      "    dtype : Type name or dict of column -> type, optional\n",
+      "        Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32,\n",
+      "        'c': 'Int64'}\n",
+      "        Use `str` or `object` together with suitable `na_values` settings\n",
+      "        to preserve and not interpret dtype.\n",
+      "        If converters are specified, they will be applied INSTEAD\n",
+      "        of dtype conversion.\n",
+      "    engine : {'c', 'python'}, optional\n",
+      "        Parser engine to use. The C engine is faster while the python engine is\n",
+      "        currently more feature-complete.\n",
+      "    converters : dict, optional\n",
+      "        Dict of functions for converting values in certain columns. Keys can either\n",
+      "        be integers or column labels.\n",
+      "    true_values : list, optional\n",
+      "        Values to consider as True.\n",
+      "    false_values : list, optional\n",
+      "        Values to consider as False.\n",
+      "    skipinitialspace : bool, default False\n",
+      "        Skip spaces after delimiter.\n",
+      "    skiprows : list-like, int or callable, optional\n",
+      "        Line numbers to skip (0-indexed) or number of lines to skip (int)\n",
+      "        at the start of the file.\n",
+      "    \n",
+      "        If callable, the callable function will be evaluated against the row\n",
+      "        indices, returning True if the row should be skipped and False otherwise.\n",
+      "        An example of a valid callable argument would be ``lambda x: x in [0, 2]``.\n",
+      "    skipfooter : int, default 0\n",
+      "        Number of lines at bottom of file to skip (Unsupported with engine='c').\n",
+      "    nrows : int, optional\n",
+      "        Number of rows of file to read. Useful for reading pieces of large files.\n",
+      "    na_values : scalar, str, list-like, or dict, optional\n",
+      "        Additional strings to recognize as NA/NaN. If dict passed, specific\n",
+      "        per-column NA values.  By default the following values are interpreted as\n",
+      "        NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',\n",
+      "        '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan',\n",
+      "        'null'.\n",
+      "    keep_default_na : bool, default True\n",
+      "        Whether or not to include the default NaN values when parsing the data.\n",
+      "        Depending on whether `na_values` is passed in, the behavior is as follows:\n",
+      "    \n",
+      "        * If `keep_default_na` is True, and `na_values` are specified, `na_values`\n",
+      "          is appended to the default NaN values used for parsing.\n",
+      "        * If `keep_default_na` is True, and `na_values` are not specified, only\n",
+      "          the default NaN values are used for parsing.\n",
+      "        * If `keep_default_na` is False, and `na_values` are specified, only\n",
+      "          the NaN values specified `na_values` are used for parsing.\n",
+      "        * If `keep_default_na` is False, and `na_values` are not specified, no\n",
+      "          strings will be parsed as NaN.\n",
+      "    \n",
+      "        Note that if `na_filter` is passed in as False, the `keep_default_na` and\n",
+      "        `na_values` parameters will be ignored.\n",
+      "    na_filter : bool, default True\n",
+      "        Detect missing value markers (empty strings and the value of na_values). In\n",
+      "        data without any NAs, passing na_filter=False can improve the performance\n",
+      "        of reading a large file.\n",
+      "    verbose : bool, default False\n",
+      "        Indicate number of NA values placed in non-numeric columns.\n",
+      "    skip_blank_lines : bool, default True\n",
+      "        If True, skip over blank lines rather than interpreting as NaN values.\n",
+      "    parse_dates : bool or list of int or names or list of lists or dict, default False\n",
+      "        The behavior is as follows:\n",
+      "    \n",
+      "        * boolean. If True -> try parsing the index.\n",
+      "        * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3\n",
+      "          each as a separate date column.\n",
+      "        * list of lists. e.g.  If [[1, 3]] -> combine columns 1 and 3 and parse as\n",
+      "          a single date column.\n",
+      "        * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call\n",
+      "          result 'foo'\n",
+      "    \n",
+      "        If a column or index cannot be represented as an array of datetimes,\n",
+      "        say because of an unparseable value or a mixture of timezones, the column\n",
+      "        or index will be returned unaltered as an object data type. For\n",
+      "        non-standard datetime parsing, use ``pd.to_datetime`` after\n",
+      "        ``pd.read_csv``. To parse an index or column with a mixture of timezones,\n",
+      "        specify ``date_parser`` to be a partially-applied\n",
+      "        :func:`pandas.to_datetime` with ``utc=True``. See\n",
+      "        :ref:`io.csv.mixed_timezones` for more.\n",
+      "    \n",
+      "        Note: A fast-path exists for iso8601-formatted dates.\n",
+      "    infer_datetime_format : bool, default False\n",
+      "        If True and `parse_dates` is enabled, pandas will attempt to infer the\n",
+      "        format of the datetime strings in the columns, and if it can be inferred,\n",
+      "        switch to a faster method of parsing them. In some cases this can increase\n",
+      "        the parsing speed by 5-10x.\n",
+      "    keep_date_col : bool, default False\n",
+      "        If True and `parse_dates` specifies combining multiple columns then\n",
+      "        keep the original columns.\n",
+      "    date_parser : function, optional\n",
+      "        Function to use for converting a sequence of string columns to an array of\n",
+      "        datetime instances. The default uses ``dateutil.parser.parser`` to do the\n",
+      "        conversion. Pandas will try to call `date_parser` in three different ways,\n",
+      "        advancing to the next if an exception occurs: 1) Pass one or more arrays\n",
+      "        (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the\n",
+      "        string values from the columns defined by `parse_dates` into a single array\n",
+      "        and pass that; and 3) call `date_parser` once for each row using one or\n",
+      "        more strings (corresponding to the columns defined by `parse_dates`) as\n",
+      "        arguments.\n",
+      "    dayfirst : bool, default False\n",
+      "        DD/MM format dates, international and European format.\n",
+      "    iterator : bool, default False\n",
+      "        Return TextFileReader object for iteration or getting chunks with\n",
+      "        ``get_chunk()``.\n",
+      "    chunksize : int, optional\n",
+      "        Return TextFileReader object for iteration.\n",
+      "        See the `IO Tools docs\n",
+      "        <http://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_\n",
+      "        for more information on ``iterator`` and ``chunksize``.\n",
+      "    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'\n",
+      "        For on-the-fly decompression of on-disk data. If 'infer' and\n",
+      "        `filepath_or_buffer` is path-like, then detect compression from the\n",
+      "        following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no\n",
+      "        decompression). If using 'zip', the ZIP file must contain only one data\n",
+      "        file to be read in. Set to None for no decompression.\n",
+      "    \n",
+      "        .. versionadded:: 0.18.1 support for 'zip' and 'xz' compression.\n",
+      "    \n",
+      "    thousands : str, optional\n",
+      "        Thousands separator.\n",
+      "    decimal : str, default '.'\n",
+      "        Character to recognize as decimal point (e.g. use ',' for European data).\n",
+      "    lineterminator : str (length 1), optional\n",
+      "        Character to break file into lines. Only valid with C parser.\n",
+      "    quotechar : str (length 1), optional\n",
+      "        The character used to denote the start and end of a quoted item. Quoted\n",
+      "        items can include the delimiter and it will be ignored.\n",
+      "    quoting : int or csv.QUOTE_* instance, default 0\n",
+      "        Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of\n",
+      "        QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).\n",
+      "    doublequote : bool, default ``True``\n",
+      "       When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate\n",
+      "       whether or not to interpret two consecutive quotechar elements INSIDE a\n",
+      "       field as a single ``quotechar`` element.\n",
+      "    escapechar : str (length 1), optional\n",
+      "        One-character string used to escape other characters.\n",
+      "    comment : str, optional\n",
+      "        Indicates remainder of line should not be parsed. If found at the beginning\n",
+      "        of a line, the line will be ignored altogether. This parameter must be a\n",
+      "        single character. Like empty lines (as long as ``skip_blank_lines=True``),\n",
+      "        fully commented lines are ignored by the parameter `header` but not by\n",
+      "        `skiprows`. For example, if ``comment='#'``, parsing\n",
+      "        ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being\n",
+      "        treated as the header.\n",
+      "    encoding : str, optional\n",
+      "        Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python\n",
+      "        standard encodings\n",
+      "        <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .\n",
+      "    dialect : str or csv.Dialect, optional\n",
+      "        If provided, this parameter will override values (default or not) for the\n",
+      "        following parameters: `delimiter`, `doublequote`, `escapechar`,\n",
+      "        `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to\n",
+      "        override values, a ParserWarning will be issued. See csv.Dialect\n",
+      "        documentation for more details.\n",
+      "    tupleize_cols : bool, default False\n",
+      "        Leave a list of tuples on columns as is (default is to convert to\n",
+      "        a MultiIndex on the columns).\n",
+      "    \n",
+      "        .. deprecated:: 0.21.0\n",
+      "           This argument will be removed and will always convert to MultiIndex\n",
+      "    \n",
+      "    error_bad_lines : bool, default True\n",
+      "        Lines with too many fields (e.g. a csv line with too many commas) will by\n",
+      "        default cause an exception to be raised, and no DataFrame will be returned.\n",
+      "        If False, then these \"bad lines\" will dropped from the DataFrame that is\n",
+      "        returned.\n",
+      "    warn_bad_lines : bool, default True\n",
+      "        If error_bad_lines is False, and warn_bad_lines is True, a warning for each\n",
+      "        \"bad line\" will be output.\n",
+      "    delim_whitespace : bool, default False\n",
+      "        Specifies whether or not whitespace (e.g. ``' '`` or ``'    '``) will be\n",
+      "        used as the sep. Equivalent to setting ``sep='\\s+'``. If this option\n",
+      "        is set to True, nothing should be passed in for the ``delimiter``\n",
+      "        parameter.\n",
+      "    \n",
+      "        .. versionadded:: 0.18.1 support for the Python parser.\n",
+      "    \n",
+      "    low_memory : bool, default True\n",
+      "        Internally process the file in chunks, resulting in lower memory use\n",
+      "        while parsing, but possibly mixed type inference.  To ensure no mixed\n",
+      "        types either set False, or specify the type with the `dtype` parameter.\n",
+      "        Note that the entire file is read into a single DataFrame regardless,\n",
+      "        use the `chunksize` or `iterator` parameter to return the data in chunks.\n",
+      "        (Only valid with C parser).\n",
+      "    memory_map : bool, default False\n",
+      "        If a filepath is provided for `filepath_or_buffer`, map the file object\n",
+      "        directly onto memory and access the data directly from there. Using this\n",
+      "        option can improve performance because there is no longer any I/O overhead.\n",
+      "    float_precision : str, optional\n",
+      "        Specifies which converter the C engine should use for floating-point\n",
+      "        values. The options are `None` for the ordinary converter,\n",
+      "        `high` for the high-precision converter, and `round_trip` for the\n",
+      "        round-trip converter.\n",
+      "    \n",
+      "    Returns\n",
+      "    -------\n",
+      "    DataFrame or TextParser\n",
+      "        A comma-separated values (csv) file is returned as two-dimensional\n",
+      "        data structure with labeled axes.\n",
+      "    \n",
+      "    See Also\n",
+      "    --------\n",
+      "    to_csv : Write DataFrame to a comma-separated values (csv) file.\n",
+      "    read_csv : Read a comma-separated values (csv) file into DataFrame.\n",
+      "    read_fwf : Read a table of fixed-width formatted lines into DataFrame.\n",
+      "    \n",
+      "    Examples\n",
+      "    --------\n",
+      "    >>> pd.read_csv('data.csv')  # doctest: +SKIP\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "help(pd.read_csv)"
    ]
@@ -240,14 +851,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "2iltS1Q-k3Wn",
     "outputId": "ea81462d-8a49-406b-c933-182c49379053"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(136, 52230)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "data_tr.shape"
    ]
@@ -266,14 +888,242 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "TVfPwU6-k3Wt",
     "outputId": "884dd460-6c53-4bf4-9c37-f7c7299b37a2"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ALB.Gene_AceView</th>\n",
+       "      <th>CD24L4.1.Gene_AceView</th>\n",
+       "      <th>RPS11.Gene_RefSeq</th>\n",
+       "      <th>RPS18.Gene_AceView</th>\n",
+       "      <th>C5orf13.Gene_AceView</th>\n",
+       "      <th>CCT2.Gene_AceView</th>\n",
+       "      <th>COL1A1.Gene_AceView</th>\n",
+       "      <th>DDX1.Gene_AceView</th>\n",
+       "      <th>EEF1A1.Gene_AceView</th>\n",
+       "      <th>FLT3LG_.Gene_AceView</th>\n",
+       "      <th>...</th>\n",
+       "      <th>zawskaw.Gene_AceView</th>\n",
+       "      <th>zeedor.Gene_AceView</th>\n",
+       "      <th>zergor.Gene_AceView</th>\n",
+       "      <th>zorsa.Gene_AceView</th>\n",
+       "      <th>zoychabu.Gene_AceView</th>\n",
+       "      <th>zoysteeby.Gene_AceView</th>\n",
+       "      <th>zudee.Gene_AceView</th>\n",
+       "      <th>zureyby.Gene_AceView</th>\n",
+       "      <th>zuswoybu.Gene_AceView</th>\n",
+       "      <th>zyjee.Gene_AceView</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>9.29</td>\n",
+       "      <td>18.82</td>\n",
+       "      <td>21.17</td>\n",
+       "      <td>20.90</td>\n",
+       "      <td>20.02</td>\n",
+       "      <td>16.31</td>\n",
+       "      <td>18.60</td>\n",
+       "      <td>15.73</td>\n",
+       "      <td>21.71</td>\n",
+       "      <td>20.02</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>9.25</td>\n",
+       "      <td>20.25</td>\n",
+       "      <td>22.44</td>\n",
+       "      <td>22.00</td>\n",
+       "      <td>21.05</td>\n",
+       "      <td>17.06</td>\n",
+       "      <td>19.39</td>\n",
+       "      <td>22.84</td>\n",
+       "      <td>22.72</td>\n",
+       "      <td>21.26</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5.54</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.39</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5.45</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>8.99</td>\n",
+       "      <td>20.09</td>\n",
+       "      <td>22.09</td>\n",
+       "      <td>21.71</td>\n",
+       "      <td>21.65</td>\n",
+       "      <td>16.85</td>\n",
+       "      <td>23.02</td>\n",
+       "      <td>15.79</td>\n",
+       "      <td>22.24</td>\n",
+       "      <td>20.75</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.75</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>7.32</td>\n",
+       "      <td>19.82</td>\n",
+       "      <td>20.52</td>\n",
+       "      <td>20.90</td>\n",
+       "      <td>21.58</td>\n",
+       "      <td>16.49</td>\n",
+       "      <td>18.91</td>\n",
+       "      <td>15.45</td>\n",
+       "      <td>22.06</td>\n",
+       "      <td>19.59</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>10.56</td>\n",
+       "      <td>21.19</td>\n",
+       "      <td>20.69</td>\n",
+       "      <td>21.29</td>\n",
+       "      <td>20.28</td>\n",
+       "      <td>16.22</td>\n",
+       "      <td>17.15</td>\n",
+       "      <td>16.01</td>\n",
+       "      <td>21.84</td>\n",
+       "      <td>19.74</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5.20</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 52229 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   ALB.Gene_AceView  CD24L4.1.Gene_AceView  RPS11.Gene_RefSeq  \\\n",
+       "0              9.29                  18.82              21.17   \n",
+       "1              9.25                  20.25              22.44   \n",
+       "2              8.99                  20.09              22.09   \n",
+       "3              7.32                  19.82              20.52   \n",
+       "4             10.56                  21.19              20.69   \n",
+       "\n",
+       "   RPS18.Gene_AceView  C5orf13.Gene_AceView  CCT2.Gene_AceView  \\\n",
+       "0               20.90                 20.02              16.31   \n",
+       "1               22.00                 21.05              17.06   \n",
+       "2               21.71                 21.65              16.85   \n",
+       "3               20.90                 21.58              16.49   \n",
+       "4               21.29                 20.28              16.22   \n",
+       "\n",
+       "   COL1A1.Gene_AceView  DDX1.Gene_AceView  EEF1A1.Gene_AceView  \\\n",
+       "0                18.60              15.73                21.71   \n",
+       "1                19.39              22.84                22.72   \n",
+       "2                23.02              15.79                22.24   \n",
+       "3                18.91              15.45                22.06   \n",
+       "4                17.15              16.01                21.84   \n",
+       "\n",
+       "   FLT3LG_.Gene_AceView  ...  zawskaw.Gene_AceView  zeedor.Gene_AceView  \\\n",
+       "0                 20.02  ...                   0.0                 0.00   \n",
+       "1                 21.26  ...                   0.0                 5.54   \n",
+       "2                 20.75  ...                   0.0                 0.00   \n",
+       "3                 19.59  ...                   0.0                 0.00   \n",
+       "4                 19.74  ...                   0.0                 0.00   \n",
+       "\n",
+       "   zergor.Gene_AceView  zorsa.Gene_AceView  zoychabu.Gene_AceView  \\\n",
+       "0                  0.0                0.00                    0.0   \n",
+       "1                  0.0                3.39                    0.0   \n",
+       "2                  0.0                3.75                    0.0   \n",
+       "3                  0.0                0.00                    0.0   \n",
+       "4                  0.0                5.20                    0.0   \n",
+       "\n",
+       "   zoysteeby.Gene_AceView  zudee.Gene_AceView  zureyby.Gene_AceView  \\\n",
+       "0                     0.0                0.00                   0.0   \n",
+       "1                     0.0                5.45                   0.0   \n",
+       "2                     0.0                0.00                   0.0   \n",
+       "3                     0.0                0.00                   0.0   \n",
+       "4                     0.0                0.00                   0.0   \n",
+       "\n",
+       "   zuswoybu.Gene_AceView  zyjee.Gene_AceView  \n",
+       "0                    0.0                 0.0  \n",
+       "1                    0.0                 0.0  \n",
+       "2                    0.0                 0.0  \n",
+       "3                    0.0                 0.0  \n",
+       "4                    0.0                 0.0  \n",
+       "\n",
+       "[5 rows x 52229 columns]"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "data_tr.head()"
    ]
@@ -290,13 +1140,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "F1bUOeE4k3Wx"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "KeyError",
+     "evalue": "\"['sampleID'] not found in axis\"",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-13-3a2971c1dfd7>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata_tr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_tr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'sampleID'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0mdata_ts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_ts\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'sampleID'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m   3938\u001b[0m                                            \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3939\u001b[0m                                            \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3940\u001b[0;31m                                            errors=errors)\n\u001b[0m\u001b[1;32m   3941\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3942\u001b[0m     @rewrite_axis_style_signature('mapper', [('copy', True),\n",
+      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m   3778\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32min\u001b[0m \u001b[0maxes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3779\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3780\u001b[0;31m                 \u001b[0mobj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_drop_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3781\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3782\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_drop_axis\u001b[0;34m(self, labels, axis, level, errors)\u001b[0m\n\u001b[1;32m   3810\u001b[0m                 \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3811\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3812\u001b[0;31m                 \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   3813\u001b[0m             \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mnew_axis\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   3814\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m   4963\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m'ignore'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4964\u001b[0m                 raise KeyError(\n\u001b[0;32m-> 4965\u001b[0;31m                     '{} not found in axis'.format(labels[mask]))\n\u001b[0m\u001b[1;32m   4966\u001b[0m             \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m~\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4967\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyError\u001b[0m: \"['sampleID'] not found in axis\""
+     ]
+    }
+   ],
    "source": [
     "data_tr = data_tr.drop('sampleID', axis=1)\n",
     "data_ts = data_ts.drop('sampleID', axis=1)"
@@ -314,14 +1180,242 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "QgcQgBVek3W1",
     "outputId": "6cbec2e0-0001-4e03-c040-0bbddd51db5b"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ALB.Gene_AceView</th>\n",
+       "      <th>CD24L4.1.Gene_AceView</th>\n",
+       "      <th>RPS11.Gene_RefSeq</th>\n",
+       "      <th>RPS18.Gene_AceView</th>\n",
+       "      <th>C5orf13.Gene_AceView</th>\n",
+       "      <th>CCT2.Gene_AceView</th>\n",
+       "      <th>COL1A1.Gene_AceView</th>\n",
+       "      <th>DDX1.Gene_AceView</th>\n",
+       "      <th>EEF1A1.Gene_AceView</th>\n",
+       "      <th>FLT3LG_.Gene_AceView</th>\n",
+       "      <th>...</th>\n",
+       "      <th>zawskaw.Gene_AceView</th>\n",
+       "      <th>zeedor.Gene_AceView</th>\n",
+       "      <th>zergor.Gene_AceView</th>\n",
+       "      <th>zorsa.Gene_AceView</th>\n",
+       "      <th>zoychabu.Gene_AceView</th>\n",
+       "      <th>zoysteeby.Gene_AceView</th>\n",
+       "      <th>zudee.Gene_AceView</th>\n",
+       "      <th>zureyby.Gene_AceView</th>\n",
+       "      <th>zuswoybu.Gene_AceView</th>\n",
+       "      <th>zyjee.Gene_AceView</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>9.29</td>\n",
+       "      <td>18.82</td>\n",
+       "      <td>21.17</td>\n",
+       "      <td>20.90</td>\n",
+       "      <td>20.02</td>\n",
+       "      <td>16.31</td>\n",
+       "      <td>18.60</td>\n",
+       "      <td>15.73</td>\n",
+       "      <td>21.71</td>\n",
+       "      <td>20.02</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>9.25</td>\n",
+       "      <td>20.25</td>\n",
+       "      <td>22.44</td>\n",
+       "      <td>22.00</td>\n",
+       "      <td>21.05</td>\n",
+       "      <td>17.06</td>\n",
+       "      <td>19.39</td>\n",
+       "      <td>22.84</td>\n",
+       "      <td>22.72</td>\n",
+       "      <td>21.26</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5.54</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.39</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5.45</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>8.99</td>\n",
+       "      <td>20.09</td>\n",
+       "      <td>22.09</td>\n",
+       "      <td>21.71</td>\n",
+       "      <td>21.65</td>\n",
+       "      <td>16.85</td>\n",
+       "      <td>23.02</td>\n",
+       "      <td>15.79</td>\n",
+       "      <td>22.24</td>\n",
+       "      <td>20.75</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.75</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>7.32</td>\n",
+       "      <td>19.82</td>\n",
+       "      <td>20.52</td>\n",
+       "      <td>20.90</td>\n",
+       "      <td>21.58</td>\n",
+       "      <td>16.49</td>\n",
+       "      <td>18.91</td>\n",
+       "      <td>15.45</td>\n",
+       "      <td>22.06</td>\n",
+       "      <td>19.59</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>10.56</td>\n",
+       "      <td>21.19</td>\n",
+       "      <td>20.69</td>\n",
+       "      <td>21.29</td>\n",
+       "      <td>20.28</td>\n",
+       "      <td>16.22</td>\n",
+       "      <td>17.15</td>\n",
+       "      <td>16.01</td>\n",
+       "      <td>21.84</td>\n",
+       "      <td>19.74</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5.20</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 52229 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   ALB.Gene_AceView  CD24L4.1.Gene_AceView  RPS11.Gene_RefSeq  \\\n",
+       "0              9.29                  18.82              21.17   \n",
+       "1              9.25                  20.25              22.44   \n",
+       "2              8.99                  20.09              22.09   \n",
+       "3              7.32                  19.82              20.52   \n",
+       "4             10.56                  21.19              20.69   \n",
+       "\n",
+       "   RPS18.Gene_AceView  C5orf13.Gene_AceView  CCT2.Gene_AceView  \\\n",
+       "0               20.90                 20.02              16.31   \n",
+       "1               22.00                 21.05              17.06   \n",
+       "2               21.71                 21.65              16.85   \n",
+       "3               20.90                 21.58              16.49   \n",
+       "4               21.29                 20.28              16.22   \n",
+       "\n",
+       "   COL1A1.Gene_AceView  DDX1.Gene_AceView  EEF1A1.Gene_AceView  \\\n",
+       "0                18.60              15.73                21.71   \n",
+       "1                19.39              22.84                22.72   \n",
+       "2                23.02              15.79                22.24   \n",
+       "3                18.91              15.45                22.06   \n",
+       "4                17.15              16.01                21.84   \n",
+       "\n",
+       "   FLT3LG_.Gene_AceView  ...  zawskaw.Gene_AceView  zeedor.Gene_AceView  \\\n",
+       "0                 20.02  ...                   0.0                 0.00   \n",
+       "1                 21.26  ...                   0.0                 5.54   \n",
+       "2                 20.75  ...                   0.0                 0.00   \n",
+       "3                 19.59  ...                   0.0                 0.00   \n",
+       "4                 19.74  ...                   0.0                 0.00   \n",
+       "\n",
+       "   zergor.Gene_AceView  zorsa.Gene_AceView  zoychabu.Gene_AceView  \\\n",
+       "0                  0.0                0.00                    0.0   \n",
+       "1                  0.0                3.39                    0.0   \n",
+       "2                  0.0                3.75                    0.0   \n",
+       "3                  0.0                0.00                    0.0   \n",
+       "4                  0.0                5.20                    0.0   \n",
+       "\n",
+       "   zoysteeby.Gene_AceView  zudee.Gene_AceView  zureyby.Gene_AceView  \\\n",
+       "0                     0.0                0.00                   0.0   \n",
+       "1                     0.0                5.45                   0.0   \n",
+       "2                     0.0                0.00                   0.0   \n",
+       "3                     0.0                0.00                   0.0   \n",
+       "4                     0.0                0.00                   0.0   \n",
+       "\n",
+       "   zuswoybu.Gene_AceView  zyjee.Gene_AceView  \n",
+       "0                    0.0                 0.0  \n",
+       "1                    0.0                 0.0  \n",
+       "2                    0.0                 0.0  \n",
+       "3                    0.0                 0.0  \n",
+       "4                    0.0                 0.0  \n",
+       "\n",
+       "[5 rows x 52229 columns]"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "data_tr.head()"
    ]
@@ -338,14 +1432,95 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "7Vbq9mqXk3W5",
     "outputId": "6bc4f986-30e2-4953-fc78-80b80982ba47"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>sampleID</th>\n",
+       "      <th>CLASS</th>\n",
+       "      <th>SEX</th>\n",
+       "      <th>RND</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>SEQC_NB001</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>SEQC_NB003</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>SEQC_NB005</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>SEQC_NB011</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>SEQC_NB013</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     sampleID  CLASS  SEX  RND\n",
+       "0  SEQC_NB001      0    1    1\n",
+       "1  SEQC_NB003      0    0    0\n",
+       "2  SEQC_NB005      0    0    1\n",
+       "3  SEQC_NB011      1    1    1\n",
+       "4  SEQC_NB013      0    1    1"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "labs_tr = pd.read_csv(LABS_TR, sep = \"\\t\")\n",
     "labs_ts = pd.read_csv(LABS_TS, sep = \"\\t\")\n",
@@ -364,14 +1539,77 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 16,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "pyTfzujJk3W9",
     "outputId": "cd7cf62a-c5b1-491a-853e-631b5cc9a4d2"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>CLASS</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   CLASS\n",
+       "0      0\n",
+       "1      0\n",
+       "2      0\n",
+       "3      1\n",
+       "4      0"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "class_lab_tr = labs_tr[['CLASS']]\n",
     "class_lab_ts = labs_ts[['CLASS']]\n",
@@ -391,7 +1629,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 17,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -438,13 +1676,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "XYs3b6JJ3qrn"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 9.29, 18.82, 21.17, ...,  0.  ,  0.  ,  0.  ],\n",
+       "       [ 9.25, 20.25, 22.44, ...,  0.  ,  0.  ,  0.  ],\n",
+       "       [ 8.99, 20.09, 22.09, ...,  0.  ,  0.  ,  0.  ],\n",
+       "       ...,\n",
+       "       [ 8.47, 20.75, 20.08, ...,  0.  ,  0.  ,  0.  ],\n",
+       "       [ 8.58, 20.57, 20.67, ...,  0.  ,  0.  ,  0.  ],\n",
+       "       [ 8.62, 20.13, 21.04, ...,  0.  ,  0.  ,  0.  ]])"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "x_tr"
    ]
@@ -461,13 +1716,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "iwI8uSvC4BbC"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,\n",
+       "       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,\n",
+       "       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,\n",
+       "       1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,\n",
+       "       0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0,\n",
+       "       1, 1, 1, 1])"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "y_tr"
    ]
@@ -512,7 +1784,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -566,7 +1838,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -592,7 +1864,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -605,7 +1877,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -615,9 +1887,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(136, 2)"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "z_tr.shape"
    ]
@@ -634,13 +1917,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "Cma7FaOd6F1M"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.08194369 0.0433671 ]\n"
+     ]
+    }
+   ],
    "source": [
     "print(pca.explained_variance_ratio_)"
    ]
@@ -669,9 +1960,63 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "\n",
+       "\n",
+       "\n",
+       "\n",
+       "\n",
+       "  <div class=\"bk-root\" id=\"5a65f1ed-6894-4aaa-9dff-834b1a17a73f\" data-root-id=\"1002\"></div>\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/javascript": [
+       "(function(root) {\n",
+       "  function embed_document(root) {\n",
+       "    \n",
+       "  var docs_json = {\"6b905e49-73ad-40ff-b792-d2bb188c3823\":{\"roots\":{\"references\":[{\"attributes\":{\"below\":[{\"id\":\"1013\",\"type\":\"LinearAxis\"}],\"center\":[{\"id\":\"1017\",\"type\":\"Grid\"},{\"id\":\"1022\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1018\",\"type\":\"LinearAxis\"}],\"plot_height\":400,\"plot_width\":400,\"renderers\":[{\"id\":\"1039\",\"type\":\"GlyphRenderer\"},{\"id\":\"1044\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1003\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1029\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"1005\",\"type\":\"DataRange1d\"},\"x_scale\":{\"id\":\"1009\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"1007\",\"type\":\"DataRange1d\"},\"y_scale\":{\"id\":\"1011\",\"type\":\"LinearScale\"}},\"id\":\"1002\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"data_source\":{\"id\":\"1041\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1042\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1043\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"1045\",\"type\":\"CDSView\"}},\"id\":\"1044\",\"type\":\"GlyphRenderer\"},{\"attributes\":{},\"id\":\"1052\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"fill_color\":{\"value\":\"blue\"},\"line_color\":{\"value\":\"blue\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1042\",\"type\":\"Circle\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1038\",\"type\":\"Circle\"},{\"attributes\":{\"source\":{\"id\":\"1041\",\"type\":\"ColumnDataSource\"}},\"id\":\"1045\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"1027\",\"type\":\"ResetTool\"},{\"attributes\":{},\"id\":\"1055\",\"type\":\"Selection\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1019\",\"type\":\"BasicTicker\"}},\"id\":\"1022\",\"t
ype\":\"Grid\"},{\"attributes\":{\"data_source\":{\"id\":\"1036\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1037\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1038\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"1040\",\"type\":\"CDSView\"}},\"id\":\"1039\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"s4dscZTUFsCsUR4MMntMQEacKLQ+z0dAZjHJmj/mKMDavt88hvAvQAKqnplzfeQ/iRqKdbXJRUDB41gKeYQzQOEzq9HxPkdAof95CJrpUUCXVoj1RC9SQFBDbl6DGfW//wiVFHpZJEDanZ+ntBVJQPXwzwnJIlNAD4fQrjFbQkApN+KVnwFMQJZQEet7gUJAB8OIkj+NPkA6POlDoostwF6LPGBkvxtAcEMN5dbzFEBG8wS/hXFOQNvWE0osDk1AAx0Rr8ddOEBhAMw5xFA0QMbOy//t9VFAt/2PyC4xN0C918OCG0c4QNvLJkWY3EFAWuQZTmXUSUB6OcmTg1E5QP7aMVQ/kSHA3+DhUYTsDkBeqygeHqg1QBm4Scy720JAYGv9cKPgREC1MSH71K8tQLpNrTITFlRAHXxspr3+SEDgaqfxTofxv1Vkp2gpcSNAyZ7O8pNKUEDDp6vqvAxQQO/AHnRFIlNA\",\"dtype\":\"float64\",\"shape\":[45]},\"y\":{\"__ndarray__\":\"5vVobXIYNEAZI7kdXVw2wDxKDc6nBgHAt8NFMWd/NMAOK6V1J5EJwBzi+V5UxxZA8RXRZPOeIcBhsnMT6sE3wJBnXaFJpxhAUHAj30TDz7/JqmMKc+w9wH/oBR/sckJAKuQKfyZNL0B9ResFsvEoQF959I4Mn0PAoTzn6xhBNkB3LAS7Jfk1wDYPw4n7pChAyQEh+La0F8DxZ9i1DoIowBKmZti8ByRA6tnjzr/EQsAMobWnnulFQG7mTrGWVzTA+GZR60VCFkCl1E4NSO1BQO5R0RpiqwrArpcb6qPaTUBa9JxAoDQ8QEPAWzE+bjNAbTnJ9riZNUDio8BMqBFGQJz/aX26KUNALQ4ZgbopSUBeJV1jAgM1QHt55P1vjyxAdN2EhwMyRUBzj8fL9JlDQFODmsrFiBVA58tVN1cLQMAWcmzzh0k/QDQLsl0qHyhAZg3FVBo0OcDevGZhQ6Y2wHid6WtZ/hjA\",\"dtype\":\"float64\",\"shape\":[45]}},\"selected\":{\"id\":\"1053\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1052\",\"type\":\"UnionRenderers\"}},\"id\":\"1036\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"/Si/hE7yN8C7LbeVIKI7wMMdNO2D8inAgOipwzArOcDAL+3Xn/s1wDlQqZT/RC3A0lDAJ94uPcBEfI+FtRMowC2xhdPCKDbAh/tMbl75GsCKjArEw/I5wFIeYVW4nzjA/XHQp7TxKsDx+0WEIxQ2wAcO2cuCFz/A4CDm9licNcDGtBtiHbs5wHoGXaS4TTjAbFP5QWQsuD/era7rU1o3wBKDDJQ51zTAFZbmKzXTN8DJH3gJwY04wADyZL+6Lj3AU
kAxbHHkPsAIDXLNd+AiwJ8e4cfbYTvAYfI139CSN8A49w0ghAQOwC6ADoIpczXAxEDA7yA4QMChNRjYobcowFr7IezNkTHA7S9ynqPANMCd9IoXjBg7wOLmNaL24TPA4anyBgS0NcDWCO503F4nwNjB0lfCnPu/YPHMQOpZ/b++PLtTrjkwwPjSg1Z3kD7AUP8gRchQMsCZtgXC/hAwwJlJaCSHEBzAy0yjL87sPcDBp9RjzaMtwHxLFCQxBCNAchQkl+BpAsB/Qr8LB2MuwDNCpP4iizfABpaDj85/PkDWX3fIOSE9wK48LhtqRzjAX70SodsvOcC+2/mOfvhBwGJMpTrvDzDAK39eBbxjMcCwB+gEkcoCQEnRjZb8iSnAz5Dh2+j+EsAGyAlM39oHQH5cMAtzIzHAZ5wsm+mrIsCCkyzz8YM1wC5kNz067DnANOanmreYJcAkXi1Yz7IrwA9R6qUBfTTAQ0s43xlAPMBaeSwB/y01wE9I/c2VkzLAANgvIXagREAjaGCbqiguwOCbeVChYjvAr+Z9nW/MM8Cpd1pFUEYVwJkdq+B5LDTAhyAPgUT+HsATLFQzmOw1wGeQb5ytOCfAuci/rTgkMsAiDs6wl9A3wFs5pdAaRTrAvZR9mMg6JEBUVyucjKIywM0trS/48ANAx6o8AjDuOsDVRS6rQjIVwEG2mc3L7TPASSgSW+zsL8A=\",\"dtype\":\"float64\",\"shape\":[91]},\"y\":{\"__ndarray__\":\"SmsRgBg8IUC8oD0b4RAvwB35A0JM8CnAz1/S3gASSkBSJfXGvD0YQGwNRpWAkzZA2VwfrDmBHkA36UHxc0kYQLRMpdZFZSVA04e0tBALNMAtRmWoxfwiQCBtLdeqCiXAEpb3q3/yJcA+RjvKlI8ywAG+oHGJGzLArEUo+Dj2jj9zV2of099BQC3Meir/JzbA+cjwoBiKRsBO8tZT0QQzwFwIlUXLmirAn9wY8xrtLMDn9QASdnQqwKf2eC5KKzBA3VF8NTxo+T/3Yz1KISY7wODtM+cnMTtAs/VrBGJsMkAP7adwqZlAwKHby3spuQJAZjhzpKyvK0BVxCjuMI00wLdmpr6nPzXAJzwBBDRkN8DWDMhBSWUMwF9IK1kH9B3AZ6rQl1vXK0A4EeHgZhVCwLSyZkqlax/AXTLcEWMThT/Mu1RCqQwhwKi/3zZ4OSpAE56Gdw51TUAfwlPljvMtwG8hT9sUCjbA+n0D8p+aGUB/Y+YlKhgvwIu/wa5PsC/AfQqxRAXdIUATOz28XxYhwIvoWu+DLDHAwJEC14kYM0DV02Dp/lgWwOchg1GW6jPAp3XzbwprK8CZCgw0UtQqwAqj9yrVFSHAeGNTQPu9KsAm1tK2sLgswJCMXflGJCjAyOEk8BUAQEA2sLks9bAUQOh0mgDoEzfAAplW4YL0OcAU5j6QbBwswNEV4Ni+azLAMt49bRyOJsDuE8fUyZodQO04mT27ditASLpzHfjfJEBEyWe0uVIDQGMEyRFfGCfAbrwjHpwSI8BEqySOoR8WQGi1HsA8u0BALsA+Ef1tNUAyoOniUqJAwGzvlTruXhrAqoczEyab97+rm3lEManJv3nEyYZYuijAQAWdtYxSO0CpGUBQNz8lwA9hTiQy1zJAXVNzRuA1N8CPSam/Y5I2wIpPufEzrjDAUJXG7kayBsDUwfIv1iUpwOcFGffjryNAPRm6zdXaJkA=\",\"dtype\":\"float64\",\"shape\":[91]}},\"selected\":{\"id\":\"1055\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1054\",\"type\":\"UnionRenderers\"}},\"id\":\"1041\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"1053\",\
"type\":\"Selection\"},{\"attributes\":{\"text\":\"PCA of Train data\"},\"id\":\"1003\",\"type\":\"Title\"},{\"attributes\":{},\"id\":\"1028\",\"type\":\"HelpTool\"},{\"attributes\":{\"callback\":null},\"id\":\"1007\",\"type\":\"DataRange1d\"},{\"attributes\":{},\"id\":\"1054\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1009\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"1011\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"1019\",\"type\":\"BasicTicker\"},{\"attributes\":{},\"id\":\"1023\",\"type\":\"PanTool\"},{\"attributes\":{\"axis_label\":\"PC1\",\"formatter\":{\"id\":\"1047\",\"type\":\"BasicTickFormatter\"},\"ticker\":{\"id\":\"1014\",\"type\":\"BasicTicker\"}},\"id\":\"1013\",\"type\":\"LinearAxis\"},{\"attributes\":{},\"id\":\"1024\",\"type\":\"WheelZoomTool\"},{\"attributes\":{},\"id\":\"1047\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1023\",\"type\":\"PanTool\"},{\"id\":\"1024\",\"type\":\"WheelZoomTool\"},{\"id\":\"1025\",\"type\":\"BoxZoomTool\"},{\"id\":\"1026\",\"type\":\"SaveTool\"},{\"id\":\"1027\",\"type\":\"ResetTool\"},{\"id\":\"1028\",\"type\":\"HelpTool\"}]},\"id\":\"1029\",\"type\":\"Toolbar\"},{\"attributes\":{\"callback\":null},\"id\":\"1005\",\"type\":\"DataRange1d\"},{\"attributes\":{\"ticker\":{\"id\":\"1014\",\"type\":\"BasicTicker\"}},\"id\":\"1017\",\"type\":\"Grid\"},{\"attributes\":{\"axis_label\":\"PC2\",\"formatter\":{\"id\":\"1049\",\"type\":\"BasicTickFormatter\"},\"ticker\":{\"id\":\"1019\",\"type\":\"BasicTicker\"}},\"id\":\"1018\",\"type\":\"LinearAxis\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1043\",\"type\":\"Circle\"},{\"attributes\":{\"fill_color\":{\"value\":\"orange
\"},\"line_color\":{\"value\":\"orange\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1037\",\"type\":\"Circle\"},{\"attributes\":{},\"id\":\"1014\",\"type\":\"BasicTicker\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"1051\",\"type\":\"BoxAnnotation\"},{\"attributes\":{},\"id\":\"1049\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"overlay\":{\"id\":\"1051\",\"type\":\"BoxAnnotation\"}},\"id\":\"1025\",\"type\":\"BoxZoomTool\"},{\"attributes\":{\"source\":{\"id\":\"1036\",\"type\":\"ColumnDataSource\"}},\"id\":\"1040\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"1026\",\"type\":\"SaveTool\"}],\"root_ids\":[\"1002\"]},\"title\":\"Bokeh Application\",\"version\":\"1.2.0\"}};\n",
+       "  var render_items = [{\"docid\":\"6b905e49-73ad-40ff-b792-d2bb188c3823\",\"roots\":{\"1002\":\"5a65f1ed-6894-4aaa-9dff-834b1a17a73f\"}}];\n",
+       "  root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
+       "\n",
+       "  }\n",
+       "  if (root.Bokeh !== undefined) {\n",
+       "    embed_document(root);\n",
+       "  } else {\n",
+       "    var attempts = 0;\n",
+       "    var timer = setInterval(function(root) {\n",
+       "      if (root.Bokeh !== undefined) {\n",
+       "        embed_document(root);\n",
+       "        clearInterval(timer);\n",
+       "      }\n",
+       "      attempts++;\n",
+       "      if (attempts > 100) {\n",
+       "        console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n",
+       "        clearInterval(timer);\n",
+       "      }\n",
+       "    }, 10, root)\n",
+       "  }\n",
+       "})(window);"
+      ],
+      "application/vnd.bokehjs_exec.v0+json": ""
+     },
+     "metadata": {
+      "application/vnd.bokehjs_exec.v0+json": {
+       "id": "1002"
+      }
+     },
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "p = figure(plot_width=400, plot_height=400, title=\"PCA of Train data\")\n",
     "p.circle(z_tr[y_tr==0, 0], z_tr[y_tr==0, 1], line_color=\"orange\", fill_color=\"orange\")\n",
@@ -693,9 +2038,63 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 27,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "\n",
+       "\n",
+       "\n",
+       "\n",
+       "\n",
+       "  <div class=\"bk-root\" id=\"7b1983bd-dbe4-4e2f-b963-37ac8561cbac\" data-root-id=\"1116\"></div>\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/javascript": [
+       "(function(root) {\n",
+       "  function embed_document(root) {\n",
+       "    \n",
+       "  var docs_json = {\"47a753d2-112e-47e1-894f-61297e2f2ad9\":{\"roots\":{\"references\":[{\"attributes\":{\"below\":[{\"id\":\"1127\",\"type\":\"LinearAxis\"}],\"center\":[{\"id\":\"1131\",\"type\":\"Grid\"},{\"id\":\"1136\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1132\",\"type\":\"LinearAxis\"}],\"plot_height\":400,\"plot_width\":400,\"renderers\":[{\"id\":\"1153\",\"type\":\"GlyphRenderer\"},{\"id\":\"1158\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1117\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1143\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"1119\",\"type\":\"DataRange1d\"},\"x_scale\":{\"id\":\"1123\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"1121\",\"type\":\"DataRange1d\"},\"y_scale\":{\"id\":\"1125\",\"type\":\"LinearScale\"}},\"id\":\"1116\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"1175\",\"type\":\"BoxAnnotation\"},{\"attributes\":{\"data_source\":{\"id\":\"1155\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1156\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1157\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"1159\",\"type\":\"CDSView\"}},\"id\":\"1158\",\"type\":\"GlyphRenderer\"},{\"attributes\":{},\"id\":\"1173\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"1177\",\"type\":\"Selection\"},{\"attributes\":{\"fill_color\":{\"value\":\"blue\"},\"line_color\":{\"value\":\"blue\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1156\",\"type\":\"Circle\"},{\"attributes\":{},\"id\":\"1142\",\"type\":\"HelpTool\"},{\"attributes\":{\"source\":{\"id\":\"1150\",\"type\":\"Col
umnDataSource\"}},\"id\":\"1154\",\"type\":\"CDSView\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1137\",\"type\":\"PanTool\"},{\"id\":\"1138\",\"type\":\"WheelZoomTool\"},{\"id\":\"1139\",\"type\":\"BoxZoomTool\"},{\"id\":\"1140\",\"type\":\"SaveTool\"},{\"id\":\"1141\",\"type\":\"ResetTool\"},{\"id\":\"1142\",\"type\":\"HelpTool\"}]},\"id\":\"1143\",\"type\":\"Toolbar\"},{\"attributes\":{\"overlay\":{\"id\":\"1175\",\"type\":\"BoxAnnotation\"}},\"id\":\"1139\",\"type\":\"BoxZoomTool\"},{\"attributes\":{\"callback\":null},\"id\":\"1119\",\"type\":\"DataRange1d\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"s4dscZTUFsCsUR4MMntMQEacKLQ+z0dAZjHJmj/mKMDavt88hvAvQAKqnplzfeQ/iRqKdbXJRUDB41gKeYQzQOEzq9HxPkdAof95CJrpUUCXVoj1RC9SQFBDbl6DGfW//wiVFHpZJEDanZ+ntBVJQPXwzwnJIlNAD4fQrjFbQkApN+KVnwFMQJZQEet7gUJAB8OIkj+NPkA6POlDoostwF6LPGBkvxtAcEMN5dbzFEBG8wS/hXFOQNvWE0osDk1AAx0Rr8ddOEBhAMw5xFA0QMbOy//t9VFAt/2PyC4xN0C918OCG0c4QNvLJkWY3EFAWuQZTmXUSUB6OcmTg1E5QP7aMVQ/kSHA3+DhUYTsDkBeqygeHqg1QBm4Scy720JAYGv9cKPgREC1MSH71K8tQLpNrTITFlRAHXxspr3+SEDgaqfxTofxv1Vkp2gpcSNAyZ7O8pNKUEDDp6vqvAxQQO/AHnRFIlNA\",\"dtype\":\"float64\",\"shape\":[45]},\"y\":{\"__ndarray__\":\"5vVobXIYNEAZI7kdXVw2wDxKDc6nBgHAt8NFMWd/NMAOK6V1J5EJwBzi+V5UxxZA8RXRZPOeIcBhsnMT6sE3wJBnXaFJpxhAUHAj30TDz7/JqmMKc+w9wH/oBR/sckJAKuQKfyZNL0B9ResFsvEoQF959I4Mn0PAoTzn6xhBNkB3LAS7Jfk1wDYPw4n7pChAyQEh+La0F8DxZ9i1DoIowBKmZti8ByRA6tnjzr/EQsAMobWnnulFQG7mTrGWVzTA+GZR60VCFkCl1E4NSO1BQO5R0RpiqwrArpcb6qPaTUBa9JxAoDQ8QEPAWzE+bjNAbTnJ9riZNUDio8BMqBFGQJz/aX26KUNALQ4ZgbopSUBeJV1jAgM1QHt55P1vjyxAdN2EhwMyRUBzj8fL9JlDQFODmsrFiBVA58tVN1cLQMAWcmzzh0k/QDQLsl0qHyhAZg3FVBo0OcDevGZhQ6Y2wHid6WtZ/hjA\",\"dtype\":\"float64\",\"shape\":[45]}},\"selected\":{\"id\":\"1177\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1176\",\"type\":\"UnionRenderers\"}},\"id\":\"1150\",\"type\":\"ColumnDataSource\"},
{\"attributes\":{},\"id\":\"1137\",\"type\":\"PanTool\"},{\"attributes\":{\"text\":\"PCA of Train data\"},\"id\":\"1117\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"/Si/hE7yN8C7LbeVIKI7wMMdNO2D8inAgOipwzArOcDAL+3Xn/s1wDlQqZT/RC3A0lDAJ94uPcBEfI+FtRMowC2xhdPCKDbAh/tMbl75GsCKjArEw/I5wFIeYVW4nzjA/XHQp7TxKsDx+0WEIxQ2wAcO2cuCFz/A4CDm9licNcDGtBtiHbs5wHoGXaS4TTjAbFP5QWQsuD/era7rU1o3wBKDDJQ51zTAFZbmKzXTN8DJH3gJwY04wADyZL+6Lj3AUkAxbHHkPsAIDXLNd+AiwJ8e4cfbYTvAYfI139CSN8A49w0ghAQOwC6ADoIpczXAxEDA7yA4QMChNRjYobcowFr7IezNkTHA7S9ynqPANMCd9IoXjBg7wOLmNaL24TPA4anyBgS0NcDWCO503F4nwNjB0lfCnPu/YPHMQOpZ/b++PLtTrjkwwPjSg1Z3kD7AUP8gRchQMsCZtgXC/hAwwJlJaCSHEBzAy0yjL87sPcDBp9RjzaMtwHxLFCQxBCNAchQkl+BpAsB/Qr8LB2MuwDNCpP4iizfABpaDj85/PkDWX3fIOSE9wK48LhtqRzjAX70SodsvOcC+2/mOfvhBwGJMpTrvDzDAK39eBbxjMcCwB+gEkcoCQEnRjZb8iSnAz5Dh2+j+EsAGyAlM39oHQH5cMAtzIzHAZ5wsm+mrIsCCkyzz8YM1wC5kNz067DnANOanmreYJcAkXi1Yz7IrwA9R6qUBfTTAQ0s43xlAPMBaeSwB/y01wE9I/c2VkzLAANgvIXagREAjaGCbqiguwOCbeVChYjvAr+Z9nW/MM8Cpd1pFUEYVwJkdq+B5LDTAhyAPgUT+HsATLFQzmOw1wGeQb5ytOCfAuci/rTgkMsAiDs6wl9A3wFs5pdAaRTrAvZR9mMg6JEBUVyucjKIywM0trS/48ANAx6o8AjDuOsDVRS6rQjIVwEG2mc3L7TPASSgSW+zsL8A=\",\"dtype\":\"float64\",\"shape\":[91]},\"y\":{\"__ndarray__\":\"SmsRgBg8IUC8oD0b4RAvwB35A0JM8CnAz1/S3gASSkBSJfXGvD0YQGwNRpWAkzZA2VwfrDmBHkA36UHxc0kYQLRMpdZFZSVA04e0tBALNMAtRmWoxfwiQCBtLdeqCiXAEpb3q3/yJcA+RjvKlI8ywAG+oHGJGzLArEUo+Dj2jj9zV2of099BQC3Meir/JzbA+cjwoBiKRsBO8tZT0QQzwFwIlUXLmirAn9wY8xrtLMDn9QASdnQqwKf2eC5KKzBA3VF8NTxo+T/3Yz1KISY7wODtM+cnMTtAs/VrBGJsMkAP7adwqZlAwKHby3spuQJAZjhzpKyvK0BVxCjuMI00wLdmpr6nPzXAJzwBBDRkN8DWDMhBSWUMwF9IK1kH9B3AZ6rQl1vXK0A4EeHgZhVCwLSyZkqlax/AXTLcEWMThT/Mu1RCqQwhwKi/3zZ4OSpAE56Gdw51TUAfwlPljvMtwG8hT9sUCjbA+n0D8p+aGUB/Y+YlKhgvwIu/wa5PsC/AfQqxRAXdIUATOz28XxYhwIvoWu+DLDHAwJEC14kYM0DV02Dp/lgWwOchg1GW6jPAp3XzbwprK8CZCgw0UtQqwAqj9yrVFSHAeGNTQPu9KsAm1tK2sLgswJCMXflGJCjAyOEk8BUAQEA2sLks9bAUQOh0mgDoEzfAAplW4YL0OcAU5j6QbBwswNEV4Ni+azLAMt49bRyOJsDuE8fUyZodQO04mT27ditASLpzHfjfJEBEyW
e0uVIDQGMEyRFfGCfAbrwjHpwSI8BEqySOoR8WQGi1HsA8u0BALsA+Ef1tNUAyoOniUqJAwGzvlTruXhrAqoczEyab97+rm3lEManJv3nEyYZYuijAQAWdtYxSO0CpGUBQNz8lwA9hTiQy1zJAXVNzRuA1N8CPSam/Y5I2wIpPufEzrjDAUJXG7kayBsDUwfIv1iUpwOcFGffjryNAPRm6zdXaJkA=\",\"dtype\":\"float64\",\"shape\":[91]}},\"selected\":{\"id\":\"1179\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1178\",\"type\":\"UnionRenderers\"}},\"id\":\"1155\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1157\",\"type\":\"Circle\"},{\"attributes\":{\"fill_color\":{\"value\":\"orange\"},\"line_color\":{\"value\":\"orange\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1151\",\"type\":\"Circle\"},{\"attributes\":{},\"id\":\"1178\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1133\",\"type\":\"BasicTicker\"},{\"attributes\":{\"ticker\":{\"id\":\"1128\",\"type\":\"BasicTicker\"}},\"id\":\"1131\",\"type\":\"Grid\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1133\",\"type\":\"BasicTicker\"}},\"id\":\"1136\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1140\",\"type\":\"SaveTool\"},{\"attributes\":{\"data_source\":{\"id\":\"1150\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1151\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1152\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"1154\",\"type\":\"CDSView\"}},\"id\":\"1153\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"axis_label\":\"PC1 
(8.19%)\",\"formatter\":{\"id\":\"1171\",\"type\":\"BasicTickFormatter\"},\"ticker\":{\"id\":\"1128\",\"type\":\"BasicTicker\"}},\"id\":\"1127\",\"type\":\"LinearAxis\"},{\"attributes\":{},\"id\":\"1171\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"1179\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1176\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1125\",\"type\":\"LinearScale\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1152\",\"type\":\"Circle\"},{\"attributes\":{\"axis_label\":\"PC2 (4.34%)\",\"formatter\":{\"id\":\"1173\",\"type\":\"BasicTickFormatter\"},\"ticker\":{\"id\":\"1133\",\"type\":\"BasicTicker\"}},\"id\":\"1132\",\"type\":\"LinearAxis\"},{\"attributes\":{\"source\":{\"id\":\"1155\",\"type\":\"ColumnDataSource\"}},\"id\":\"1159\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"1123\",\"type\":\"LinearScale\"},{\"attributes\":{\"callback\":null},\"id\":\"1121\",\"type\":\"DataRange1d\"},{\"attributes\":{},\"id\":\"1138\",\"type\":\"WheelZoomTool\"},{\"attributes\":{},\"id\":\"1141\",\"type\":\"ResetTool\"},{\"attributes\":{},\"id\":\"1128\",\"type\":\"BasicTicker\"}],\"root_ids\":[\"1116\"]},\"title\":\"Bokeh Application\",\"version\":\"1.2.0\"}};\n",
+       "  var render_items = [{\"docid\":\"47a753d2-112e-47e1-894f-61297e2f2ad9\",\"roots\":{\"1116\":\"7b1983bd-dbe4-4e2f-b963-37ac8561cbac\"}}];\n",
+       "  root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
+       "\n",
+       "  }\n",
+       "  if (root.Bokeh !== undefined) {\n",
+       "    embed_document(root);\n",
+       "  } else {\n",
+       "    var attempts = 0;\n",
+       "    var timer = setInterval(function(root) {\n",
+       "      if (root.Bokeh !== undefined) {\n",
+       "        embed_document(root);\n",
+       "        clearInterval(timer);\n",
+       "      }\n",
+       "      attempts++;\n",
+       "      if (attempts > 100) {\n",
+       "        console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n",
+       "        clearInterval(timer);\n",
+       "      }\n",
+       "    }, 10, root)\n",
+       "  }\n",
+       "})(window);"
+      ],
+      "application/vnd.bokehjs_exec.v0+json": ""
+     },
+     "metadata": {
+      "application/vnd.bokehjs_exec.v0+json": {
+       "id": "1116"
+      }
+     },
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "vars = pca.explained_variance_ratio_\n",
     "p = figure(plot_width=400, plot_height=400, title=\"PCA of Train data\")\n",
@@ -718,11 +2117,71 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 29,
    "metadata": {},
-   "outputs": [],
-   "source": [
-    "## exercise here"
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "\n",
+       "\n",
+       "\n",
+       "\n",
+       "\n",
+       "  <div class=\"bk-root\" id=\"5e85ed04-5d19-436d-9c6d-96802c0b59f3\" data-root-id=\"1374\"></div>\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/javascript": [
+       "(function(root) {\n",
+       "  function embed_document(root) {\n",
+       "    \n",
+       "  var docs_json = {\"1b8f65fb-f686-4315-b452-a60ed4f3292c\":{\"roots\":{\"references\":[{\"attributes\":{\"below\":[{\"id\":\"1385\",\"type\":\"LinearAxis\"}],\"center\":[{\"id\":\"1389\",\"type\":\"Grid\"},{\"id\":\"1394\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1390\",\"type\":\"LinearAxis\"}],\"plot_height\":400,\"plot_width\":400,\"renderers\":[{\"id\":\"1411\",\"type\":\"GlyphRenderer\"},{\"id\":\"1416\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1375\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1401\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"1377\",\"type\":\"DataRange1d\"},\"x_scale\":{\"id\":\"1381\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"1379\",\"type\":\"DataRange1d\"},\"y_scale\":{\"id\":\"1383\",\"type\":\"LinearScale\"}},\"id\":\"1374\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"1453\",\"type\":\"BoxAnnotation\"},{\"attributes\":{\"data_source\":{\"id\":\"1408\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1409\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1410\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"1412\",\"type\":\"CDSView\"}},\"id\":\"1411\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"r3w6U3SjJ0DUrHUEWwYkwA25NkMDryzAMQEQ/LV5db/HoqlpbWrwv0/upfqjHjzA9hSNE3jNMMC5cLjOP0MywICkgXg94TPAzsqB7l6zPMDI9ynKS6svwNLGRQH86jfArBBnOSVjN8CnR1n89OctwMZAbcLoeDDAJAOO5gVSOsB8TDRF2D4hwJiKdI/vKTvAt1G91TIWNMA9DUD/pgI3wLYCFzMfHjvAC0aeCywkP8CGGhQoAzY7wOrsaWWp/BzASrNkLoiIEUBewxoBJXgjwMmHrt8RdjnA0xVIqQGmKsBt5jHDBqkxwDIx/MPH6C7A2hnXGz4dOMAIG9wlcJMywHMhIlPG3
TbARwBEURUFMcAtuQm/tSo6wPIkWF2PXxHAwB8iildEGsAhZqoc85oSwHbZ6qmVASPApEJkkntYL8DwssBNLkr+v9kMLbXiZAHApZynzZufMcAdnCozufoWwBZm+cvvIzTAGldZCJnVHsD5DDkxgN0wwJCw34QumgpAGSekI8YsGsB/gbtsL5MVwDOEjb6m/ifA1VN021gc8b/+cPYWFB41wHCof4xkvA9AvnONx7IU/j8QIkn5Uc0XwBMDeb+22TLAbbHQbAGINcCd56Gka4UkQEeSs6jExRfAQ2Rm6nX+E8Azgw3i/Jg+wFrDT4b7ETjAvsph6n3EJMB+oROCWDIwwLNafEw9gjXAPUH31mFQIkAQ1iZmRXQmwHOO1GGUqCbAyG/3OacCJkDgUz+tTB0wwCebB6XlQTvAun7vb8FrMECczwJk0jEaQEFGGtxlNiNAShaM6+6eM8Bhg0u8k84qwHRnjJlJKRjAl1L71tc9L8BCMq8KuwcCwP1Mr9ksGTbANrpzPNlxJ8Dw0JBWDTopwFaYeNfQ8QxAlC81+uCuB0AHplt3Wy8iwDoANum4NC7AH0HcvSHU/z/yJkWef38rwF8/axTE8zTA\",\"dtype\":\"float64\",\"shape\":[90]},\"y\":{\"__ndarray__\":\"SWUjdYr7NsDo+b7zK98wwO6/jNPo9UJAVbbT5VVOMsDBJ3kLJ64EwH1YKMgP8jNAPJduok2xDEDKR2L4hSc6wLcToAhvyBZAudflTzbWN0CP42WEsLi0Pzvx3mwRjj5A20zr8MvdEEB56rgQK6FDQGqcsHhGHzDAdris1N55A0AatDZQIz1FQHSi2D3csTpALdiP7TXs3j8Z5RGT4qwhQM/djMzvEDXAYMviTKahIcAe4ooutsgrQK1a+xXcrj3AivRXPwh2NMC9g/0KDRw+wFfiOgS64BzA3Bz24SMzJ0CRzGORhOodwN1dV9V3CTPAKQdGvuGmMsAnyI7fZ5UgwI9VAqkN3TbADmiV8x/oNMBVIWjjOGEiwLFmo0ej2jDAs5EGkKn0N8A3n4hOCYgVwK3DUpkIkhrAXpSrbihEJMBUWbTo67YqwGB6hNfTvUDA1TbBDIQAMMAvRHoBlxIpQK0e9jzx6yvAikue6Cyo9D9Uv3T05iIcQNPoYMbrnSXAH5TTVS9LMMCX8dsSI/86wIWH/f8eWu+/Crf93/fI1b/TrRYuFOIbwOYpZfkicDTA2aX/rCyLD0A80KAlgzkdwFzZjcvzjy7Au25trUo3OcB712aUvN4wwFVAiowmNDPA066usI4SNsC7TtZ+RxIwQEZ8eW94UwFAI99RIUhjOsD0dMdd9Kf1v1RcoV51DyPAfoCZnZRwPMDZXe8LoccjwL+6BpG1cSDAzGdDbRfYPcDJUT2syyUkwCbagVpxjA7A6zieVvCuMsAaUCdn4QveP7qtbaCun0FASHQ0rDaVGkCgXkqBonY8wNxkNa58QEDAvRHzX6VgJ8B711/mMonwP4fjmMMO7EFAYx4CNC8CKcCpQVToFFchwGIinsyiFRZAeQqSlEbLLsD84NE1CjgHQLEqgNLOPfa/Pzu88sHEO8DUT+l5sI4jQHrJaGgQqUBA\",\"dtype\":\"float64\",\"shape\":[90]}},\"selected\":{\"id\":\"1457\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1456\",\"type\":\"UnionRenderers\"}},\"id\":\"1413\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"1454\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\
"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1410\",\"type\":\"Circle\"},{\"attributes\":{\"source\":{\"id\":\"1408\",\"type\":\"ColumnDataSource\"}},\"id\":\"1412\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"1455\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1395\",\"type\":\"PanTool\"},{\"attributes\":{},\"id\":\"1386\",\"type\":\"BasicTicker\"},{\"attributes\":{},\"id\":\"1456\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1396\",\"type\":\"WheelZoomTool\"},{\"attributes\":{},\"id\":\"1381\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"1457\",\"type\":\"Selection\"},{\"attributes\":{\"overlay\":{\"id\":\"1453\",\"type\":\"BoxAnnotation\"}},\"id\":\"1397\",\"type\":\"BoxZoomTool\"},{\"attributes\":{},\"id\":\"1391\",\"type\":\"BasicTicker\"},{\"attributes\":{},\"id\":\"1398\",\"type\":\"SaveTool\"},{\"attributes\":{},\"id\":\"1399\",\"type\":\"ResetTool\"},{\"attributes\":{\"axis_label\":\"PC2 
(4.34%)\",\"formatter\":{\"id\":\"1451\",\"type\":\"BasicTickFormatter\"},\"ticker\":{\"id\":\"1391\",\"type\":\"BasicTicker\"}},\"id\":\"1390\",\"type\":\"LinearAxis\"},{\"attributes\":{},\"id\":\"1400\",\"type\":\"HelpTool\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1391\",\"type\":\"BasicTicker\"}},\"id\":\"1394\",\"type\":\"Grid\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1395\",\"type\":\"PanTool\"},{\"id\":\"1396\",\"type\":\"WheelZoomTool\"},{\"id\":\"1397\",\"type\":\"BoxZoomTool\"},{\"id\":\"1398\",\"type\":\"SaveTool\"},{\"id\":\"1399\",\"type\":\"ResetTool\"},{\"id\":\"1400\",\"type\":\"HelpTool\"}]},\"id\":\"1401\",\"type\":\"Toolbar\"},{\"attributes\":{\"callback\":null},\"id\":\"1379\",\"type\":\"DataRange1d\"},{\"attributes\":{\"fill_color\":{\"value\":\"blue\"},\"line_color\":{\"value\":\"blue\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1414\",\"type\":\"Circle\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1415\",\"type\":\"Circle\"},{\"attributes\":{\"data_source\":{\"id\":\"1413\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1414\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1415\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"1417\",\"type\":\"CDSView\"}},\"id\":\"1416\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"Dq7fNG/ZJkBY5akTCONLQGb3T+26HENA2+F45a/OJ0A+JQtYuggxwMclG5/G+xdApYP2gR10I0A3HiM2XrNBQNiOs4ipuhdAMKfuYwmhPED949T2x+grQDvwNWw3Eh5Axip2+RdmOkBMaK26QqNMQLygwDyKsEVAoYGu1FToMcBt/YFUOrkrQEdMTS6sgzpAgO2H7Ye+TUAPRy+5QxVPQMIVm/y+ODlAmaTj5CPNTUAU77J6yXFKQFvxE58baUVAYpWdPdxUKkAGJqtYjfdBQE1u1lwi4EZAMnZ
zibN0SECqWw5yqmxKQN1ek4f7K0FAysHtN/+MOkAO0LlLVmg0QNQNQXxgUUFAPd/CO/QrVEC95q9DZpMrQHb8e3lxmATAHRhOYh5/PkCnr3vvifUyQCtme02yAzVATeUPDNgyQEC6639iDPEsQGogX/VFfUZAyDbFMA1qT0ARGOcQsxs2QFYOicPHYTpAT4m5K/zeQkA=\",\"dtype\":\"float64\",\"shape\":[46]},\"y\":{\"__ndarray__\":\"2hj+K4X0BkAj47n5AJ81wF1jdJKTKDvAAonUrxgY+T90GkH3vuk6QHdMAWPEpS5ABHLTkA6hL0DjG3VYvrQhQC87ukLW8RvAX2kywkuLMkAkV8lfgP4sQA/1wqSZXRtAAFDi38HRBsCZlfU106YEQORgO6PDxijAHohBcumMQEC30j2lqLAjwFixwupljR7Ar00pha0OIsCrBQh9al4uwG3cLA36tA1AuMlownL4OMC1keS665cwwJauUjRjpRpA4v2Egz3IAcDuyCxRfnMkwONMp66qwzDA0p6z+SVPMMD+VDyPhj0zwKb2HyVJ4R3A+bJ3Rma6EEAmodSlxMRBQEHzIkvPa01AKBCUmVGLEsB4JCFLCfQwwAjmxFWGqkZAXTW/3xqtHUBSaZkJWDUtQKg8OGl9bSlARORyNSRh0D+DmYhY9QkgQOYI4pwItTHA5OLVdfOlI8CBgsx3axhJQKcfJ+x0iytAc4Cv7TRIFMA=\",\"dtype\":\"float64\",\"shape\":[46]}},\"selected\":{\"id\":\"1455\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1454\",\"type\":\"UnionRenderers\"}},\"id\":\"1408\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"fill_color\":{\"value\":\"orange\"},\"line_color\":{\"value\":\"orange\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"1409\",\"type\":\"Circle\"},{\"attributes\":{\"source\":{\"id\":\"1413\",\"type\":\"ColumnDataSource\"}},\"id\":\"1417\",\"type\":\"CDSView\"},{\"attributes\":{\"callback\":null},\"id\":\"1377\",\"type\":\"DataRange1d\"},{\"attributes\":{\"text\":\"PCA of Test data\"},\"id\":\"1375\",\"type\":\"Title\"},{\"attributes\":{\"ticker\":{\"id\":\"1386\",\"type\":\"BasicTicker\"}},\"id\":\"1389\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1383\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"1449\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"1451\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"axis_label\":\"PC1 (8.19%)\",\"formatter\":{\"id\":\"1449\",\"type\":\"BasicTickFormatter\"},\"ticker\":{\"id\":\"1386\",\"type\":\"BasicTicker\"}},\"id\":\"1385\",\"type\":\"LinearAxis\"}],\"root_ids\":[\"1374\"]},\"title\":\"Bokeh 
Application\",\"version\":\"1.2.0\"}};\n",
+       "  var render_items = [{\"docid\":\"1b8f65fb-f686-4315-b452-a60ed4f3292c\",\"roots\":{\"1374\":\"5e85ed04-5d19-436d-9c6d-96802c0b59f3\"}}];\n",
+       "  root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
+       "\n",
+       "  }\n",
+       "  if (root.Bokeh !== undefined) {\n",
+       "    embed_document(root);\n",
+       "  } else {\n",
+       "    var attempts = 0;\n",
+       "    var timer = setInterval(function(root) {\n",
+       "      if (root.Bokeh !== undefined) {\n",
+       "        embed_document(root);\n",
+       "        clearInterval(timer);\n",
+       "      }\n",
+       "      attempts++;\n",
+       "      if (attempts > 100) {\n",
+       "        console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n",
+       "        clearInterval(timer);\n",
+       "      }\n",
+       "    }, 10, root)\n",
+       "  }\n",
+       "})(window);"
+      ],
+      "application/vnd.bokehjs_exec.v0+json": ""
+     },
+     "metadata": {
+      "application/vnd.bokehjs_exec.v0+json": {
+       "id": "1374"
+      }
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "z_ts = pca.transform(x_ts)\n",
+    "p = figure(plot_width=400, plot_height=400, title=\"PCA of Test data\")\n",
+    "p.circle(z_ts[y_ts==0, 0], z_ts[y_ts==0, 1], line_color=\"orange\", fill_color=\"orange\")\n",
+    "p.circle(z_ts[y_ts==1, 0], z_ts[y_ts==1, 1], line_color=\"blue\", fill_color=\"blue\")\n",
+    "p.xaxis.axis_label = \"PC1 (%.2f%%)\" % (100*vars[0])\n",
+    "p.yaxis.axis_label = \"PC2 (%.2f%%)\" % (100*vars[1])\n",
+    "show(p)"
    ]
   },
   {
@@ -761,7 +2220,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 30,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -774,21 +2233,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 31,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "Cg8TpDATk3XI",
     "outputId": "e9658389-474c-4bf5-f196-11d4518311b7"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
+       "                     metric_params=None, n_jobs=None, n_neighbors=10, p=2,\n",
+       "                     weights='uniform')"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "knn.fit(x_tr, y_tr)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 32,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -845,13 +2317,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 33,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "JISD2EVQ9Q9Z"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[27, 19],\n",
+       "       [ 0, 90]])"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "from sklearn.metrics import confusion_matrix\n",
     "conf = confusion_matrix(y_ts, y_pred_knn)\n",
@@ -870,13 +2354,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "pZVN8GKKdOhy"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "46"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "np.sum(y_ts==0) # total number of \"class 0\" samples in the test set"
    ]
@@ -893,13 +2388,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 35,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "1PVj7JbxdVk0"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "90"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "np.sum(y_ts==1) # total number of \"class 1\" samples in the test set"
    ]
@@ -928,13 +2434,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 36,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "-1-40TyQeAIt"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8602941176470589"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "(conf[0,0] + conf[1,1])/y_ts.shape[0] # y_ts.shape[0] is the sample size of the test set"
    ]
@@ -951,13 +2468,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 37,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "q0emRGAvfWi4"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.8602941176470589\n"
+     ]
+    }
+   ],
    "source": [
     "tp = conf[1,1]\n",
     "tn = conf[0,0]\n",
@@ -984,13 +2509,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 38,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "a9JlR-LNe5ZI"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "conf[1,1] / (conf[1,1] + conf[1,0])"
    ]
@@ -1021,13 +2557,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 39,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "3KeLJcCbkSo6"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8602941176470589"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "from sklearn.metrics import accuracy_score\n",
     "accuracy_score(y_ts, y_pred_knn)"
@@ -1045,13 +2592,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 40,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "MgfhssjZmsg3"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 40,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "from sklearn.metrics import recall_score\n",
     "recall_score(y_ts, y_pred_knn)"
@@ -1069,13 +2627,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 41,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "AKiUXIkPm-N3"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.8602941176470589\n",
+      "1.0\n"
+     ]
+    }
+   ],
    "source": [
     "from sklearn import metrics\n",
     "print(metrics.accuracy_score(y_ts, y_pred_knn))\n",
@@ -1094,14 +2661,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 42,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "HXgvIJM2k3XQ",
     "outputId": "0d2d0773-a292-40cb-d8e7-df6b6ee29ff2"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       1.00      0.59      0.74        46\n",
+      "           1       0.83      1.00      0.90        90\n",
+      "\n",
+      "    accuracy                           0.86       136\n",
+      "   macro avg       0.91      0.79      0.82       136\n",
+      "weighted avg       0.88      0.86      0.85       136\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "from sklearn import metrics\n",
     "print(metrics.classification_report(y_ts, y_pred_knn))"
@@ -1159,14 +2742,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 43,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "OuoRfictk3XW",
     "outputId": "9119acba-9d18-4076-eb3c-8346ba420579"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.6961630553262051\n"
+     ]
+    }
+   ],
    "source": [
     "print(metrics.matthews_corrcoef(y_ts, y_pred_knn))"
    ]
@@ -1241,7 +2832,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 44,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -1291,13 +2882,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 45,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "n12boA3k3Neo"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy = 0.750\n",
+      "MCC = 0.538\n"
+     ]
+    }
+   ],
    "source": [
     "from sklearn import metrics\n",
     "knn = neighbors.KNeighborsClassifier(n_neighbors=10)\n",
@@ -1356,7 +2956,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 46,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -1390,13 +2990,70 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 48,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "-uoahY6yNcIv"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "### Fold 1 / 5 ###\n",
+      "TRAIN size: 108\n",
+      "-- class 0: 36 class 1: 72\n",
+      "TEST size: 28\n",
+      "-- class 0: 9 class 1: 19\n",
+      "\n",
+      "Model performance\n",
+      "Accuracy on TEST set: 0.893\n",
+      "MCC on TEST set: 0.750\n",
+      "\n",
+      "### Fold 2 / 5 ###\n",
+      "TRAIN size: 109\n",
+      "-- class 0: 36 class 1: 73\n",
+      "TEST size: 27\n",
+      "-- class 0: 9 class 1: 18\n",
+      "\n",
+      "Model performance\n",
+      "Accuracy on TEST set: 0.852\n",
+      "MCC on TEST set: 0.674\n",
+      "\n",
+      "### Fold 3 / 5 ###\n",
+      "TRAIN size: 109\n",
+      "-- class 0: 36 class 1: 73\n",
+      "TEST size: 27\n",
+      "-- class 0: 9 class 1: 18\n",
+      "\n",
+      "Model performance\n",
+      "Accuracy on TEST set: 0.852\n",
+      "MCC on TEST set: 0.674\n",
+      "\n",
+      "### Fold 4 / 5 ###\n",
+      "TRAIN size: 109\n",
+      "-- class 0: 36 class 1: 73\n",
+      "TEST size: 27\n",
+      "-- class 0: 9 class 1: 18\n",
+      "\n",
+      "Model performance\n",
+      "Accuracy on TEST set: 0.815\n",
+      "MCC on TEST set: 0.590\n",
+      "\n",
+      "### Fold 5 / 5 ###\n",
+      "TRAIN size: 109\n",
+      "-- class 0: 36 class 1: 73\n",
+      "TEST size: 27\n",
+      "-- class 0: 9 class 1: 18\n",
+      "\n",
+      "Model performance\n",
+      "Accuracy on TEST set: 0.926\n",
+      "MCC on TEST set: 0.837\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "## get the number of splitting operations\n",
     "N = skf.get_n_splits(x_tr, y_tr)\n",
@@ -1412,7 +3069,7 @@
     "## computing kNN accuracy & MCC on each test partition\n",
     "i = 1\n",
     "for (idx_tr, idx_ts) in skf.split(x_tr, y_tr):\n",
-    "    print(f\"### Fold {i+1} / {N:d} ###\")\n",
+    "    print(f\"### Fold {i} / {N:d} ###\")\n",
     "    X_train, Y_train = x_tr[idx_tr], y_tr[idx_tr]\n",
     "    X_test, Y_test = x_tr[idx_ts], y_tr[idx_ts]\n",
     "    print(\"TRAIN size:\", X_train.shape[0])\n",
@@ -1451,9 +3108,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 49,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Average cross-validation accuracy: 0.867\n",
+      "Average cross-validation MCC: 0.705\n"
+     ]
+    }
+   ],
    "source": [
     "## note: we need to convert the lists to numpy arrays before computing the means\n",
     "acc_avg = np.mean(np.array(acc_list))\n",
@@ -1472,13 +3138,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 50,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "-uoahY6yNcIv"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "### Iteration 1 ###\n",
+      "### Iteration 2 ###\n",
+      "### Iteration 3 ###\n",
+      "### Iteration 4 ###\n",
+      "### Iteration 5 ###\n",
+      "### Iteration 6 ###\n",
+      "### Iteration 7 ###\n",
+      "### Iteration 8 ###\n",
+      "### Iteration 9 ###\n",
+      "### Iteration 10 ###\n"
+     ]
+    }
+   ],
    "source": [
     "## how many repetitions?\n",
     "N_CV = 10\n",
@@ -1519,13 +3202,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 51,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Average cross-validation accuracy: 0.854\n",
+      "Average cross-validation MCC: 0.672\n"
+     ]
+    }
+   ],
    "source": [
     "## note: we need to convert the lists to numpy arrays before computing the means\n",
     "# acc_avg = ...\n",
     "# mcc_avg = ...\n",
+    "acc_avg = np.mean(np.array(acc_list))\n",
+    "mcc_avg = np.mean(np.array(mcc_list))\n",
     "\n",
     "print(f\"Average cross-validation accuracy: {acc_avg:.3f}\")\n",
     "print(f\"Average cross-validation MCC: {mcc_avg:.3f}\")"
@@ -1572,7 +3266,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.7.3"
   }
  },
  "nbformat": 4,
diff --git a/chierici_practical_part2.ipynb b/chierici_practical_part2.ipynb
index a658b47..07f83ea 100644
--- a/chierici_practical_part2.ipynb
+++ b/chierici_practical_part2.ipynb
@@ -35,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -56,9 +56,322 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div class=\"bk-root\">\n",
+       "        <a href=\"https://bokeh.pydata.org\" target=\"_blank\" class=\"bk-logo bk-logo-small bk-logo-notebook\"></a>\n",
+       "        <span id=\"1001\">Loading BokehJS ...</span>\n",
+       "    </div>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/javascript": [
+       "\n",
+       "(function(root) {\n",
+       "  function now() {\n",
+       "    return new Date();\n",
+       "  }\n",
+       "\n",
+       "  var force = true;\n",
+       "\n",
+       "  if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n",
+       "    root._bokeh_onload_callbacks = [];\n",
+       "    root._bokeh_is_loading = undefined;\n",
+       "  }\n",
+       "\n",
+       "  var JS_MIME_TYPE = 'application/javascript';\n",
+       "  var HTML_MIME_TYPE = 'text/html';\n",
+       "  var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n",
+       "  var CLASS_NAME = 'output_bokeh rendered_html';\n",
+       "\n",
+       "  /**\n",
+       "   * Render data to the DOM node\n",
+       "   */\n",
+       "  function render(props, node) {\n",
+       "    var script = document.createElement(\"script\");\n",
+       "    node.appendChild(script);\n",
+       "  }\n",
+       "\n",
+       "  /**\n",
+       "   * Handle when an output is cleared or removed\n",
+       "   */\n",
+       "  function handleClearOutput(event, handle) {\n",
+       "    var cell = handle.cell;\n",
+       "\n",
+       "    var id = cell.output_area._bokeh_element_id;\n",
+       "    var server_id = cell.output_area._bokeh_server_id;\n",
+       "    // Clean up Bokeh references\n",
+       "    if (id != null && id in Bokeh.index) {\n",
+       "      Bokeh.index[id].model.document.clear();\n",
+       "      delete Bokeh.index[id];\n",
+       "    }\n",
+       "\n",
+       "    if (server_id !== undefined) {\n",
+       "      // Clean up Bokeh references\n",
+       "      var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n",
+       "      cell.notebook.kernel.execute(cmd, {\n",
+       "        iopub: {\n",
+       "          output: function(msg) {\n",
+       "            var id = msg.content.text.trim();\n",
+       "            if (id in Bokeh.index) {\n",
+       "              Bokeh.index[id].model.document.clear();\n",
+       "              delete Bokeh.index[id];\n",
+       "            }\n",
+       "          }\n",
+       "        }\n",
+       "      });\n",
+       "      // Destroy server and session\n",
+       "      var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n",
+       "      cell.notebook.kernel.execute(cmd);\n",
+       "    }\n",
+       "  }\n",
+       "\n",
+       "  /**\n",
+       "   * Handle when a new output is added\n",
+       "   */\n",
+       "  function handleAddOutput(event, handle) {\n",
+       "    var output_area = handle.output_area;\n",
+       "    var output = handle.output;\n",
+       "\n",
+       "    // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n",
+       "    if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
+       "      return\n",
+       "    }\n",
+       "\n",
+       "    var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
+       "\n",
+       "    if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n",
+       "      toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n",
+       "      // store reference to embed id on output_area\n",
+       "      output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
+       "    }\n",
+       "    if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
+       "      var bk_div = document.createElement(\"div\");\n",
+       "      bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
+       "      var script_attrs = bk_div.children[0].attributes;\n",
+       "      for (var i = 0; i < script_attrs.length; i++) {\n",
+       "        toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
+       "      }\n",
+       "      // store reference to server id on output_area\n",
+       "      output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
+       "    }\n",
+       "  }\n",
+       "\n",
+       "  function register_renderer(events, OutputArea) {\n",
+       "\n",
+       "    function append_mime(data, metadata, element) {\n",
+       "      // create a DOM node to render to\n",
+       "      var toinsert = this.create_output_subarea(\n",
+       "        metadata,\n",
+       "        CLASS_NAME,\n",
+       "        EXEC_MIME_TYPE\n",
+       "      );\n",
+       "      this.keyboard_manager.register_events(toinsert);\n",
+       "      // Render to node\n",
+       "      var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
+       "      render(props, toinsert[toinsert.length - 1]);\n",
+       "      element.append(toinsert);\n",
+       "      return toinsert\n",
+       "    }\n",
+       "\n",
+       "    /* Handle when an output is cleared or removed */\n",
+       "    events.on('clear_output.CodeCell', handleClearOutput);\n",
+       "    events.on('delete.Cell', handleClearOutput);\n",
+       "\n",
+       "    /* Handle when a new output is added */\n",
+       "    events.on('output_added.OutputArea', handleAddOutput);\n",
+       "\n",
+       "    /**\n",
+       "     * Register the mime type and append_mime function with output_area\n",
+       "     */\n",
+       "    OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
+       "      /* Is output safe? */\n",
+       "      safe: true,\n",
+       "      /* Index of renderer in `output_area.display_order` */\n",
+       "      index: 0\n",
+       "    });\n",
+       "  }\n",
+       "\n",
+       "  // register the mime type if in Jupyter Notebook environment and previously unregistered\n",
+       "  if (root.Jupyter !== undefined) {\n",
+       "    var events = require('base/js/events');\n",
+       "    var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
+       "\n",
+       "    if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
+       "      register_renderer(events, OutputArea);\n",
+       "    }\n",
+       "  }\n",
+       "\n",
+       "  \n",
+       "  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
+       "    root._bokeh_timeout = Date.now() + 5000;\n",
+       "    root._bokeh_failed_load = false;\n",
+       "  }\n",
+       "\n",
+       "  var NB_LOAD_WARNING = {'data': {'text/html':\n",
+       "     \"<div style='background-color: #fdd'>\\n\"+\n",
+       "     \"<p>\\n\"+\n",
+       "     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
+       "     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
+       "     \"</p>\\n\"+\n",
+       "     \"<ul>\\n\"+\n",
+       "     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n",
+       "     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n",
+       "     \"</ul>\\n\"+\n",
+       "     \"<code>\\n\"+\n",
+       "     \"from bokeh.resources import INLINE\\n\"+\n",
+       "     \"output_notebook(resources=INLINE)\\n\"+\n",
+       "     \"</code>\\n\"+\n",
+       "     \"</div>\"}};\n",
+       "\n",
+       "  function display_loaded() {\n",
+       "    var el = document.getElementById(\"1001\");\n",
+       "    if (el != null) {\n",
+       "      el.textContent = \"BokehJS is loading...\";\n",
+       "    }\n",
+       "    if (root.Bokeh !== undefined) {\n",
+       "      if (el != null) {\n",
+       "        el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n",
+       "      }\n",
+       "    } else if (Date.now() < root._bokeh_timeout) {\n",
+       "      setTimeout(display_loaded, 100)\n",
+       "    }\n",
+       "  }\n",
+       "\n",
+       "\n",
+       "  function run_callbacks() {\n",
+       "    try {\n",
+       "      root._bokeh_onload_callbacks.forEach(function(callback) {\n",
+       "        if (callback != null)\n",
+       "          callback();\n",
+       "      });\n",
+       "    } finally {\n",
+       "      delete root._bokeh_onload_callbacks\n",
+       "    }\n",
+       "    console.debug(\"Bokeh: all callbacks have finished\");\n",
+       "  }\n",
+       "\n",
+       "  function load_libs(css_urls, js_urls, callback) {\n",
+       "    if (css_urls == null) css_urls = [];\n",
+       "    if (js_urls == null) js_urls = [];\n",
+       "\n",
+       "    root._bokeh_onload_callbacks.push(callback);\n",
+       "    if (root._bokeh_is_loading > 0) {\n",
+       "      console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
+       "      return null;\n",
+       "    }\n",
+       "    if (js_urls == null || js_urls.length === 0) {\n",
+       "      run_callbacks();\n",
+       "      return null;\n",
+       "    }\n",
+       "    console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
+       "    root._bokeh_is_loading = css_urls.length + js_urls.length;\n",
+       "\n",
+       "    function on_load() {\n",
+       "      root._bokeh_is_loading--;\n",
+       "      if (root._bokeh_is_loading === 0) {\n",
+       "        console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n",
+       "        run_callbacks()\n",
+       "      }\n",
+       "    }\n",
+       "\n",
+       "    function on_error() {\n",
+       "      console.error(\"failed to load \" + url);\n",
+       "    }\n",
+       "\n",
+       "    for (var i = 0; i < css_urls.length; i++) {\n",
+       "      var url = css_urls[i];\n",
+       "      const element = document.createElement(\"link\");\n",
+       "      element.onload = on_load;\n",
+       "      element.onerror = on_error;\n",
+       "      element.rel = \"stylesheet\";\n",
+       "      element.type = \"text/css\";\n",
+       "      element.href = url;\n",
+       "      console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n",
+       "      document.body.appendChild(element);\n",
+       "    }\n",
+       "\n",
+       "    for (var i = 0; i < js_urls.length; i++) {\n",
+       "      var url = js_urls[i];\n",
+       "      var element = document.createElement('script');\n",
+       "      element.onload = on_load;\n",
+       "      element.onerror = on_error;\n",
+       "      element.async = false;\n",
+       "      element.src = url;\n",
+       "      console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
+       "      document.head.appendChild(element);\n",
+       "    }\n",
+       "  };var element = document.getElementById(\"1001\");\n",
+       "  if (element == null) {\n",
+       "    console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n",
+       "    return false;\n",
+       "  }\n",
+       "\n",
+       "  function inject_raw_css(css) {\n",
+       "    const element = document.createElement(\"style\");\n",
+       "    element.appendChild(document.createTextNode(css));\n",
+       "    document.body.appendChild(element);\n",
+       "  }\n",
+       "\n",
+       "  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.2.0.min.js\"];\n",
+       "  var css_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.css\"];\n",
+       "\n",
+       "  var inline_js = [\n",
+       "    function(Bokeh) {\n",
+       "      Bokeh.set_log_level(\"info\");\n",
+       "    },\n",
+       "    \n",
+       "    function(Bokeh) {\n",
+       "      \n",
+       "    },\n",
+       "    function(Bokeh) {} // ensure no trailing comma for IE\n",
+       "  ];\n",
+       "\n",
+       "  function run_inline_js() {\n",
+       "    \n",
+       "    if ((root.Bokeh !== undefined) || (force === true)) {\n",
+       "      for (var i = 0; i < inline_js.length; i++) {\n",
+       "        inline_js[i].call(root, root.Bokeh);\n",
+       "      }if (force === true) {\n",
+       "        display_loaded();\n",
+       "      }} else if (Date.now() < root._bokeh_timeout) {\n",
+       "      setTimeout(run_inline_js, 100);\n",
+       "    } else if (!root._bokeh_failed_load) {\n",
+       "      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
+       "      root._bokeh_failed_load = true;\n",
+       "    } else if (force !== true) {\n",
+       "      var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n",
+       "      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
+       "    }\n",
+       "\n",
+       "  }\n",
+       "\n",
+       "  if (root._bokeh_is_loading === 0) {\n",
+       "    console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
+       "    run_inline_js();\n",
+       "  } else {\n",
+       "    load_libs(css_urls, js_urls, function() {\n",
+       "      console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n",
+       "      run_inline_js();\n",
+       "    });\n",
+       "  }\n",
+       "}(window));"
+      ],
+      "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n  function now() {\n    return new Date();\n  }\n\n  var force = true;\n\n  if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n    root._bokeh_onload_callbacks = [];\n    root._bokeh_is_loading = undefined;\n  }\n\n  \n\n  \n  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n    root._bokeh_timeout = Date.now() + 5000;\n    root._bokeh_failed_load = false;\n  }\n\n  var NB_LOAD_WARNING = {'data': {'text/html':\n     \"<div style='background-color: #fdd'>\\n\"+\n     \"<p>\\n\"+\n     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n     \"</p>\\n\"+\n     \"<ul>\\n\"+\n     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n     \"</ul>\\n\"+\n     \"<code>\\n\"+\n     \"from bokeh.resources import INLINE\\n\"+\n     \"output_notebook(resources=INLINE)\\n\"+\n     \"</code>\\n\"+\n     \"</div>\"}};\n\n  function display_loaded() {\n    var el = document.getElementById(\"1001\");\n    if (el != null) {\n      el.textContent = \"BokehJS is loading...\";\n    }\n    if (root.Bokeh !== undefined) {\n      if (el != null) {\n        el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n      }\n    } else if (Date.now() < root._bokeh_timeout) {\n      setTimeout(display_loaded, 100)\n    }\n  }\n\n\n  function run_callbacks() {\n    try {\n      root._bokeh_onload_callbacks.forEach(function(callback) {\n        if (callback != null)\n          callback();\n      });\n    } finally {\n      delete root._bokeh_onload_callbacks\n    }\n    console.debug(\"Bokeh: all callbacks have finished\");\n  }\n\n  function load_libs(css_urls, js_urls, callback) {\n    if (css_urls == null) css_urls = [];\n    if (js_urls 
== null) js_urls = [];\n\n    root._bokeh_onload_callbacks.push(callback);\n    if (root._bokeh_is_loading > 0) {\n      console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n      return null;\n    }\n    if (js_urls == null || js_urls.length === 0) {\n      run_callbacks();\n      return null;\n    }\n    console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n    root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n    function on_load() {\n      root._bokeh_is_loading--;\n      if (root._bokeh_is_loading === 0) {\n        console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n        run_callbacks()\n      }\n    }\n\n    function on_error() {\n      console.error(\"failed to load \" + url);\n    }\n\n    for (var i = 0; i < css_urls.length; i++) {\n      var url = css_urls[i];\n      const element = document.createElement(\"link\");\n      element.onload = on_load;\n      element.onerror = on_error;\n      element.rel = \"stylesheet\";\n      element.type = \"text/css\";\n      element.href = url;\n      console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n      document.body.appendChild(element);\n    }\n\n    for (var i = 0; i < js_urls.length; i++) {\n      var url = js_urls[i];\n      var element = document.createElement('script');\n      element.onload = on_load;\n      element.onerror = on_error;\n      element.async = false;\n      element.src = url;\n      console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n      document.head.appendChild(element);\n    }\n  };var element = document.getElementById(\"1001\");\n  if (element == null) {\n    console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. 
\")\n    return false;\n  }\n\n  function inject_raw_css(css) {\n    const element = document.createElement(\"style\");\n    element.appendChild(document.createTextNode(css));\n    document.body.appendChild(element);\n  }\n\n  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.2.0.min.js\"];\n  var css_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.css\"];\n\n  var inline_js = [\n    function(Bokeh) {\n      Bokeh.set_log_level(\"info\");\n    },\n    \n    function(Bokeh) {\n      \n    },\n    function(Bokeh) {} // ensure no trailing comma for IE\n  ];\n\n  function run_inline_js() {\n    \n    if ((root.Bokeh !== undefined) || (force === true)) {\n      for (var i = 0; i < inline_js.length; i++) {\n        inline_js[i].call(root, root.Bokeh);\n      }if (force === true) {\n        display_loaded();\n      }} else if (Date.now() < root._bokeh_timeout) {\n      setTimeout(run_inline_js, 100);\n    } else if (!root._bokeh_failed_load) {\n      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n      root._bokeh_failed_load = true;\n    } else if (force !== true) {\n      var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n    }\n\n  }\n\n  if (root._bokeh_is_loading === 0) {\n    console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n    run_inline_js();\n  } else {\n    load_libs(css_urls, js_urls, function() {\n      console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n      run_inline_js();\n    });\n  }\n}(window));"
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "output_notebook()"
    ]
@@ -75,7 +388,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -85,7 +398,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -111,7 +424,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -145,7 +458,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -169,7 +482,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -193,7 +506,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -206,7 +519,8 @@
     "class_lab_tr = labs_tr[['CLASS']]\n",
     "class_lab_ts = labs_ts[['CLASS']]\n",
     "y_tr = class_lab_tr.values.ravel()\n",
-    "y_ts = class_lab_ts.values.ravel()"
+    "y_ts = class_lab_ts.values.ravel()\n",
+    "pd.Series.ravel?"
    ]
   },
   {
@@ -231,7 +545,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -246,7 +560,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -281,14 +595,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "Qqc3TmFBLKKn",
     "outputId": "d9ef6c64-9f18-4bea-9167-decaa0ca1820"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[LibSVM]"
+     ]
+    }
+   ],
    "source": [
     "## fit the model and get the predictions\n",
     "svc.fit(x_tr, y_tr)\n",
@@ -307,14 +629,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "Ku0JSF_ALKKs",
     "outputId": "94585c0e-534a-445d-d0ba-92a9bf3a9388"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556\n"
+     ]
+    }
+   ],
    "source": [
     "from sklearn import metrics\n",
     "print('MCC = ', metrics.matthews_corrcoef(class_lab_ts, class_pred_ts))\n",
@@ -334,14 +666,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 14,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "whSZnHGALKKx",
     "outputId": "2c471734-3504-4af7-8ebb-74e5a02be301"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "           0       0.91      0.93      0.92        46\n",
+      "           1       0.97      0.96      0.96        90\n",
+      "\n",
+      "    accuracy                           0.95       136\n",
+      "   macro avg       0.94      0.95      0.94       136\n",
+      "weighted avg       0.95      0.95      0.95       136\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "print(metrics.classification_report(class_lab_ts, class_pred_ts))"
    ]
@@ -358,15 +706,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "ZT6XjB20LKK0"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MCC =  0.8704408378703687\n",
+      "ACC =  0.9411764705882353\n",
+      "SENS =  0.9444444444444444\n"
+     ]
+    }
+   ],
    "source": [
-    "## space for exercise\n"
+    "## space for exercise\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "clf = RandomForestClassifier(n_estimators = 500)\n",
+    "clf.fit(x_tr,y_tr)\n",
+    "y_pred = clf.predict(x_ts)\n",
+    "\n",
+    "print('MCC = ', metrics.matthews_corrcoef(class_lab_ts, y_pred))\n",
+    "print('ACC = ', metrics.accuracy_score(class_lab_ts, y_pred))\n",
+    "print('SENS = ', metrics.recall_score(class_lab_ts, y_pred))"
    ]
   },
   {
@@ -393,7 +759,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {
     "colab": {},
     "colab_type": "code",
@@ -401,7 +767,58 @@
     "outputId": "099e6404-c7fd-414a-b49a-4092af095c57",
     "scrolled": true
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "C =  1e-06\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/karencopeland/anaconda3/lib/python3.7/site-packages/sklearn/metrics/classification.py:872: RuntimeWarning: invalid value encountered in double_scalars\n",
+      "  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MCC =  0.0\n",
+      "ACC =  0.6617647058823529\n",
+      "SENS =  1.0 \n",
+      "\n",
+      "C =  1e-05\n",
+      "MCC =  0.6310547428675068\n",
+      "ACC =  0.8308823529411765\n",
+      "SENS =  1.0 \n",
+      "\n",
+      "C =  0.0001\n",
+      "MCC =  0.9014492753623189\n",
+      "ACC =  0.9558823529411765\n",
+      "SENS =  0.9666666666666667 \n",
+      "\n",
+      "C =  0.001\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "C =  0.01\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "C =  0.1\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "## define the sequence of C values we want to use in the search of the best one\n",
     "C_list = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]\n",
@@ -437,15 +854,209 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 22,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "BPtC-EBSLKK_"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Gamma =  0.001  C =  1e-06\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/karencopeland/anaconda3/lib/python3.7/site-packages/sklearn/metrics/classification.py:872: RuntimeWarning: invalid value encountered in double_scalars\n",
+      "  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MCC =  0.0\n",
+      "ACC =  0.6617647058823529\n",
+      "SENS =  1.0 \n",
+      "\n",
+      "Gamma =  0.01  C =  1e-06\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/karencopeland/anaconda3/lib/python3.7/site-packages/sklearn/metrics/classification.py:872: RuntimeWarning: invalid value encountered in double_scalars\n",
+      "  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MCC =  0.0\n",
+      "ACC =  0.6617647058823529\n",
+      "SENS =  1.0 \n",
+      "\n",
+      "Gamma =  0.1  C =  1e-06\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/karencopeland/anaconda3/lib/python3.7/site-packages/sklearn/metrics/classification.py:872: RuntimeWarning: invalid value encountered in double_scalars\n",
+      "  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MCC =  0.0\n",
+      "ACC =  0.6617647058823529\n",
+      "SENS =  1.0 \n",
+      "\n",
+      "Gamma =  1  C =  1e-06\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/karencopeland/anaconda3/lib/python3.7/site-packages/sklearn/metrics/classification.py:872: RuntimeWarning: invalid value encountered in double_scalars\n",
+      "  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MCC =  0.0\n",
+      "ACC =  0.6617647058823529\n",
+      "SENS =  1.0 \n",
+      "\n",
+      "Gamma =  0.001  C =  1e-05\n",
+      "MCC =  0.6310547428675068\n",
+      "ACC =  0.8308823529411765\n",
+      "SENS =  1.0 \n",
+      "\n",
+      "Gamma =  0.01  C =  1e-05\n",
+      "MCC =  0.6310547428675068\n",
+      "ACC =  0.8308823529411765\n",
+      "SENS =  1.0 \n",
+      "\n",
+      "Gamma =  0.1  C =  1e-05\n",
+      "MCC =  0.6310547428675068\n",
+      "ACC =  0.8308823529411765\n",
+      "SENS =  1.0 \n",
+      "\n",
+      "Gamma =  1  C =  1e-05\n",
+      "MCC =  0.6310547428675068\n",
+      "ACC =  0.8308823529411765\n",
+      "SENS =  1.0 \n",
+      "\n",
+      "Gamma =  0.001  C =  0.0001\n",
+      "MCC =  0.9014492753623189\n",
+      "ACC =  0.9558823529411765\n",
+      "SENS =  0.9666666666666667 \n",
+      "\n",
+      "Gamma =  0.01  C =  0.0001\n",
+      "MCC =  0.9014492753623189\n",
+      "ACC =  0.9558823529411765\n",
+      "SENS =  0.9666666666666667 \n",
+      "\n",
+      "Gamma =  0.1  C =  0.0001\n",
+      "MCC =  0.9014492753623189\n",
+      "ACC =  0.9558823529411765\n",
+      "SENS =  0.9666666666666667 \n",
+      "\n",
+      "Gamma =  1  C =  0.0001\n",
+      "MCC =  0.9014492753623189\n",
+      "ACC =  0.9558823529411765\n",
+      "SENS =  0.9666666666666667 \n",
+      "\n",
+      "Gamma =  0.001  C =  0.001\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  0.01  C =  0.001\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  0.1  C =  0.001\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  1  C =  0.001\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  0.001  C =  0.01\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  0.01  C =  0.01\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  0.1  C =  0.01\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  1  C =  0.01\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  0.001  C =  0.1\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  0.01  C =  0.1\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  0.1  C =  0.1\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n",
+      "Gamma =  1  C =  0.1\n",
+      "MCC =  0.8857501367027195\n",
+      "ACC =  0.9485294117647058\n",
+      "SENS =  0.9555555555555556 \n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
-    "## space for exercise"
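+    "## space for exercise: grid over C and gamma. NOTE: SVC ignores gamma when kernel='linear', so the metrics repeat across gamma values; use kernel='rbf' to see gamma's effect\n",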
+    "C_list = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]\n",
+    "gamma = [0.001, 0.01, 0.1, 1]\n",
+    "for C in C_list:\n",
+    "    for G in gamma:\n",
+    "        print('Gamma = ', G, ' C = ', C)\n",
+    "        svc = svm.SVC(kernel = 'linear', C=C, gamma=G)\n",
+    "        svc.fit(x_tr, class_lab_tr.values.ravel())\n",
+    "        class_pred_ts = svc.predict(x_ts)\n",
+    "        print('MCC = ', metrics.matthews_corrcoef(class_lab_ts, class_pred_ts))\n",
+    "        print('ACC = ', metrics.accuracy_score(class_lab_ts, class_pred_ts))\n",
+    "        print('SENS = ', metrics.recall_score(class_lab_ts, class_pred_ts), \"\\n\")"
    ]
   },
   {
@@ -460,14 +1071,25 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 23,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "utM1ALBfLKLC",
     "outputId": "d96dc041-2f6f-4f1a-bca5-70310d1f79ee"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'C': 0.001, 'gamma': 0.001}"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "from sklearn.model_selection import GridSearchCV\n",
     "\n",
@@ -505,14 +1127,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 24,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "2lZAaTXJLKLH",
     "outputId": "2155231c-e50c-4c06-82c4-6b6a5f7c4ee2"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
+       "                       max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
+       "                       min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "                       min_samples_leaf=1, min_samples_split=2,\n",
+       "                       min_weight_fraction_leaf=0.0, n_estimators=250,\n",
+       "                       n_jobs=None, oob_score=False, random_state=None,\n",
+       "                       verbose=0, warm_start=False)"
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "# Build a forest and compute the feature importances\n",
     "rf = RandomForestClassifier(n_estimators=250)\n",
@@ -531,14 +1170,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 25,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "rspvHmO0LKLK",
     "outputId": "7b131d8f-ebc8-4d03-9f38-ad90de735367"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "MCC =  0.9184310967112843\n",
+      "ACC =  0.9632352941176471\n",
+      "SENS =  0.9666666666666667\n"
+     ]
+    }
+   ],
    "source": [
     "class_pred_ts = rf.predict(x_ts)\n",
     "print('MCC = ', metrics.matthews_corrcoef(class_lab_ts, class_pred_ts))\n",
@@ -558,14 +1207,32 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 26,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "7g9k5EHsLKLU",
     "outputId": "aa26094b-0e4a-48f0-be91-ecd2874ab204"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Feature ranking (top 10 features):\n",
+      "1. feature 12762 (0.016504)\n",
+      "2. feature 3426 (0.010583)\n",
+      "3. feature 5149 (0.008672)\n",
+      "4. feature 2901 (0.008246)\n",
+      "5. feature 450 (0.008059)\n",
+      "6. feature 31676 (0.007249)\n",
+      "7. feature 8119 (0.006618)\n",
+      "8. feature 18004 (0.006257)\n",
+      "9. feature 5719 (0.006225)\n",
+      "10. feature 12773 (0.006135)\n"
+     ]
+    }
+   ],
    "source": [
     "importances = rf.feature_importances_\n",
     "indices = np.argsort(importances)[::-1]\n",
@@ -588,14 +1255,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 27,
    "metadata": {
     "colab": {},
     "colab_type": "code",
     "id": "2fSkitN7LKLY",
     "outputId": "73191a71-9657-4582-ede6-7fb14cd3fc05"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ABCG4.Gene_AceView\n",
+      "RRM1.Gene_AceView\n",
+      "FLII.Gene_AceView\n",
+      "LRBA.Gene_AceView\n",
+      "CHD5.Gene_RefSeq\n",
+      "luspuby.Gene_AceView\n",
+      "AURKA.Gene_AceView\n",
+      "FANCM.Gene_AceView\n",
+      "LOC100287397.Gene_RefSeq\n",
+      "ACN9.Gene_AceView\n"
+     ]
+    }
+   ],
    "source": [
     "columnsNamesArr = data_tr.columns.values\n",
     "for i in range(10):\n",
@@ -732,7 +1416,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.8"
+   "version": "3.7.3"
   }
  },
  "nbformat": 4,

	ALB.Gene_AceView	CD24L4.1.Gene_AceView	RPS11.Gene_RefSeq	RPS18.Gene_AceView	C5orf13.Gene_AceView	CCT2.Gene_AceView	COL1A1.Gene_AceView	DDX1.Gene_AceView	EEF1A1.Gene_AceView	FLT3LG_.Gene_AceView	...	zeedor.Gene_AceView	zorsa.Gene_AceView	zudee.Gene_AceView
0	9.29	18.82	21.17	20.90	20.02	16.31	18.60	15.73	21.71	20.02	...	0.00	0.00	0.00
1	9.25	20.25	22.44	22.00	21.05	17.06	19.39	22.84	22.72	21.26	...	5.54	3.39	5.45
2	8.99	20.09	22.09	21.71	21.65	16.85	23.02	15.79	22.24	20.75	...	0.00	3.75	0.00
3	7.32	19.82	20.52	20.90	21.58	16.49	18.91	15.45	22.06	19.59	...	0.00	0.00	0.00
4	10.56	21.19	20.69	21.29	20.28	16.22	17.15	16.01	21.84	19.74	...	0.00	5.20	0.00
	sampleID	CLASS	SEX	RND
0	SEQC_NB001	0	1	1
1	SEQC_NB003	0	0	0
2	SEQC_NB005	0	0	1
3	SEQC_NB011	1	1	1
4	SEQC_NB013	0	1	1