diff --git a/chierici_practical_part1.ipynb b/chierici_practical_part1.ipynb
index 1a9668c..753d381 100644
--- a/chierici_practical_part1.ipynb
+++ b/chierici_practical_part1.ipynb
@@ -55,7 +55,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -73,9 +73,298 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "
Loading BokehJS ...\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/javascript": [
+ "\n",
+ "(function(root) {\n",
+ " function now() {\n",
+ " return new Date();\n",
+ " }\n",
+ "\n",
+ " var force = true;\n",
+ "\n",
+ " if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n",
+ " root._bokeh_onload_callbacks = [];\n",
+ " root._bokeh_is_loading = undefined;\n",
+ " }\n",
+ "\n",
+ " var JS_MIME_TYPE = 'application/javascript';\n",
+ " var HTML_MIME_TYPE = 'text/html';\n",
+ " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n",
+ " var CLASS_NAME = 'output_bokeh rendered_html';\n",
+ "\n",
+ " /**\n",
+ " * Render data to the DOM node\n",
+ " */\n",
+ " function render(props, node) {\n",
+ " var script = document.createElement(\"script\");\n",
+ " node.appendChild(script);\n",
+ " }\n",
+ "\n",
+ " /**\n",
+ " * Handle when an output is cleared or removed\n",
+ " */\n",
+ " function handleClearOutput(event, handle) {\n",
+ " var cell = handle.cell;\n",
+ "\n",
+ " var id = cell.output_area._bokeh_element_id;\n",
+ " var server_id = cell.output_area._bokeh_server_id;\n",
+ " // Clean up Bokeh references\n",
+ " if (id !== undefined) {\n",
+ " Bokeh.index[id].model.document.clear();\n",
+ " delete Bokeh.index[id];\n",
+ " }\n",
+ "\n",
+ " if (server_id !== undefined) {\n",
+ " // Clean up Bokeh references\n",
+ " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n",
+ " cell.notebook.kernel.execute(cmd, {\n",
+ " iopub: {\n",
+ " output: function(msg) {\n",
+ " var element_id = msg.content.text.trim();\n",
+ " Bokeh.index[element_id].model.document.clear();\n",
+ " delete Bokeh.index[element_id];\n",
+ " }\n",
+ " }\n",
+ " });\n",
+ " // Destroy server and session\n",
+ " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n",
+ " cell.notebook.kernel.execute(cmd);\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " /**\n",
+ " * Handle when a new output is added\n",
+ " */\n",
+ " function handleAddOutput(event, handle) {\n",
+ " var output_area = handle.output_area;\n",
+ " var output = handle.output;\n",
+ "\n",
+ " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n",
+ " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
+ " return\n",
+ " }\n",
+ "\n",
+ " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
+ "\n",
+ " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n",
+ " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n",
+ " // store reference to embed id on output_area\n",
+ " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
+ " }\n",
+ " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
+ " var bk_div = document.createElement(\"div\");\n",
+ " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
+ " var script_attrs = bk_div.children[0].attributes;\n",
+ " for (var i = 0; i < script_attrs.length; i++) {\n",
+ " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
+ " }\n",
+ " // store reference to server id on output_area\n",
+ " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " function register_renderer(events, OutputArea) {\n",
+ "\n",
+ " function append_mime(data, metadata, element) {\n",
+ " // create a DOM node to render to\n",
+ " var toinsert = this.create_output_subarea(\n",
+ " metadata,\n",
+ " CLASS_NAME,\n",
+ " EXEC_MIME_TYPE\n",
+ " );\n",
+ " this.keyboard_manager.register_events(toinsert);\n",
+ " // Render to node\n",
+ " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
+ " render(props, toinsert[toinsert.length - 1]);\n",
+ " element.append(toinsert);\n",
+ " return toinsert\n",
+ " }\n",
+ "\n",
+ " /* Handle when an output is cleared or removed */\n",
+ " events.on('clear_output.CodeCell', handleClearOutput);\n",
+ " events.on('delete.Cell', handleClearOutput);\n",
+ "\n",
+ " /* Handle when a new output is added */\n",
+ " events.on('output_added.OutputArea', handleAddOutput);\n",
+ "\n",
+ " /**\n",
+ " * Register the mime type and append_mime function with output_area\n",
+ " */\n",
+ " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
+ " /* Is output safe? */\n",
+ " safe: true,\n",
+ " /* Index of renderer in `output_area.display_order` */\n",
+ " index: 0\n",
+ " });\n",
+ " }\n",
+ "\n",
+ " // register the mime type if in Jupyter Notebook environment and previously unregistered\n",
+ " if (root.Jupyter !== undefined) {\n",
+ " var events = require('base/js/events');\n",
+ " var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
+ "\n",
+ " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
+ " register_renderer(events, OutputArea);\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " \n",
+ " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
+ " root._bokeh_timeout = Date.now() + 5000;\n",
+ " root._bokeh_failed_load = false;\n",
+ " }\n",
+ "\n",
+ " var NB_LOAD_WARNING = {'data': {'text/html':\n",
+ " \"\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
+ " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"- re-rerun `output_notebook()` to attempt to load from CDN again, or
\\n\"+\n",
+ " \"- use INLINE resources instead, as so:
\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"from bokeh.resources import INLINE\\n\"+\n",
+ " \"output_notebook(resources=INLINE)\\n\"+\n",
+ " \"\\n\"+\n",
+ " \"
\"}};\n",
+ "\n",
+ " function display_loaded() {\n",
+ " var el = document.getElementById(\"52087260-467f-4d6b-a799-4d1bf12bb4de\");\n",
+ " if (el != null) {\n",
+ " el.textContent = \"BokehJS is loading...\";\n",
+ " }\n",
+ " if (root.Bokeh !== undefined) {\n",
+ " if (el != null) {\n",
+ " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n",
+ " }\n",
+ " } else if (Date.now() < root._bokeh_timeout) {\n",
+ " setTimeout(display_loaded, 100)\n",
+ " }\n",
+ " }\n",
+ "\n",
+ "\n",
+ " function run_callbacks() {\n",
+ " try {\n",
+ " root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n",
+ " }\n",
+ " finally {\n",
+ " delete root._bokeh_onload_callbacks\n",
+ " }\n",
+ " console.info(\"Bokeh: all callbacks have finished\");\n",
+ " }\n",
+ "\n",
+ " function load_libs(js_urls, callback) {\n",
+ " root._bokeh_onload_callbacks.push(callback);\n",
+ " if (root._bokeh_is_loading > 0) {\n",
+ " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
+ " return null;\n",
+ " }\n",
+ " if (js_urls == null || js_urls.length === 0) {\n",
+ " run_callbacks();\n",
+ " return null;\n",
+ " }\n",
+ " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
+ " root._bokeh_is_loading = js_urls.length;\n",
+ " for (var i = 0; i < js_urls.length; i++) {\n",
+ " var url = js_urls[i];\n",
+ " var s = document.createElement('script');\n",
+ " s.src = url;\n",
+ " s.async = false;\n",
+ " s.onreadystatechange = s.onload = function() {\n",
+ " root._bokeh_is_loading--;\n",
+ " if (root._bokeh_is_loading === 0) {\n",
+ " console.log(\"Bokeh: all BokehJS libraries loaded\");\n",
+ " run_callbacks()\n",
+ " }\n",
+ " };\n",
+ " s.onerror = function() {\n",
+ " console.warn(\"failed to load library \" + url);\n",
+ " };\n",
+ " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
+ " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
+ " }\n",
+ " };var element = document.getElementById(\"52087260-467f-4d6b-a799-4d1bf12bb4de\");\n",
+ " if (element == null) {\n",
+ " console.log(\"Bokeh: ERROR: autoload.js configured with elementid '52087260-467f-4d6b-a799-4d1bf12bb4de' but no matching script tag was found. \")\n",
+ " return false;\n",
+ " }\n",
+ "\n",
+ " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.16.min.js\"];\n",
+ "\n",
+ " var inline_js = [\n",
+ " function(Bokeh) {\n",
+ " Bokeh.set_log_level(\"info\");\n",
+ " },\n",
+ " \n",
+ " function(Bokeh) {\n",
+ " \n",
+ " },\n",
+ " function(Bokeh) {\n",
+ " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n",
+ " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n",
+ " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n",
+ " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n",
+ " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n",
+ " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n",
+ " }\n",
+ " ];\n",
+ "\n",
+ " function run_inline_js() {\n",
+ " \n",
+ " if ((root.Bokeh !== undefined) || (force === true)) {\n",
+ " for (var i = 0; i < inline_js.length; i++) {\n",
+ " inline_js[i].call(root, root.Bokeh);\n",
+ " }if (force === true) {\n",
+ " display_loaded();\n",
+ " }} else if (Date.now() < root._bokeh_timeout) {\n",
+ " setTimeout(run_inline_js, 100);\n",
+ " } else if (!root._bokeh_failed_load) {\n",
+ " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
+ " root._bokeh_failed_load = true;\n",
+ " } else if (force !== true) {\n",
+ " var cell = $(document.getElementById(\"52087260-467f-4d6b-a799-4d1bf12bb4de\")).parents('.cell').data().cell;\n",
+ " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
+ " }\n",
+ "\n",
+ " }\n",
+ "\n",
+ " if (root._bokeh_is_loading === 0) {\n",
+ " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
+ " run_inline_js();\n",
+ " } else {\n",
+ " load_libs(js_urls, function() {\n",
+ " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n",
+ " run_inline_js();\n",
+ " });\n",
+ " }\n",
+ "}(window));"
+ ],
+ "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"\\n\"+\n \"
\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"
\\n\"+\n \"
\\n\"+\n \"- re-rerun `output_notebook()` to attempt to load from CDN again, or
\\n\"+\n \"- use INLINE resources instead, as so:
\\n\"+\n \"
\\n\"+\n \"
\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(\"52087260-467f-4d6b-a799-4d1bf12bb4de\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n }\n finally {\n delete root._bokeh_onload_callbacks\n }\n console.info(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(js_urls, callback) {\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = js_urls.length;\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var s = document.createElement('script');\n s.src = url;\n s.async = false;\n s.onreadystatechange = s.onload = function() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.log(\"Bokeh: all BokehJS libraries loaded\");\n run_callbacks()\n }\n };\n s.onerror = function() {\n console.warn(\"failed to load library \" + url);\n };\n console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.getElementsByTagName(\"head\")[0].appendChild(s);\n }\n };var element = document.getElementById(\"52087260-467f-4d6b-a799-4d1bf12bb4de\");\n if (element == null) {\n console.log(\"Bokeh: ERROR: autoload.js configured with elementid '52087260-467f-4d6b-a799-4d1bf12bb4de' but no matching script tag was found. \")\n return false;\n }\n\n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.16.min.js\"];\n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n \n function(Bokeh) {\n \n },\n function(Bokeh) {\n console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n }\n ];\n\n function run_inline_js() {\n \n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(\"52087260-467f-4d6b-a799-4d1bf12bb4de\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(js_urls, function() {\n console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));"
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"output_notebook()"
]
@@ -92,7 +381,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -106,7 +395,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -132,7 +421,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -161,7 +450,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -202,7 +491,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
@@ -223,7 +512,280 @@
"id": "CoWDeDBl2wHQ",
"outputId": "5a41d67f-63d7-4365-c170-ee04ef5727c1"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Help on function read_csv in module pandas.io.parsers:\n",
+ "\n",
+ "read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, iterator=False, chunksize=None, compression='infer', thousands=None, decimal=b'.', lineterminator=None, quotechar='\"', quoting=0, escapechar=None, comment=None, encoding=None, dialect=None, tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, skipfooter=0, doublequote=True, delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None)\n",
+ " Read CSV (comma-separated) file into DataFrame\n",
+ " \n",
+ " Also supports optionally iterating or breaking of the file\n",
+ " into chunks.\n",
+ " \n",
+ " Additional help can be found in the `online docs for IO Tools\n",
+ " `_.\n",
+ " \n",
+ " Parameters\n",
+ " ----------\n",
+ " filepath_or_buffer : str, pathlib.Path, py._path.local.LocalPath or any \\\n",
+ " object with a read() method (such as a file handle or StringIO)\n",
+ " The string could be a URL. Valid URL schemes include http, ftp, s3, and\n",
+ " file. For file URLs, a host is expected. For instance, a local file could\n",
+ " be file://localhost/path/to/table.csv\n",
+ " sep : str, default ','\n",
+ " Delimiter to use. If sep is None, the C engine cannot automatically detect\n",
+ " the separator, but the Python parsing engine can, meaning the latter will\n",
+ " be used and automatically detect the separator by Python's builtin sniffer\n",
+ " tool, ``csv.Sniffer``. In addition, separators longer than 1 character and\n",
+ " different from ``'\\s+'`` will be interpreted as regular expressions and\n",
+ " will also force the use of the Python parsing engine. Note that regex\n",
+ " delimiters are prone to ignoring quoted data. Regex example: ``'\\r\\t'``\n",
+ " delimiter : str, default ``None``\n",
+ " Alternative argument name for sep.\n",
+ " delim_whitespace : boolean, default False\n",
+ " Specifies whether or not whitespace (e.g. ``' '`` or ``'\\t'``) will be\n",
+ " used as the sep. Equivalent to setting ``sep='\\s+'``. If this option\n",
+ " is set to True, nothing should be passed in for the ``delimiter``\n",
+ " parameter.\n",
+ " \n",
+ " .. versionadded:: 0.18.1 support for the Python parser.\n",
+ " \n",
+ " header : int or list of ints, default 'infer'\n",
+ " Row number(s) to use as the column names, and the start of the\n",
+ " data. Default behavior is to infer the column names: if no names\n",
+ " are passed the behavior is identical to ``header=0`` and column\n",
+ " names are inferred from the first line of the file, if column\n",
+ " names are passed explicitly then the behavior is identical to\n",
+ " ``header=None``. Explicitly pass ``header=0`` to be able to\n",
+ " replace existing names. The header can be a list of integers that\n",
+ " specify row locations for a multi-index on the columns\n",
+ " e.g. [0,1,3]. Intervening rows that are not specified will be\n",
+ " skipped (e.g. 2 in this example is skipped). Note that this\n",
+ " parameter ignores commented lines and empty lines if\n",
+ " ``skip_blank_lines=True``, so header=0 denotes the first line of\n",
+ " data rather than the first line of the file.\n",
+ " names : array-like, default None\n",
+ " List of column names to use. If file contains no header row, then you\n",
+ " should explicitly pass header=None. Duplicates in this list will cause\n",
+ " a ``UserWarning`` to be issued.\n",
+ " index_col : int or sequence or False, default None\n",
+ " Column to use as the row labels of the DataFrame. If a sequence is given, a\n",
+ " MultiIndex is used. If you have a malformed file with delimiters at the end\n",
+ " of each line, you might consider index_col=False to force pandas to _not_\n",
+ " use the first column as the index (row names)\n",
+ " usecols : list-like or callable, default None\n",
+ " Return a subset of the columns. If list-like, all elements must either\n",
+ " be positional (i.e. integer indices into the document columns) or strings\n",
+ " that correspond to column names provided either by the user in `names` or\n",
+ " inferred from the document header row(s). For example, a valid list-like\n",
+ " `usecols` parameter would be [0, 1, 2] or ['foo', 'bar', 'baz']. Element\n",
+ " order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.\n",
+ " To instantiate a DataFrame from ``data`` with element order preserved use\n",
+ " ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns\n",
+ " in ``['foo', 'bar']`` order or\n",
+ " ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``\n",
+ " for ``['bar', 'foo']`` order.\n",
+ " \n",
+ " If callable, the callable function will be evaluated against the column\n",
+ " names, returning names where the callable function evaluates to True. An\n",
+ " example of a valid callable argument would be ``lambda x: x.upper() in\n",
+ " ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster\n",
+ " parsing time and lower memory usage.\n",
+ " squeeze : boolean, default False\n",
+ " If the parsed data only contains one column then return a Series\n",
+ " prefix : str, default None\n",
+ " Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...\n",
+ " mangle_dupe_cols : boolean, default True\n",
+ " Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than\n",
+ " 'X'...'X'. Passing in False will cause data to be overwritten if there\n",
+ " are duplicate names in the columns.\n",
+ " dtype : Type name or dict of column -> type, default None\n",
+ " Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32}\n",
+ " Use `str` or `object` together with suitable `na_values` settings\n",
+ " to preserve and not interpret dtype.\n",
+ " If converters are specified, they will be applied INSTEAD\n",
+ " of dtype conversion.\n",
+ " engine : {'c', 'python'}, optional\n",
+ " Parser engine to use. The C engine is faster while the python engine is\n",
+ " currently more feature-complete.\n",
+ " converters : dict, default None\n",
+ " Dict of functions for converting values in certain columns. Keys can either\n",
+ " be integers or column labels\n",
+ " true_values : list, default None\n",
+ " Values to consider as True\n",
+ " false_values : list, default None\n",
+ " Values to consider as False\n",
+ " skipinitialspace : boolean, default False\n",
+ " Skip spaces after delimiter.\n",
+ " skiprows : list-like or integer or callable, default None\n",
+ " Line numbers to skip (0-indexed) or number of lines to skip (int)\n",
+ " at the start of the file.\n",
+ " \n",
+ " If callable, the callable function will be evaluated against the row\n",
+ " indices, returning True if the row should be skipped and False otherwise.\n",
+ " An example of a valid callable argument would be ``lambda x: x in [0, 2]``.\n",
+ " skipfooter : int, default 0\n",
+ " Number of lines at bottom of file to skip (Unsupported with engine='c')\n",
+ " nrows : int, default None\n",
+ " Number of rows of file to read. Useful for reading pieces of large files\n",
+ " na_values : scalar, str, list-like, or dict, default None\n",
+ " Additional strings to recognize as NA/NaN. If dict passed, specific\n",
+ " per-column NA values. By default the following values are interpreted as\n",
+ " NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',\n",
+ " '1.#IND', '1.#QNAN', 'N/A', 'NA', 'NULL', 'NaN', 'n/a', 'nan',\n",
+ " 'null'.\n",
+ " keep_default_na : bool, default True\n",
+ " Whether or not to include the default NaN values when parsing the data.\n",
+ " Depending on whether `na_values` is passed in, the behavior is as follows:\n",
+ " \n",
+ " * If `keep_default_na` is True, and `na_values` are specified, `na_values`\n",
+ " is appended to the default NaN values used for parsing.\n",
+ " * If `keep_default_na` is True, and `na_values` are not specified, only\n",
+ " the default NaN values are used for parsing.\n",
+ " * If `keep_default_na` is False, and `na_values` are specified, only\n",
+ " the NaN values specified `na_values` are used for parsing.\n",
+ " * If `keep_default_na` is False, and `na_values` are not specified, no\n",
+ " strings will be parsed as NaN.\n",
+ " \n",
+ " Note that if `na_filter` is passed in as False, the `keep_default_na` and\n",
+ " `na_values` parameters will be ignored.\n",
+ " na_filter : boolean, default True\n",
+ " Detect missing value markers (empty strings and the value of na_values). In\n",
+ " data without any NAs, passing na_filter=False can improve the performance\n",
+ " of reading a large file\n",
+ " verbose : boolean, default False\n",
+ " Indicate number of NA values placed in non-numeric columns\n",
+ " skip_blank_lines : boolean, default True\n",
+ " If True, skip over blank lines rather than interpreting as NaN values\n",
+ " parse_dates : boolean or list of ints or names or list of lists or dict, default False\n",
+ " \n",
+ " * boolean. If True -> try parsing the index.\n",
+ " * list of ints or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3\n",
+ " each as a separate date column.\n",
+ " * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as\n",
+ " a single date column.\n",
+ " * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result\n",
+ " 'foo'\n",
+ " \n",
+ " If a column or index contains an unparseable date, the entire column or\n",
+ " index will be returned unaltered as an object data type. For non-standard\n",
+ " datetime parsing, use ``pd.to_datetime`` after ``pd.read_csv``\n",
+ " \n",
+ " Note: A fast-path exists for iso8601-formatted dates.\n",
+ " infer_datetime_format : boolean, default False\n",
+ " If True and `parse_dates` is enabled, pandas will attempt to infer the\n",
+ " format of the datetime strings in the columns, and if it can be inferred,\n",
+ " switch to a faster method of parsing them. In some cases this can increase\n",
+ " the parsing speed by 5-10x.\n",
+ " keep_date_col : boolean, default False\n",
+ " If True and `parse_dates` specifies combining multiple columns then\n",
+ " keep the original columns.\n",
+ " date_parser : function, default None\n",
+ " Function to use for converting a sequence of string columns to an array of\n",
+ " datetime instances. The default uses ``dateutil.parser.parser`` to do the\n",
+ " conversion. Pandas will try to call `date_parser` in three different ways,\n",
+ " advancing to the next if an exception occurs: 1) Pass one or more arrays\n",
+ " (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the\n",
+ " string values from the columns defined by `parse_dates` into a single array\n",
+ " and pass that; and 3) call `date_parser` once for each row using one or\n",
+ " more strings (corresponding to the columns defined by `parse_dates`) as\n",
+ " arguments.\n",
+ " dayfirst : boolean, default False\n",
+ " DD/MM format dates, international and European format\n",
+ " iterator : boolean, default False\n",
+ " Return TextFileReader object for iteration or getting chunks with\n",
+ " ``get_chunk()``.\n",
+ " chunksize : int, default None\n",
+ " Return TextFileReader object for iteration.\n",
+ " See the `IO Tools docs\n",
+ " `_\n",
+ " for more information on ``iterator`` and ``chunksize``.\n",
+ " compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default 'infer'\n",
+ " For on-the-fly decompression of on-disk data. If 'infer' and\n",
+ " `filepath_or_buffer` is path-like, then detect compression from the\n",
+ " following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise no\n",
+ " decompression). If using 'zip', the ZIP file must contain only one data\n",
+ " file to be read in. Set to None for no decompression.\n",
+ " \n",
+ " .. versionadded:: 0.18.1 support for 'zip' and 'xz' compression.\n",
+ " \n",
+ " thousands : str, default None\n",
+ " Thousands separator\n",
+ " decimal : str, default '.'\n",
+ " Character to recognize as decimal point (e.g. use ',' for European data).\n",
+ " float_precision : string, default None\n",
+ " Specifies which converter the C engine should use for floating-point\n",
+ " values. The options are `None` for the ordinary converter,\n",
+ " `high` for the high-precision converter, and `round_trip` for the\n",
+ " round-trip converter.\n",
+ " lineterminator : str (length 1), default None\n",
+ " Character to break file into lines. Only valid with C parser.\n",
+ " quotechar : str (length 1), optional\n",
+ " The character used to denote the start and end of a quoted item. Quoted\n",
+ " items can include the delimiter and it will be ignored.\n",
+ " quoting : int or csv.QUOTE_* instance, default 0\n",
+ " Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of\n",
+ " QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).\n",
+ " doublequote : boolean, default ``True``\n",
+ " When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate\n",
+ " whether or not to interpret two consecutive quotechar elements INSIDE a\n",
+ " field as a single ``quotechar`` element.\n",
+ " escapechar : str (length 1), default None\n",
+ " One-character string used to escape delimiter when quoting is QUOTE_NONE.\n",
+ " comment : str, default None\n",
+ " Indicates remainder of line should not be parsed. If found at the beginning\n",
+ " of a line, the line will be ignored altogether. This parameter must be a\n",
+ " single character. Like empty lines (as long as ``skip_blank_lines=True``),\n",
+ " fully commented lines are ignored by the parameter `header` but not by\n",
+ " `skiprows`. For example, if ``comment='#'``, parsing\n",
+ " ``#empty\\na,b,c\\n1,2,3`` with ``header=0`` will result in 'a,b,c' being\n",
+ " treated as the header.\n",
+ " encoding : str, default None\n",
+ " Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python\n",
+ " standard encodings\n",
+ " `_\n",
+ " dialect : str or csv.Dialect instance, default None\n",
+ " If provided, this parameter will override values (default or not) for the\n",
+ " following parameters: `delimiter`, `doublequote`, `escapechar`,\n",
+ " `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to\n",
+ " override values, a ParserWarning will be issued. See csv.Dialect\n",
+ " documentation for more details.\n",
+ " tupleize_cols : boolean, default False\n",
+ " .. deprecated:: 0.21.0\n",
+ " This argument will be removed and will always convert to MultiIndex\n",
+ " \n",
+ " Leave a list of tuples on columns as is (default is to convert to\n",
+ " a MultiIndex on the columns)\n",
+ " error_bad_lines : boolean, default True\n",
+ " Lines with too many fields (e.g. a csv line with too many commas) will by\n",
+ " default cause an exception to be raised, and no DataFrame will be returned.\n",
+ " If False, then these \"bad lines\" will dropped from the DataFrame that is\n",
+ " returned.\n",
+ " warn_bad_lines : boolean, default True\n",
+ " If error_bad_lines is False, and warn_bad_lines is True, a warning for each\n",
+ " \"bad line\" will be output.\n",
+ " low_memory : boolean, default True\n",
+ " Internally process the file in chunks, resulting in lower memory use\n",
+ " while parsing, but possibly mixed type inference. To ensure no mixed\n",
+ " types either set False, or specify the type with the `dtype` parameter.\n",
+ " Note that the entire file is read into a single DataFrame regardless,\n",
+ " use the `chunksize` or `iterator` parameter to return the data in chunks.\n",
+ " (Only valid with C parser)\n",
+ " memory_map : boolean, default False\n",
+ " If a filepath is provided for `filepath_or_buffer`, map the file object\n",
+ " directly onto memory and access the data directly from there. Using this\n",
+ " option can improve performance because there is no longer any I/O overhead.\n",
+ " \n",
+ " Returns\n",
+ " -------\n",
+ " result : DataFrame or TextParser\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"help(pd.read_csv)"
]
@@ -240,14 +802,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "2iltS1Q-k3Wn",
"outputId": "ea81462d-8a49-406b-c933-182c49379053"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(136, 52230)"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"data_tr.shape"
]
@@ -266,14 +839,242 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "TVfPwU6-k3Wt",
"outputId": "884dd460-6c53-4bf4-9c37-f7c7299b37a2"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sampleID | \n",
+ " ALB.Gene_AceView | \n",
+ " CD24L4.1.Gene_AceView | \n",
+ " RPS11.Gene_RefSeq | \n",
+ " RPS18.Gene_AceView | \n",
+ " C5orf13.Gene_AceView | \n",
+ " CCT2.Gene_AceView | \n",
+ " COL1A1.Gene_AceView | \n",
+ " DDX1.Gene_AceView | \n",
+ " EEF1A1.Gene_AceView | \n",
+ " ... | \n",
+ " zawskaw.Gene_AceView | \n",
+ " zeedor.Gene_AceView | \n",
+ " zergor.Gene_AceView | \n",
+ " zorsa.Gene_AceView | \n",
+ " zoychabu.Gene_AceView | \n",
+ " zoysteeby.Gene_AceView | \n",
+ " zudee.Gene_AceView | \n",
+ " zureyby.Gene_AceView | \n",
+ " zuswoybu.Gene_AceView | \n",
+ " zyjee.Gene_AceView | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " SEQC_NB001 | \n",
+ " 9.29 | \n",
+ " 18.82 | \n",
+ " 21.17 | \n",
+ " 20.90 | \n",
+ " 20.02 | \n",
+ " 16.31 | \n",
+ " 18.60 | \n",
+ " 15.73 | \n",
+ " 21.71 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " SEQC_NB003 | \n",
+ " 9.25 | \n",
+ " 20.25 | \n",
+ " 22.44 | \n",
+ " 22.00 | \n",
+ " 21.05 | \n",
+ " 17.06 | \n",
+ " 19.39 | \n",
+ " 22.84 | \n",
+ " 22.72 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 5.54 | \n",
+ " 0.0 | \n",
+ " 3.39 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 5.45 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " SEQC_NB005 | \n",
+ " 8.99 | \n",
+ " 20.09 | \n",
+ " 22.09 | \n",
+ " 21.71 | \n",
+ " 21.65 | \n",
+ " 16.85 | \n",
+ " 23.02 | \n",
+ " 15.79 | \n",
+ " 22.24 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 3.75 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " SEQC_NB011 | \n",
+ " 7.32 | \n",
+ " 19.82 | \n",
+ " 20.52 | \n",
+ " 20.90 | \n",
+ " 21.58 | \n",
+ " 16.49 | \n",
+ " 18.91 | \n",
+ " 15.45 | \n",
+ " 22.06 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " SEQC_NB013 | \n",
+ " 10.56 | \n",
+ " 21.19 | \n",
+ " 20.69 | \n",
+ " 21.29 | \n",
+ " 20.28 | \n",
+ " 16.22 | \n",
+ " 17.15 | \n",
+ " 16.01 | \n",
+ " 21.84 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 5.20 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 52230 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sampleID ALB.Gene_AceView CD24L4.1.Gene_AceView RPS11.Gene_RefSeq \\\n",
+ "0 SEQC_NB001 9.29 18.82 21.17 \n",
+ "1 SEQC_NB003 9.25 20.25 22.44 \n",
+ "2 SEQC_NB005 8.99 20.09 22.09 \n",
+ "3 SEQC_NB011 7.32 19.82 20.52 \n",
+ "4 SEQC_NB013 10.56 21.19 20.69 \n",
+ "\n",
+ " RPS18.Gene_AceView C5orf13.Gene_AceView CCT2.Gene_AceView \\\n",
+ "0 20.90 20.02 16.31 \n",
+ "1 22.00 21.05 17.06 \n",
+ "2 21.71 21.65 16.85 \n",
+ "3 20.90 21.58 16.49 \n",
+ "4 21.29 20.28 16.22 \n",
+ "\n",
+ " COL1A1.Gene_AceView DDX1.Gene_AceView EEF1A1.Gene_AceView \\\n",
+ "0 18.60 15.73 21.71 \n",
+ "1 19.39 22.84 22.72 \n",
+ "2 23.02 15.79 22.24 \n",
+ "3 18.91 15.45 22.06 \n",
+ "4 17.15 16.01 21.84 \n",
+ "\n",
+ " ... zawskaw.Gene_AceView zeedor.Gene_AceView \\\n",
+ "0 ... 0.0 0.00 \n",
+ "1 ... 0.0 5.54 \n",
+ "2 ... 0.0 0.00 \n",
+ "3 ... 0.0 0.00 \n",
+ "4 ... 0.0 0.00 \n",
+ "\n",
+ " zergor.Gene_AceView zorsa.Gene_AceView zoychabu.Gene_AceView \\\n",
+ "0 0.0 0.00 0.0 \n",
+ "1 0.0 3.39 0.0 \n",
+ "2 0.0 3.75 0.0 \n",
+ "3 0.0 0.00 0.0 \n",
+ "4 0.0 5.20 0.0 \n",
+ "\n",
+ " zoysteeby.Gene_AceView zudee.Gene_AceView zureyby.Gene_AceView \\\n",
+ "0 0.0 0.00 0.0 \n",
+ "1 0.0 5.45 0.0 \n",
+ "2 0.0 0.00 0.0 \n",
+ "3 0.0 0.00 0.0 \n",
+ "4 0.0 0.00 0.0 \n",
+ "\n",
+ " zuswoybu.Gene_AceView zyjee.Gene_AceView \n",
+ "0 0.0 0.0 \n",
+ "1 0.0 0.0 \n",
+ "2 0.0 0.0 \n",
+ "3 0.0 0.0 \n",
+ "4 0.0 0.0 \n",
+ "\n",
+ "[5 rows x 52230 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"data_tr.head()"
]
@@ -290,7 +1091,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -314,14 +1115,242 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "QgcQgBVek3W1",
"outputId": "6cbec2e0-0001-4e03-c040-0bbddd51db5b"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ALB.Gene_AceView | \n",
+ " CD24L4.1.Gene_AceView | \n",
+ " RPS11.Gene_RefSeq | \n",
+ " RPS18.Gene_AceView | \n",
+ " C5orf13.Gene_AceView | \n",
+ " CCT2.Gene_AceView | \n",
+ " COL1A1.Gene_AceView | \n",
+ " DDX1.Gene_AceView | \n",
+ " EEF1A1.Gene_AceView | \n",
+ " FLT3LG_.Gene_AceView | \n",
+ " ... | \n",
+ " zawskaw.Gene_AceView | \n",
+ " zeedor.Gene_AceView | \n",
+ " zergor.Gene_AceView | \n",
+ " zorsa.Gene_AceView | \n",
+ " zoychabu.Gene_AceView | \n",
+ " zoysteeby.Gene_AceView | \n",
+ " zudee.Gene_AceView | \n",
+ " zureyby.Gene_AceView | \n",
+ " zuswoybu.Gene_AceView | \n",
+ " zyjee.Gene_AceView | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 9.29 | \n",
+ " 18.82 | \n",
+ " 21.17 | \n",
+ " 20.90 | \n",
+ " 20.02 | \n",
+ " 16.31 | \n",
+ " 18.60 | \n",
+ " 15.73 | \n",
+ " 21.71 | \n",
+ " 20.02 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 9.25 | \n",
+ " 20.25 | \n",
+ " 22.44 | \n",
+ " 22.00 | \n",
+ " 21.05 | \n",
+ " 17.06 | \n",
+ " 19.39 | \n",
+ " 22.84 | \n",
+ " 22.72 | \n",
+ " 21.26 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 5.54 | \n",
+ " 0.0 | \n",
+ " 3.39 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 5.45 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 8.99 | \n",
+ " 20.09 | \n",
+ " 22.09 | \n",
+ " 21.71 | \n",
+ " 21.65 | \n",
+ " 16.85 | \n",
+ " 23.02 | \n",
+ " 15.79 | \n",
+ " 22.24 | \n",
+ " 20.75 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 3.75 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 7.32 | \n",
+ " 19.82 | \n",
+ " 20.52 | \n",
+ " 20.90 | \n",
+ " 21.58 | \n",
+ " 16.49 | \n",
+ " 18.91 | \n",
+ " 15.45 | \n",
+ " 22.06 | \n",
+ " 19.59 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 10.56 | \n",
+ " 21.19 | \n",
+ " 20.69 | \n",
+ " 21.29 | \n",
+ " 20.28 | \n",
+ " 16.22 | \n",
+ " 17.15 | \n",
+ " 16.01 | \n",
+ " 21.84 | \n",
+ " 19.74 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 5.20 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 52229 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ALB.Gene_AceView CD24L4.1.Gene_AceView RPS11.Gene_RefSeq \\\n",
+ "0 9.29 18.82 21.17 \n",
+ "1 9.25 20.25 22.44 \n",
+ "2 8.99 20.09 22.09 \n",
+ "3 7.32 19.82 20.52 \n",
+ "4 10.56 21.19 20.69 \n",
+ "\n",
+ " RPS18.Gene_AceView C5orf13.Gene_AceView CCT2.Gene_AceView \\\n",
+ "0 20.90 20.02 16.31 \n",
+ "1 22.00 21.05 17.06 \n",
+ "2 21.71 21.65 16.85 \n",
+ "3 20.90 21.58 16.49 \n",
+ "4 21.29 20.28 16.22 \n",
+ "\n",
+ " COL1A1.Gene_AceView DDX1.Gene_AceView EEF1A1.Gene_AceView \\\n",
+ "0 18.60 15.73 21.71 \n",
+ "1 19.39 22.84 22.72 \n",
+ "2 23.02 15.79 22.24 \n",
+ "3 18.91 15.45 22.06 \n",
+ "4 17.15 16.01 21.84 \n",
+ "\n",
+ " FLT3LG_.Gene_AceView ... zawskaw.Gene_AceView \\\n",
+ "0 20.02 ... 0.0 \n",
+ "1 21.26 ... 0.0 \n",
+ "2 20.75 ... 0.0 \n",
+ "3 19.59 ... 0.0 \n",
+ "4 19.74 ... 0.0 \n",
+ "\n",
+ " zeedor.Gene_AceView zergor.Gene_AceView zorsa.Gene_AceView \\\n",
+ "0 0.00 0.0 0.00 \n",
+ "1 5.54 0.0 3.39 \n",
+ "2 0.00 0.0 3.75 \n",
+ "3 0.00 0.0 0.00 \n",
+ "4 0.00 0.0 5.20 \n",
+ "\n",
+ " zoychabu.Gene_AceView zoysteeby.Gene_AceView zudee.Gene_AceView \\\n",
+ "0 0.0 0.0 0.00 \n",
+ "1 0.0 0.0 5.45 \n",
+ "2 0.0 0.0 0.00 \n",
+ "3 0.0 0.0 0.00 \n",
+ "4 0.0 0.0 0.00 \n",
+ "\n",
+ " zureyby.Gene_AceView zuswoybu.Gene_AceView zyjee.Gene_AceView \n",
+ "0 0.0 0.0 0.0 \n",
+ "1 0.0 0.0 0.0 \n",
+ "2 0.0 0.0 0.0 \n",
+ "3 0.0 0.0 0.0 \n",
+ "4 0.0 0.0 0.0 \n",
+ "\n",
+ "[5 rows x 52229 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"data_tr.head()"
]
@@ -338,14 +1367,95 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "7Vbq9mqXk3W5",
"outputId": "6bc4f986-30e2-4953-fc78-80b80982ba47"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sampleID | \n",
+ " CLASS | \n",
+ " SEX | \n",
+ " RND | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " SEQC_NB001 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " SEQC_NB003 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " SEQC_NB005 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " SEQC_NB011 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " SEQC_NB013 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sampleID CLASS SEX RND\n",
+ "0 SEQC_NB001 0 1 1\n",
+ "1 SEQC_NB003 0 0 0\n",
+ "2 SEQC_NB005 0 0 1\n",
+ "3 SEQC_NB011 1 1 1\n",
+ "4 SEQC_NB013 0 1 1"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"labs_tr = pd.read_csv(LABS_TR, sep = \"\\t\")\n",
"labs_ts = pd.read_csv(LABS_TS, sep = \"\\t\")\n",
@@ -364,14 +1474,77 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "pyTfzujJk3W9",
"outputId": "cd7cf62a-c5b1-491a-853e-631b5cc9a4d2"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " CLASS | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " CLASS\n",
+ "0 0\n",
+ "1 0\n",
+ "2 0\n",
+ "3 1\n",
+ "4 0"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"class_lab_tr = labs_tr[['CLASS']]\n",
"class_lab_ts = labs_ts[['CLASS']]\n",
@@ -391,7 +1564,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -438,13 +1611,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "XYs3b6JJ3qrn"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[ 9.29, 18.82, 21.17, ..., 0. , 0. , 0. ],\n",
+ " [ 9.25, 20.25, 22.44, ..., 0. , 0. , 0. ],\n",
+ " [ 8.99, 20.09, 22.09, ..., 0. , 0. , 0. ],\n",
+ " ...,\n",
+ " [ 8.47, 20.75, 20.08, ..., 0. , 0. , 0. ],\n",
+ " [ 8.58, 20.57, 20.67, ..., 0. , 0. , 0. ],\n",
+ " [ 8.62, 20.13, 21.04, ..., 0. , 0. , 0. ]])"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"x_tr"
]
@@ -461,13 +1651,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "iwI8uSvC4BbC"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
+ " 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,\n",
+ " 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,\n",
+ " 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,\n",
+ " 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,\n",
+ " 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0,\n",
+ " 1, 1, 1, 1])"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"y_tr"
]
@@ -512,7 +1719,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -566,7 +1773,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -592,7 +1799,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -605,7 +1812,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
@@ -615,9 +1822,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(136, 2)"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"z_tr.shape"
]
@@ -634,13 +1852,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Cma7FaOd6F1M"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[0.08194369 0.0433671 ]\n"
+ ]
+ }
+ ],
"source": [
"print(pca.explained_variance_ratio_)"
]
@@ -669,9 +1895,60 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/javascript": [
+ "(function(root) {\n",
+ " function embed_document(root) {\n",
+ " \n",
+ " var docs_json = {\"f581f7f5-ad4a-4b4d-9c36-5011c136da3e\":{\"roots\":{\"references\":[{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"578311d8-8ca8-43a6-806a-dd2afe910f4c\",\"type\":\"Circle\"},{\"attributes\":{},\"id\":\"0ac9d99f-4445-4568-ac9e-69f19a680599\",\"type\":\"Selection\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"7ab9fdfb-298b-4b61-a00a-fecfda8ae28d\",\"type\":\"PanTool\"},{\"id\":\"30243c20-8f2c-4f7f-a05e-e3956e59775e\",\"type\":\"WheelZoomTool\"},{\"id\":\"1beec0d7-5cc8-481c-9e26-923bd16df227\",\"type\":\"BoxZoomTool\"},{\"id\":\"5ebd41b2-5f1c-4b19-ad41-495539ca8c9b\",\"type\":\"SaveTool\"},{\"id\":\"350ce84d-0611-4f8a-9f43-8258d5482ea6\",\"type\":\"ResetTool\"},{\"id\":\"7d66043f-2f80-4c49-b7c1-1e7b29a0af66\",\"type\":\"HelpTool\"}]},\"id\":\"5e61b569-21fd-4636-b89b-c58313b596d7\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"207017d5-7c07-4632-9ca7-0467df0c7e70\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"fill_color\":{\"value\":\"orange\"},\"line_color\":{\"value\":\"orange\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"55788bac-0509-4db3-851e-2ad5507b44bc\",\"type\":\"Circle\"},{\"attributes\":{},\"id\":\"20a06083-a08f-402e-a166-53e00ae3d1c6\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"9104b613-185a-4605-897a-65b840848b66\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"below\":[{\"id\":\"66a59647-cfb3-48f5-853c-e378edb23566\",\"type\":\"LinearAxis\"}],\"left\":[{\"id\":\"636f73f8-4a94-4984-a157-007a9c1f92e9\",\"type\":\"LinearAxis\"}],\"plot_height\":400,\"plot_width\":400,\"renderers\":[{\"id\":\"66a59647-cfb3-48f5-853c-e378edb23566\",\"type\":\"LinearAxis\"},{\"id\":\"6414f74a-4633-4213-ba59-4d8b09cf032c\",\"type\":\"Grid\"},{\"id\":\"636f73f8-4a94-4984-a157-007a9c1f92e9\",\"type\":\"LinearAxis\"},{\"id\":\"4aaa0bc4-228d-4266-b8dc-4213fe5042f6\",\"type\":\"Grid\"},{\"id\":\"4a9f91d0-bd1e-4b38-bffa-5b3399973856\",\"type\":\"BoxAnnotation\"},{\"id\":\"4703dd56-5d0b-49f2-963e-c2562d3796aa\",\"type\":\"GlyphRenderer\"},{\"id\":\"790faec9-4ab0-4793-9e2e-62921cf31ce8\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"28b7adc3-9383-4511-afaa-69e6be05de92\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"5e61b569-21fd-4636-b89b-c58313b596d7\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"9fcb5a73-6302-457a-8b58-231b497ac950\",\"type\":\"DataRange1d\"},\"x_scale\":{\"id\":\"f599e90b-7a15-4166-865e-6dd7eee1ea31\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"1f914f1e-26b6-4362-baa6-f4ed9bae91ae\",\"type\":\"DataRange1d\"},\"y_scale\":{\"id\":\"d9ed750a-2348-490a-9871-5c18a14160f8\",\"type\":\"LinearScale\"}},\"id\":\"162587d8-457e-4ad3-8646-f6b67d9afcaa\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"s4dscZTUFsCsUR4MMntMQEacKLQ+z0dAZjHJmj/mKMDavt88hvAvQPepnplzfeQ/iRqKdbXJRUDD41gKeYQzQOEzq9HxPkdAof95CJrpUUCXVoj1RC9SQFRDbl6DGfW//wiVFHpZJEDanZ+ntBVJQPXwzwnJIlNAD4fQrjFbQkApN+KVnwFMQJZQEet7gUJAB8OIkj+NPkA6POlDoostwF2LPGBkvxtAb0MN5dbzFEBG8wS/hXFOQNvWE0osDk1AAx0Rr8ddOEBhAMw5xFA0QMbOy//t9VFAuP2PyC4xN0C918OCG0c4QNvLJkWY3EFAWeQZTmXUSUB6OcmTg1E5QP7aMVQ/kSHA3uDhUYTsDkBfqygeHqg1QBm4Scy720JAYGv9cKPgREC3MSH71K8tQLpNrTITFlRAHXxspr3+SEDjaqfxTofxv1Zkp2gpcSNAyZ7O8pNKUEDDp6vqvAxQQO/AHnRFIlNA\",\"dtype\":\"float64\",\"shape\":[45]},\"y\":{\"__ndarray__\":\"5PVobXIYNEAbI7kdXVw2wEpKDc6nBgHAtMNFMWd/NMAQK6V1J5EJwB7i+V5UxxZA8RXRZPOeIcBhsnMT6sE3wJFnXaFJpxhAhHAj30TDz7/KqmMKc+w9wH/oBR/sckJAJuQKfyZNL0B5ResFsvEoQF959I4Mn0PAoDzn6xhBNkB4LAS7Jfk1wDIPw4n7pChAzQEh+La0F8DzZ9i1DoIowBGmZti8ByRA6Nnjzr/EQsAKobWnnulFQG7mTrGWVzTA9GZR60VCFkCj1E4NSO1BQORR0RpiqwrArZcb6qPaTUBb9JxAoDQ8QETAWzE+bjNAbjnJ9riZNUDio8BMqBFGQJr/aX26KUNALA4ZgbopSUBeJV1jAgM1QH555P1vjyxAc92EhwMyRUBzj8fL9JlDQF+DmsrFiBVA58tVN1cLQMAVcmzzh0k/QDILsl0qHyhAZg3FVBo0OcDfvGZhQ6Y2wHad6WtZ/hjA\",\"dtype\":\"float64\",\"shape\":[45]}},\"selected\":{\"id\":\"0ac9d99f-4445-4568-ac9e-69f19a680599\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"7766407d-4b55-46d4-bb9a-74298e71f74a\",\"type\":\"UnionRenderers\"}},\"id\":\"6f41ebaf-8e24-4b1f-9943-8e92b50e40a2\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"plot\":null,\"text\":\"PCA of Train data\"},\"id\":\"28b7adc3-9383-4511-afaa-69e6be05de92\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null},\"id\":\"1f914f1e-26b6-4362-baa6-f4ed9bae91ae\",\"type\":\"DataRange1d\"},{\"attributes\":{},\"id\":\"d9ed750a-2348-490a-9871-5c18a14160f8\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"f599e90b-7a15-4166-865e-6dd7eee1ea31\",\"type\":\"LinearScale\"},{\"attributes\":{\"source\":{\"id\":\"6f41ebaf-8e24-4b1f-9943-8e92b50e40a2\",\"type\":\"ColumnDataSource\"}},\"id\":\"677bd7ee-c5eb-42a9-a2d6-be65c7da20d5\",\"type\":\"CDSView\"},{\"attributes\":{\"axis_label\":\"PC1\",\"formatter\":{\"id\":\"00c06ead-0e6d-4bb5-a14e-4e78d94233df\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"162587d8-457e-4ad3-8646-f6b67d9afcaa\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"f88296ac-67a5-4059-b98c-b4e252b6fa1d\",\"type\":\"BasicTicker\"}},\"id\":\"66a59647-cfb3-48f5-853c-e378edb23566\",\"type\":\"LinearAxis\"},{\"attributes\":{},\"id\":\"f88296ac-67a5-4059-b98c-b4e252b6fa1d\",\"type\":\"BasicTicker\"},{\"attributes\":{\"callback\":null},\"id\":\"9fcb5a73-6302-457a-8b58-231b497ac950\",\"type\":\"DataRange1d\"},{\"attributes\":{\"plot\":{\"id\":\"162587d8-457e-4ad3-8646-f6b67d9afcaa\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"f88296ac-67a5-4059-b98c-b4e252b6fa1d\",\"type\":\"BasicTicker\"}},\"id\":\"6414f74a-4633-4213-ba59-4d8b09cf032c\",\"type\":\"Grid\"},{\"attributes\":{\"axis_label\":\"PC2\",\"formatter\":{\"id\":\"9104b613-185a-4605-897a-65b840848b66\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"162587d8-457e-4ad3-8646-f6b67d9afcaa\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"6da6b1cc-0947-4d17-b3ea-e1d619d0cc1d\",\"type\":\"BasicTicker\"}},\"id\":\"636f73f8-4a94-4984-a157-007a9c1f92e9\",\"type\":\"LinearAxis\"},{\"attributes\":{},\"id\":\"6da6b1cc-0947-4d17-b3ea-e1d619d0cc1d\",\"type\":\"BasicTicker\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\":\"162587d8-457e-4ad3-8646-f6b67d9afcaa\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"6da6b1cc-0947-4d17-b3ea-e1d619d0cc1d\",\"type\":\"BasicTicker\"}},\"id\":\"4aaa0bc4-228d-4266-b8dc-4213fe5042f6\",\"type\":\"Grid\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"plot\":null,\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"4a9f91d0-bd1e-4b38-bffa-5b3399973856\",\"type\":\"BoxAnnotation\"},{\"attributes\":{\"fill_color\":{\"value\":\"blue\"},\"line_color\":{\"value\":\"blue\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"bc1afe64-4989-434a-9ccf-3b70f18a4f8f\",\"type\":\"Circle\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"/Si/hE7yN8C6LbeVIKI7wMMdNO2D8inAf+ipwzArOcDAL+3Xn/s1wDpQqZT/RC3A0VDAJ94uPcBFfI+FtRMowC2xhdPCKDbAh/tMbl75GsCLjArEw/I5wFIeYVW4nzjA+3HQp7TxKsDw+0WEIxQ2wAcO2cuCFz/A4CDm9licNcDGtBtiHbs5wHoGXaS4TTjAbVP5QWQsuD/era7rU1o3wBKDDJQ51zTAFZbmKzXTN8DIH3gJwY04wP/xZL+6Lj3AUkAxbHHkPsAIDXLNd+AiwJ8e4cfbYTvAYfI139CSN8A69w0ghAQOwC6ADoIpczXAxEDA7yA4QMCjNRjYobcowFv7IezNkTHA7S9ynqPANMCd9IoXjBg7wOLmNaL24TPA46nyBgS0NcDWCO503F4nwNbB0lfCnPu/XvHMQOpZ/b+9PLtTrjkwwPjSg1Z3kD7AT/8gRchQMsCZtgXC/hAwwJlJaCSHEBzAy0yjL87sPcDBp9RjzaMtwHxLFCQxBCNAcBQkl+BpAsB/Qr8LB2MuwDVCpP4iizfABZaDj85/PkDWX3fIOSE9wK48LhtqRzjAX70SodsvOcC+2/mOfvhBwGJMpTrvDzDAK39eBbxjMcCwB+gEkcoCQEnRjZb8iSnAz5Dh2+j+EsAGyAlM39oHQH9cMAtzIzHAZ5wsm+mrIsCDkyzz8YM1wC5kNz067DnANeanmreYJcAjXi1Yz7IrwA9R6qUBfTTAQks43xlAPMBYeSwB/y01wE5I/c2VkzLAANgvIXagREAfaGCbqiguwN+beVChYjvAr+Z9nW/MM8Cpd1pFUEYVwJkdq+B5LDTAhyAPgUT+HsATLFQzmOw1wGeQb5ytOCfAuci/rTgkMsAiDs6wl9A3wFw5pdAaRTrAvZR9mMg6JEBUVyucjKIywM0trS/48ANAxao8AjDuOsDVRS6rQjIVwEG2mc3L7TPASSgSW+zsL8A=\",\"dtype\":\"float64\",\"shape\":[91]},\"y\":{\"__ndarray__\":\"SmsRgBg8IUC7oD0b4RAvwB35A0JM8CnAzl/S3gASSkBQJfXGvD0YQGoNRpWAkzZA1lwfrDmBHkA06UHxc0kYQLJMpdZFZSVA0oe0tBALNMArRmWoxfwiQB9tLdeqCiXAEZb3q3/yJcA+RjvKlI8ywAC+oHGJGzLA70Yo+Dj2jj9zV2of099BQCzMeir/JzbA+MjwoBiKRsBN8tZT0QQzwFwIlUXLmirAntwY8xrtLMDl9QASdnQqwKf2eC5KKzBA4FF8NTxo+T/2Yz1KISY7wN/tM+cnMTtAs/VrBGJsMkAO7adwqZlAwKTby3spuQJAZThzpKyvK0BVxCjuMI00wLZmpr6nPzXAJTwBBDRkN8DXDMhBSWUMwF1IK1kH9B3AZqrQl1vXK0A3EeHgZhVCwLGyZkqlax/AOzHcEWMThT/Ku1RCqQwhwKe/3zZ4OSpAEp6Gdw51TUAcwlPljvMtwG8hT9sUCjbA+X0D8p+aGUB8Y+YlKhgvwIq/wa5PsC/AfQqxRAXdIUAQOz28XxYhwIvoWu+DLDHAw5EC14kYM0DS02Dp/lgWwOYhg1GW6jPAp3XzbwprK8CYCgw0UtQqwAqj9yrVFSHAdmNTQPu9KsAk1tK2sLgswIyMXflGJCjAxuEk8BUAQEAxsLks9bAUQOh0mgDoEzfAAJlW4YL0OcAS5j6QbBwswNEV4Ni+azLAMt49bRyOJsDpE8fUyZodQOw4mT27ditARLpzHfjfJEA5yWe0uVIDQGMEyRFfGCfAbrwjHpwSI8BGqySOoR8WQGe1HsA8u0BALsA+Ef1tNUAxoOniUqJAwGjvlTruXhrAtIczEyab97/+m3lEManJv3XEyYZYuijAQQWdtYxSO0CpGUBQNz8lwA1hTiQy1zJAW1NzRuA1N8CPSam/Y5I2wIlPufEzrjDATpXG7kayBsDPwfIv1iUpwOQFGffjryNAOhm6zdXaJkA=\",\"dtype\":\"float64\",\"shape\":[91]}},\"selected\":{\"id\":\"20a06083-a08f-402e-a166-53e00ae3d1c6\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"207017d5-7c07-4632-9ca7-0467df0c7e70\",\"type\":\"UnionRenderers\"}},\"id\":\"2fad3807-14d5-4105-8d10-df5d2260103e\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"7ab9fdfb-298b-4b61-a00a-fecfda8ae28d\",\"type\":\"PanTool\"},{\"attributes\":{\"data_source\":{\"id\":\"6f41ebaf-8e24-4b1f-9943-8e92b50e40a2\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"55788bac-0509-4db3-851e-2ad5507b44bc\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"578311d8-8ca8-43a6-806a-dd2afe910f4c\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"677bd7ee-c5eb-42a9-a2d6-be65c7da20d5\",\"type\":\"CDSView\"}},\"id\":\"4703dd56-5d0b-49f2-963e-c2562d3796aa\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"055b002b-75de-4d85-bdaf-f896de75b7a5\",\"type\":\"Circle\"},{\"attributes\":{},\"id\":\"30243c20-8f2c-4f7f-a05e-e3956e59775e\",\"type\":\"WheelZoomTool\"},{\"attributes\":{\"overlay\":{\"id\":\"4a9f91d0-bd1e-4b38-bffa-5b3399973856\",\"type\":\"BoxAnnotation\"}},\"id\":\"1beec0d7-5cc8-481c-9e26-923bd16df227\",\"type\":\"BoxZoomTool\"},{\"attributes\":{},\"id\":\"5ebd41b2-5f1c-4b19-ad41-495539ca8c9b\",\"type\":\"SaveTool\"},{\"attributes\":{},\"id\":\"350ce84d-0611-4f8a-9f43-8258d5482ea6\",\"type\":\"ResetTool\"},{\"attributes\":{},\"id\":\"7d66043f-2f80-4c49-b7c1-1e7b29a0af66\",\"type\":\"HelpTool\"},{\"attributes\":{},\"id\":\"00c06ead-0e6d-4bb5-a14e-4e78d94233df\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"data_source\":{\"id\":\"2fad3807-14d5-4105-8d10-df5d2260103e\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"bc1afe64-4989-434a-9ccf-3b70f18a4f8f\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"055b002b-75de-4d85-bdaf-f896de75b7a5\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"a2ddf5c0-8f4e-4312-96e6-f199ecabe5a8\",\"type\":\"CDSView\"}},\"id\":\"790faec9-4ab0-4793-9e2e-62921cf31ce8\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"source\":{\"id\":\"2fad3807-14d5-4105-8d10-df5d2260103e\",\"type\":\"ColumnDataSource\"}},\"id\":\"a2ddf5c0-8f4e-4312-96e6-f199ecabe5a8\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"7766407d-4b55-46d4-bb9a-74298e71f74a\",\"type\":\"UnionRenderers\"}],\"root_ids\":[\"162587d8-457e-4ad3-8646-f6b67d9afcaa\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.16\"}};\n",
+ " var render_items = [{\"docid\":\"f581f7f5-ad4a-4b4d-9c36-5011c136da3e\",\"elementid\":\"fd92f5ec-a952-4c56-9f57-280a5da5f624\",\"modelid\":\"162587d8-457e-4ad3-8646-f6b67d9afcaa\"}];\n",
+ " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
+ "\n",
+ " }\n",
+ " if (root.Bokeh !== undefined) {\n",
+ " embed_document(root);\n",
+ " } else {\n",
+ " var attempts = 0;\n",
+ " var timer = setInterval(function(root) {\n",
+ " if (root.Bokeh !== undefined) {\n",
+ " embed_document(root);\n",
+ " clearInterval(timer);\n",
+ " }\n",
+ " attempts++;\n",
+ " if (attempts > 100) {\n",
+ " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n",
+ " clearInterval(timer);\n",
+ " }\n",
+ " }, 10, root)\n",
+ " }\n",
+ "})(window);"
+ ],
+ "application/vnd.bokehjs_exec.v0+json": ""
+ },
+ "metadata": {
+ "application/vnd.bokehjs_exec.v0+json": {
+ "id": "162587d8-457e-4ad3-8646-f6b67d9afcaa"
+ }
+ },
+ "output_type": "display_data"
+ }
+ ],
"source": [
"p = figure(plot_width=400, plot_height=400, title=\"PCA of Train data\")\n",
"p.circle(z_tr[y_tr==0, 0], z_tr[y_tr==0, 1], line_color=\"orange\", fill_color=\"orange\")\n",
@@ -693,9 +1970,60 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/javascript": [
+ "(function(root) {\n",
+ " function embed_document(root) {\n",
+ " \n",
+ " var docs_json = {\"8569d5e9-157c-4830-b35b-978236e6dc15\":{\"roots\":{\"references\":[{\"attributes\":{\"plot\":null,\"text\":\"PCA of Train data\"},\"id\":\"6cdf3505-a1b2-4907-ac0b-5aff466f3395\",\"type\":\"Title\"},{\"attributes\":{},\"id\":\"6a0308b0-15ee-4b19-85fd-d89463b3e43c\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"d9d33ed3-0481-437f-ba80-47ea8a19d71e\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"callback\":null},\"id\":\"e48717ee-bf3b-4969-8118-daa1b2c244ea\",\"type\":\"DataRange1d\"},{\"attributes\":{},\"id\":\"c0456d6f-b7f4-495b-98c7-9a353993e75f\",\"type\":\"ResetTool\"},{\"attributes\":{},\"id\":\"83504468-f82a-4bf7-ae7b-5534254eb89f\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"ca1a5fd6-322b-4699-9f7f-c77e4d41f869\",\"type\":\"Circle\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"/Si/hE7yN8C6LbeVIKI7wMMdNO2D8inAf+ipwzArOcDAL+3Xn/s1wDpQqZT/RC3A0VDAJ94uPcBFfI+FtRMowC2xhdPCKDbAh/tMbl75GsCLjArEw/I5wFIeYVW4nzjA+3HQp7TxKsDw+0WEIxQ2wAcO2cuCFz/A4CDm9licNcDGtBtiHbs5wHoGXaS4TTjAbVP5QWQsuD/era7rU1o3wBKDDJQ51zTAFZbmKzXTN8DIH3gJwY04wP/xZL+6Lj3AUkAxbHHkPsAIDXLNd+AiwJ8e4cfbYTvAYfI139CSN8A69w0ghAQOwC6ADoIpczXAxEDA7yA4QMCjNRjYobcowFv7IezNkTHA7S9ynqPANMCd9IoXjBg7wOLmNaL24TPA46nyBgS0NcDWCO503F4nwNbB0lfCnPu/XvHMQOpZ/b+9PLtTrjkwwPjSg1Z3kD7AT/8gRchQMsCZtgXC/hAwwJlJaCSHEBzAy0yjL87sPcDBp9RjzaMtwHxLFCQxBCNAcBQkl+BpAsB/Qr8LB2MuwDVCpP4iizfABZaDj85/PkDWX3fIOSE9wK48LhtqRzjAX70SodsvOcC+2/mOfvhBwGJMpTrvDzDAK39eBbxjMcCwB+gEkcoCQEnRjZb8iSnAz5Dh2+j+EsAGyAlM39oHQH9cMAtzIzHAZ5wsm+mrIsCDkyzz8YM1wC5kNz067DnANeanmreYJcAjXi1Yz7IrwA9R6qUBfTTAQks43xlAPMBYeSwB/y01wE5I/c2VkzLAANgvIXagREAfaGCbqiguwN+beVChYjvAr+Z9nW/MM8Cpd1pFUEYVwJkdq+B5LDTAhyAPgUT+HsATLFQzmOw1wGeQb5ytOCfAuci/rTgkMsAiDs6wl9A3wFw5pdAaRTrAvZR9mMg6JEBUVyucjKIywM0trS/48ANAxao8AjDuOsDVRS6rQjIVwEG2mc3L7TPASSgSW+zsL8A=\",\"dtype\":\"float64\",\"shape\":[91]},\"y\":{\"__ndarray__\":\"SmsRgBg8IUC7oD0b4RAvwB35A0JM8CnAzl/S3gASSkBQJfXGvD0YQGoNRpWAkzZA1lwfrDmBHkA06UHxc0kYQLJMpdZFZSVA0oe0tBALNMArRmWoxfwiQB9tLdeqCiXAEZb3q3/yJcA+RjvKlI8ywAC+oHGJGzLA70Yo+Dj2jj9zV2of099BQCzMeir/JzbA+MjwoBiKRsBN8tZT0QQzwFwIlUXLmirAntwY8xrtLMDl9QASdnQqwKf2eC5KKzBA4FF8NTxo+T/2Yz1KISY7wN/tM+cnMTtAs/VrBGJsMkAO7adwqZlAwKTby3spuQJAZThzpKyvK0BVxCjuMI00wLZmpr6nPzXAJTwBBDRkN8DXDMhBSWUMwF1IK1kH9B3AZqrQl1vXK0A3EeHgZhVCwLGyZkqlax/AOzHcEWMThT/Ku1RCqQwhwKe/3zZ4OSpAEp6Gdw51TUAcwlPljvMtwG8hT9sUCjbA+X0D8p+aGUB8Y+YlKhgvwIq/wa5PsC/AfQqxRAXdIUAQOz28XxYhwIvoWu+DLDHAw5EC14kYM0DS02Dp/lgWwOYhg1GW6jPAp3XzbwprK8CYCgw0UtQqwAqj9yrVFSHAdmNTQPu9KsAk1tK2sLgswIyMXflGJCjAxuEk8BUAQEAxsLks9bAUQOh0mgDoEzfAAJlW4YL0OcAS5j6QbBwswNEV4Ni+azLAMt49bRyOJsDpE8fUyZodQOw4mT27ditARLpzHfjfJEA5yWe0uVIDQGMEyRFfGCfAbrwjHpwSI8BGqySOoR8WQGe1HsA8u0BALsA+Ef1tNUAxoOniUqJAwGjvlTruXhrAtIczEyab97/+m3lEManJv3XEyYZYuijAQQWdtYxSO0CpGUBQNz8lwA1hTiQy1zJAW1NzRuA1N8CPSam/Y5I2wIlPufEzrjDATpXG7kayBsDPwfIv1iUpwOQFGffjryNAOhm6zdXaJkA=\",\"dtype\":\"float64\",\"shape\":[91]}},\"selected\":{\"id\":\"609655c7-ca0a-423a-a954-f4399ff7e1b1\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"83504468-f82a-4bf7-ae7b-5534254eb89f\",\"type\":\"UnionRenderers\"}},\"id\":\"2c6b05b4-3dea-437e-85b6-d6fd26465ccd\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"609655c7-ca0a-423a-a954-f4399ff7e1b1\",\"type\":\"Selection\"},{\"attributes\":{\"data_source\":{\"id\":\"5f5a8c6c-8687-4192-aa48-f8aa6e6ebcdb\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"c48cf889-1333-4f02-b9b9-47dcabe0bb11\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"b8b8b120-30d7-4df9-919b-208d14e0d65b\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"495c8052-152a-4c48-8c3d-e63a2614b1ca\",\"type\":\"CDSView\"}},\"id\":\"bd77004f-0489-4dec-8f84-df0960786485\",\"type\":\"GlyphRenderer\"},{\"attributes\":{},\"id\":\"8c4d7994-41c3-4c45-a459-a1ba0ffab41a\",\"type\":\"SaveTool\"},{\"attributes\":{},\"id\":\"fa323ccb-57fd-41f8-8522-884d3656e065\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"fill_color\":{\"value\":\"blue\"},\"line_color\":{\"value\":\"blue\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"38e529b8-e78d-4d85-bc5a-176b1ed6e178\",\"type\":\"Circle\"},{\"attributes\":{\"axis_label\":\"PC2 (4.34%)\",\"formatter\":{\"id\":\"4abadc12-fd9e-4b6a-89e7-431cf3a75ddf\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"390e51ff-5e3c-4937-b317-74c5a60029ce\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"7fdb54ab-10a6-4b09-b51e-3b050fc3bb97\",\"type\":\"BasicTicker\"}},\"id\":\"c99f729d-8d33-4caf-8de5-d53abcd686b9\",\"type\":\"LinearAxis\"},{\"attributes\":{\"plot\":{\"id\":\"390e51ff-5e3c-4937-b317-74c5a60029ce\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"039a5711-406b-4a5a-a42f-c8de30ced1c4\",\"type\":\"BasicTicker\"}},\"id\":\"8c2df2b2-e145-4505-987a-82c74afe4988\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1d874b4c-adce-4779-97b8-3ea94b8bb012\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"ad0b799f-0136-47e4-8b25-ad01995ce2af\",\"type\":\"PanTool\"},{\"attributes\":{},\"id\":\"4abadc12-fd9e-4b6a-89e7-431cf3a75ddf\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"0582132a-56c4-4ab9-b4cf-259ed9fb9b1c\",\"type\":\"WheelZoomTool\"},{\"attributes\":{},\"id\":\"7fdb54ab-10a6-4b09-b51e-3b050fc3bb97\",\"type\":\"BasicTicker\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"ad0b799f-0136-47e4-8b25-ad01995ce2af\",\"type\":\"PanTool\"},{\"id\":\"0582132a-56c4-4ab9-b4cf-259ed9fb9b1c\",\"type\":\"WheelZoomTool\"},{\"id\":\"2a316f43-749b-438b-bdac-d17e0e6e238a\",\"type\":\"BoxZoomTool\"},{\"id\":\"8c4d7994-41c3-4c45-a459-a1ba0ffab41a\",\"type\":\"SaveTool\"},{\"id\":\"c0456d6f-b7f4-495b-98c7-9a353993e75f\",\"type\":\"ResetTool\"},{\"id\":\"1e63ae9d-2b09-409b-bc14-bff773d7b7dc\",\"type\":\"HelpTool\"}]},\"id\":\"9a9bcbb2-3163-4ff0-8839-6b822fe59f97\",\"type\":\"Toolbar\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"b8b8b120-30d7-4df9-919b-208d14e0d65b\",\"type\":\"Circle\"},{\"attributes\":{},\"id\":\"2da31b5d-fb5f-498f-835e-4f2c71c9b3a4\",\"type\":\"LinearScale\"},{\"attributes\":{\"below\":[{\"id\":\"819d78a8-e36f-497f-a1f1-9eaea3c71e05\",\"type\":\"LinearAxis\"}],\"left\":[{\"id\":\"c99f729d-8d33-4caf-8de5-d53abcd686b9\",\"type\":\"LinearAxis\"}],\"plot_height\":400,\"plot_width\":400,\"renderers\":[{\"id\":\"819d78a8-e36f-497f-a1f1-9eaea3c71e05\",\"type\":\"LinearAxis\"},{\"id\":\"8c2df2b2-e145-4505-987a-82c74afe4988\",\"type\":\"Grid\"},{\"id\":\"c99f729d-8d33-4caf-8de5-d53abcd686b9\",\"type\":\"LinearAxis\"},{\"id\":\"6a6dff96-2af8-4b2a-ac25-7dcdbf91f7d2\",\"type\":\"Grid\"},{\"id\":\"0da4ffd8-03dd-4d25-896c-a2cb21a991dc\",\"type\":\"BoxAnnotation\"},{\"id\":\"bd77004f-0489-4dec-8f84-df0960786485\",\"type\":\"GlyphRenderer\"},{\"id\":\"438675e5-f102-4891-8bcf-c2a9d3c48b11\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"6cdf3505-a1b2-4907-ac0b-5aff466f3395\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"9a9bcbb2-3163-4ff0-8839-6b822fe59f97\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"29b236fa-aed7-4bee-8d07-f0d3fcb965a1\",\"type\":\"DataRange1d\"},\"x_scale\":{\"id\":\"1d874b4c-adce-4779-97b8-3ea94b8bb012\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"e48717ee-bf3b-4969-8118-daa1b2c244ea\",\"type\":\"DataRange1d\"},\"y_scale\":{\"id\":\"2da31b5d-fb5f-498f-835e-4f2c71c9b3a4\",\"type\":\"LinearScale\"}},\"id\":\"390e51ff-5e3c-4937-b317-74c5a60029ce\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"fill_color\":{\"value\":\"orange\"},\"line_color\":{\"value\":\"orange\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"c48cf889-1333-4f02-b9b9-47dcabe0bb11\",\"type\":\"Circle\"},{\"attributes\":{\"axis_label\":\"PC1 (8.19%)\",\"formatter\":{\"id\":\"d9d33ed3-0481-437f-ba80-47ea8a19d71e\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"390e51ff-5e3c-4937-b317-74c5a60029ce\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"039a5711-406b-4a5a-a42f-c8de30ced1c4\",\"type\":\"BasicTicker\"}},\"id\":\"819d78a8-e36f-497f-a1f1-9eaea3c71e05\",\"type\":\"LinearAxis\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"plot\":null,\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"0da4ffd8-03dd-4d25-896c-a2cb21a991dc\",\"type\":\"BoxAnnotation\"},{\"attributes\":{\"callback\":null},\"id\":\"29b236fa-aed7-4bee-8d07-f0d3fcb965a1\",\"type\":\"DataRange1d\"},{\"attributes\":{},\"id\":\"039a5711-406b-4a5a-a42f-c8de30ced1c4\",\"type\":\"BasicTicker\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\":\"390e51ff-5e3c-4937-b317-74c5a60029ce\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"7fdb54ab-10a6-4b09-b51e-3b050fc3bb97\",\"type\":\"BasicTicker\"}},\"id\":\"6a6dff96-2af8-4b2a-ac25-7dcdbf91f7d2\",\"type\":\"Grid\"},{\"attributes\":{\"data_source\":{\"id\":\"2c6b05b4-3dea-437e-85b6-d6fd26465ccd\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"38e529b8-e78d-4d85-bc5a-176b1ed6e178\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"ca1a5fd6-322b-4699-9f7f-c77e4d41f869\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"d819e735-b24e-4248-861f-06dd05f4e277\",\"type\":\"CDSView\"}},\"id\":\"438675e5-f102-4891-8bcf-c2a9d3c48b11\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"source\":{\"id\":\"2c6b05b4-3dea-437e-85b6-d6fd26465ccd\",\"type\":\"ColumnDataSource\"}},\"id\":\"d819e735-b24e-4248-861f-06dd05f4e277\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"1e63ae9d-2b09-409b-bc14-bff773d7b7dc\",\"type\":\"HelpTool\"},{\"attributes\":{\"source\":{\"id\":\"5f5a8c6c-8687-4192-aa48-f8aa6e6ebcdb\",\"type\":\"ColumnDataSource\"}},\"id\":\"495c8052-152a-4c48-8c3d-e63a2614b1ca\",\"type\":\"CDSView\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"s4dscZTUFsCsUR4MMntMQEacKLQ+z0dAZjHJmj/mKMDavt88hvAvQPepnplzfeQ/iRqKdbXJRUDD41gKeYQzQOEzq9HxPkdAof95CJrpUUCXVoj1RC9SQFRDbl6DGfW//wiVFHpZJEDanZ+ntBVJQPXwzwnJIlNAD4fQrjFbQkApN+KVnwFMQJZQEet7gUJAB8OIkj+NPkA6POlDoostwF2LPGBkvxtAb0MN5dbzFEBG8wS/hXFOQNvWE0osDk1AAx0Rr8ddOEBhAMw5xFA0QMbOy//t9VFAuP2PyC4xN0C918OCG0c4QNvLJkWY3EFAWeQZTmXUSUB6OcmTg1E5QP7aMVQ/kSHA3uDhUYTsDkBfqygeHqg1QBm4Scy720JAYGv9cKPgREC3MSH71K8tQLpNrTITFlRAHXxspr3+SEDjaqfxTofxv1Zkp2gpcSNAyZ7O8pNKUEDDp6vqvAxQQO/AHnRFIlNA\",\"dtype\":\"float64\",\"shape\":[45]},\"y\":{\"__ndarray__\":\"5PVobXIYNEAbI7kdXVw2wEpKDc6nBgHAtMNFMWd/NMAQK6V1J5EJwB7i+V5UxxZA8RXRZPOeIcBhsnMT6sE3wJFnXaFJpxhAhHAj30TDz7/KqmMKc+w9wH/oBR/sckJAJuQKfyZNL0B5ResFsvEoQF959I4Mn0PAoDzn6xhBNkB4LAS7Jfk1wDIPw4n7pChAzQEh+La0F8DzZ9i1DoIowBGmZti8ByRA6Nnjzr/EQsAKobWnnulFQG7mTrGWVzTA9GZR60VCFkCj1E4NSO1BQORR0RpiqwrArZcb6qPaTUBb9JxAoDQ8QETAWzE+bjNAbjnJ9riZNUDio8BMqBFGQJr/aX26KUNALA4ZgbopSUBeJV1jAgM1QH555P1vjyxAc92EhwMyRUBzj8fL9JlDQF+DmsrFiBVA58tVN1cLQMAVcmzzh0k/QDILsl0qHyhAZg3FVBo0OcDfvGZhQ6Y2wHad6WtZ/hjA\",\"dtype\":\"float64\",\"shape\":[45]}},\"selected\":{\"id\":\"6a0308b0-15ee-4b19-85fd-d89463b3e43c\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"fa323ccb-57fd-41f8-8522-884d3656e065\",\"type\":\"UnionRenderers\"}},\"id\":\"5f5a8c6c-8687-4192-aa48-f8aa6e6ebcdb\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"overlay\":{\"id\":\"0da4ffd8-03dd-4d25-896c-a2cb21a991dc\",\"type\":\"BoxAnnotation\"}},\"id\":\"2a316f43-749b-438b-bdac-d17e0e6e238a\",\"type\":\"BoxZoomTool\"}],\"root_ids\":[\"390e51ff-5e3c-4937-b317-74c5a60029ce\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.16\"}};\n",
+ " var render_items = [{\"docid\":\"8569d5e9-157c-4830-b35b-978236e6dc15\",\"elementid\":\"d0c9488f-b9af-4105-b2c0-dd93310660ef\",\"modelid\":\"390e51ff-5e3c-4937-b317-74c5a60029ce\"}];\n",
+ " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
+ "\n",
+ " }\n",
+ " if (root.Bokeh !== undefined) {\n",
+ " embed_document(root);\n",
+ " } else {\n",
+ " var attempts = 0;\n",
+ " var timer = setInterval(function(root) {\n",
+ " if (root.Bokeh !== undefined) {\n",
+ " embed_document(root);\n",
+ " clearInterval(timer);\n",
+ " }\n",
+ " attempts++;\n",
+ " if (attempts > 100) {\n",
+ " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n",
+ " clearInterval(timer);\n",
+ " }\n",
+ " }, 10, root)\n",
+ " }\n",
+ "})(window);"
+ ],
+ "application/vnd.bokehjs_exec.v0+json": ""
+ },
+ "metadata": {
+ "application/vnd.bokehjs_exec.v0+json": {
+ "id": "390e51ff-5e3c-4937-b317-74c5a60029ce"
+ }
+ },
+ "output_type": "display_data"
+ }
+ ],
"source": [
"vars = pca.explained_variance_ratio_\n",
"p = figure(plot_width=400, plot_height=400, title=\"PCA of Train data\")\n",
@@ -718,11 +2046,70 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/javascript": [
+ "(function(root) {\n",
+ " function embed_document(root) {\n",
+ " \n",
+ " var docs_json = {\"9462ff19-ef8a-43b7-bc28-c2ae0b0ca382\":{\"roots\":{\"references\":[{\"attributes\":{},\"id\":\"b378cb1e-4305-4c29-a742-a30eacc219c4\",\"type\":\"PanTool\"},{\"attributes\":{\"bottom_units\":\"screen\",\"fill_alpha\":{\"value\":0.5},\"fill_color\":{\"value\":\"lightgrey\"},\"left_units\":\"screen\",\"level\":\"overlay\",\"line_alpha\":{\"value\":1.0},\"line_color\":{\"value\":\"black\"},\"line_dash\":[4,4],\"line_width\":{\"value\":2},\"plot\":null,\"render_mode\":\"css\",\"right_units\":\"screen\",\"top_units\":\"screen\"},\"id\":\"2089e332-ca04-4e4d-bbe0-b5b0bbd065db\",\"type\":\"BoxAnnotation\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"b378cb1e-4305-4c29-a742-a30eacc219c4\",\"type\":\"PanTool\"},{\"id\":\"43630d74-5f58-4a69-b8cc-09b48ec548bc\",\"type\":\"WheelZoomTool\"},{\"id\":\"2cfc85ae-c3f1-433a-a8af-2f294f9e339c\",\"type\":\"BoxZoomTool\"},{\"id\":\"0d7d47eb-f357-4219-be39-7f6c3913df48\",\"type\":\"SaveTool\"},{\"id\":\"8783b23a-5b83-442b-baae-22d06a3d77de\",\"type\":\"ResetTool\"},{\"id\":\"83816b07-f311-491a-8543-c26247ea8a36\",\"type\":\"HelpTool\"}]},\"id\":\"ada31b3a-3c21-46c4-9768-41986fcef0ea\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"43630d74-5f58-4a69-b8cc-09b48ec548bc\",\"type\":\"WheelZoomTool\"},{\"attributes\":{\"overlay\":{\"id\":\"2089e332-ca04-4e4d-bbe0-b5b0bbd065db\",\"type\":\"BoxAnnotation\"}},\"id\":\"2cfc85ae-c3f1-433a-a8af-2f294f9e339c\",\"type\":\"BoxZoomTool\"},{\"attributes\":{},\"id\":\"0d7d47eb-f357-4219-be39-7f6c3913df48\",\"type\":\"SaveTool\"},{\"attributes\":{\"callback\":null},\"id\":\"eb051b83-72e5-4188-b22d-798ba987aab6\",\"type\":\"DataRange1d\"},{\"attributes\":{},\"id\":\"8783b23a-5b83-442b-baae-22d06a3d77de\",\"type\":\"ResetTool\"},{\"attributes\":{},\"id\":\"28f202ff-4ff3-4908-8881-242f880d380f\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"axis_label\":\"PC2 (4.34%)\",\"formatter\":{\"id\":\"ebe9c16d-612d-4f7b-bbac-a61ecdc4d0f1\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"3b01317b-4560-4a65-ba6d-7e2742aa01c3\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"0066e7da-e2c2-41e8-a3fe-3990150548a4\",\"type\":\"BasicTicker\"}},\"id\":\"313df7bc-1e81-4fb1-8f77-7f2be3aa608f\",\"type\":\"LinearAxis\"},{\"attributes\":{},\"id\":\"83816b07-f311-491a-8543-c26247ea8a36\",\"type\":\"HelpTool\"},{\"attributes\":{},\"id\":\"ebe9c16d-612d-4f7b-bbac-a61ecdc4d0f1\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"c0b5b80d-9461-4c9a-8149-4f73129df83c\",\"type\":\"Circle\"},{\"attributes\":{\"data_source\":{\"id\":\"024379a1-fd21-4dd2-b715-dfb296d7e044\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"0870a187-f7e5-4cd6-8d42-55f96bb5461c\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"c0b5b80d-9461-4c9a-8149-4f73129df83c\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"d29987fe-1bd6-4fc5-9a14-95d3cca6b413\",\"type\":\"CDSView\"}},\"id\":\"7cbf1114-36ef-4d48-b794-ff9274370312\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"source\":{\"id\":\"024379a1-fd21-4dd2-b715-dfb296d7e044\",\"type\":\"ColumnDataSource\"}},\"id\":\"d29987fe-1bd6-4fc5-9a14-95d3cca6b413\",\"type\":\"CDSView\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"Da7fNG/ZJkBX5akTCONLQGb3T+26HENA3OF45a/OJ0A+JQtYuggxwMYlG5/G+xdApYP2gR10I0A3HiM2XrNBQNiOs4ipuhdAMafuYwmhPED949T2x+grQDrwNWw3Eh5Axip2+RdmOkBJaK26QqNMQLygwDyKsEVAooGu1FToMcBt/YFUOrkrQEdMTS6sgzpAge2H7Ye+TUAPRy+5QxVPQMEVm/y+ODlAmKTj5CPNTUAW77J6yXFKQFzxE58baUVAYpWdPdxUKkAGJqtYjfdBQE1u1lwi4EZAMnZzibN0SECqWw5yqmxKQN1ek4f7K0FAysHtN/+MOkAN0LlLVmg0QNMNQXxgUUFAPd/CO/QrVEC95q9DZpMrQHT8e3lxmATAHRhOYh5/PkCmr3vvifUyQCxme02yAzVATeUPDNgyQEC6639iDPEsQGogX/VFfUZAyDbFMA1qT0ARGOcQsxs2QFcOicPHYTpAUIm5K/zeQkA=\",\"dtype\":\"float64\",\"shape\":[46]},\"y\":{\"__ndarray__\":\"0xj+K4X0BkAj47n5AJ81wF5jdJKTKDvADYnUrxgY+T9yGkH3vuk6QHNMAWPEpS5ABHLTkA6hL0DfG3VYvrQhQDA7ukLW8RvAXWkywkuLMkAdV8lfgP4sQAz1wqSZXRtACVDi38HRBsCTlfU106YEQOVgO6PDxijAHohBcumMQEC40j2lqLAjwFuxwupljR7Atk0pha0OIsCvBQh9al4uwGTcLA36tA1AuclownL4OMC2keS665cwwJWuUjRjpRpA7v2Egz3IAcDvyCxRfnMkwOJMp66qwzDA0Z6z+SVPMMD/VDyPhj0zwKD2HyVJ4R3A+bJ3Rma6EEAkodSlxMRBQD/zIkvPa01AKBCUmVGLEsB2JCFLCfQwwAfmxFWGqkZAWzW/3xqtHUBRaZkJWDUtQKg8OGl9bSlAAORyNSRh0D+BmYhY9QkgQOYI4pwItTHA5eLVdfOlI8CAgsx3axhJQKYfJ+x0iytAeYCv7TRIFMA=\",\"dtype\":\"float64\",\"shape\":[46]}},\"selected\":{\"id\":\"b59c2931-86f7-4e88-9cd4-edf5927916ba\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"670bdd62-8431-437d-a621-aeb874396b4a\",\"type\":\"UnionRenderers\"}},\"id\":\"09cd2f5d-a2e5-461b-a5ae-763b5682b3a7\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"865697d8-682e-4995-8a94-bc9eb4deac4f\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"047e2375-f530-4bbb-ba68-2e759bb40ac2\",\"type\":\"LinearScale\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\":\"3b01317b-4560-4a65-ba6d-7e2742aa01c3\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"0066e7da-e2c2-41e8-a3fe-3990150548a4\",\"type\":\"BasicTicker\"}},\"id\":\"11436559-e5fa-4e94-93d1-3e57f22d24b8\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"0066e7da-e2c2-41e8-a3fe-3990150548a4\",\"type\":\"BasicTicker\"},{\"attributes\":{\"fill_color\":{\"value\":\"blue\"},\"line_color\":{\"value\":\"blue\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"0870a187-f7e5-4cd6-8d42-55f96bb5461c\",\"type\":\"Circle\"},{\"attributes\":{},\"id\":\"670bdd62-8431-437d-a621-aeb874396b4a\",\"type\":\"UnionRenderers\"},{\"attributes\":{\"axis_label\":\"PC1 (8.19%)\",\"formatter\":{\"id\":\"865697d8-682e-4995-8a94-bc9eb4deac4f\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"3b01317b-4560-4a65-ba6d-7e2742aa01c3\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"13528d4a-0f2e-4629-810d-2de948b9833f\",\"type\":\"BasicTicker\"}},\"id\":\"ace706a4-075f-4d90-9264-d4abce004f08\",\"type\":\"LinearAxis\"},{\"attributes\":{\"fill_color\":{\"value\":\"orange\"},\"line_color\":{\"value\":\"orange\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"2148dbde-ad02-4142-9239-9de7ea263ed6\",\"type\":\"Circle\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"x\":{\"field\":\"x\"},\"y\":{\"field\":\"y\"}},\"id\":\"433ed932-afd7-4e0f-be6c-491087a7c97b\",\"type\":\"Circle\"},{\"attributes\":{},\"id\":\"b6e5b1cd-5027-43e0-bbcd-21b12cddfb98\",\"type\":\"Selection\"},{\"attributes\":{\"source\":{\"id\":\"09cd2f5d-a2e5-461b-a5ae-763b5682b3a7\",\"type\":\"ColumnDataSource\"}},\"id\":\"f5749626-f5b9-45b3-aca3-6cc155bb7359\",\"type\":\"CDSView\"},{\"attributes\":{\"below\":[{\"id\":\"ace706a4-075f-4d90-9264-d4abce004f08\",\"type\":\"LinearAxis\"}],\"left\":[{\"id\":\"313df7bc-1e81-4fb1-8f77-7f2be3aa608f\",\"type\":\"LinearAxis\"}],\"plot_height\":400,\"plot_width\":400,\"renderers\":[{\"id\":\"ace706a4-075f-4d90-9264-d4abce004f08\",\"type\":\"LinearAxis\"},{\"id\":\"a6b68a1a-d626-4b97-9940-8b8f461e18e9\",\"type\":\"Grid\"},{\"id\":\"313df7bc-1e81-4fb1-8f77-7f2be3aa608f\",\"type\":\"LinearAxis\"},{\"id\":\"11436559-e5fa-4e94-93d1-3e57f22d24b8\",\"type\":\"Grid\"},{\"id\":\"2089e332-ca04-4e4d-bbe0-b5b0bbd065db\",\"type\":\"BoxAnnotation\"},{\"id\":\"cbbce434-c409-4019-9255-e686122e7422\",\"type\":\"GlyphRenderer\"},{\"id\":\"7cbf1114-36ef-4d48-b794-ff9274370312\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"6279164d-1d6c-4095-854d-88f37333827c\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"ada31b3a-3c21-46c4-9768-41986fcef0ea\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"eb051b83-72e5-4188-b22d-798ba987aab6\",\"type\":\"DataRange1d\"},\"x_scale\":{\"id\":\"047e2375-f530-4bbb-ba68-2e759bb40ac2\",\"type\":\"LinearScale\"},\"y_range\":{\"id\":\"c0fa0dd3-f4e4-49e9-824b-04b94b09c30d\",\"type\":\"DataRange1d\"},\"y_scale\":{\"id\":\"b93b4687-55cb-4723-b5d6-eefbdbc61cb5\",\"type\":\"LinearScale\"}},\"id\":\"3b01317b-4560-4a65-ba6d-7e2742aa01c3\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"callback\":null,\"data\":{\"x\":{\"__ndarray__\":\"r3w6U3SjJ0DUrHUEWwYkwAu5NkMDryzApf8P/LV5db/IoqlpbWrwv0zupfqjHjzA9RSNE3jNMMC5cLjOP0MywH+kgXg94TPAz8qB7l6zPMDJ9ynKS6svwNLGRQH86jfAqxBnOSVjN8CoR1n89OctwMZAbcLoeDDAJAOO5gVSOsB8TDRF2D4hwJiKdI/vKTvAt1G91TIWNMA9DUD/pgI3wLYCFzMfHjvADEaeCywkP8CEGhQoAzY7wOrsaWWp/BzASbNkLoiIEUBewxoBJXgjwMmHrt8RdjnA0xVIqQGmKsBt5jHDBqkxwDIx/MPH6C7A2hnXGz4dOMAIG9wlcJMywHMhIlPG3TbARgBEURUFMcAtuQm/tSo6wPIkWF2PXxHAwB8iildEGsAhZqoc85oSwHjZ6qmVASPAo0JkkntYL8DyssBNLkr+v9oMLbXiZAHApZynzZufMcAcnCozufoWwBZm+cvvIzTAGFdZCJnVHsD5DDkxgN0wwJCw34QumgpAGiekI8YsGsCBgbtsL5MVwDOEjb6m/ifA2FN021gc8b/+cPYWFB41wHCof4xkvA9AunONx7IU/j8QIkn5Uc0XwBMDeb+22TLAbbHQbAGINcCc56Gka4UkQEaSs6jExRfAQ2Rm6nX+E8Aygw3i/Jg+wFvDT4b7ETjAvcph6n3EJMB+oROCWDIwwLNafEw9gjXAPEH31mFQIkAS1iZmRXQmwHOO1GGUqCbAyG/3OacCJkDgUz+tTB0wwCebB6XlQTvAun7vb8FrMECdzwJk0jEaQEBGGtxlNiNAShaM6+6eM8Bhg0u8k84qwHFnjJlJKRjAmFL71tc9L8BDMq8KuwcCwP1Mr9ksGTbAN7pzPNlxJ8Dv0JBWDTopwFaYeNfQ8QxAlC81+uCuB0AHplt3Wy8iwDsANum4NC7AG0HcvSHU/z/yJkWef38rwF8/axTE8zTA\",\"dtype\":\"float64\",\"shape\":[90]},\"y\":{\"__ndarray__\":\"SWUjdYr7NsDo+b7zK98wwO6/jNPo9UJAVLbT5VVOMsC5J3kLJ64EwHhYKMgP8jNAOpduok2xDEDJR2L4hSc6wLcToAhvyBZAuNflTzbWN0BI42WEsLi0Pzjx3mwRjj5A2kzr8MvdEEB56rgQK6FDQGmcsHhGHzDAbris1N55A0AbtDZQIz1FQHOi2D3csTpANdiP7TXs3j8Z5RGT4qwhQM3djMzvEDXAX8viTKahIcAc4ooutsgrQKpa+xXcrj3AifRXPwh2NMC5g/0KDRw+wFTiOgS64BzA2hz24SMzJ0CPzGORhOodwN1dV9V3CTPAJgdGvuGmMsAmyI7fZ5UgwI9VAqkN3TbADWiV8x/oNMBUIWjjOGEiwK1mo0ej2jDAsZEGkKn0N8A1n4hOCYgVwKzDUpkIkhrAW5SrbihEJMBSWbTo67YqwF96hNfTvUDA1DbBDIQAMMAuRHoBlxIpQK0e9jzx6yvAfUue6Cyo9D9Pv3T05iIcQNHoYMbrnSXAHpTTVS9LMMCV8dsSI/86wHuH/f8eWu+/vLb93/fI1b/SrRYuFOIbwOMpZfkicDTA1qX/rCyLD0A50KAlgzkdwFrZjcvzjy7AuW5trUo3OcB412aUvN4wwFNAiowmNDPA0a6usI4SNsC6TtZ+RxIwQE18eW94UwFAIt9RIUhjOsDwdMdd9Kf1v1RcoV51DyPAfICZnZRwPMDXXe8LoccjwL66BpG1cSDAy2dDbRfYPcDJUT2syyUkwCfagVpxjA7A6jieVvCuMsA0UCdn4QveP7mtbaCun0FAR3Q0rDaVGkCdXkqBonY8wNpkNa58QEDAvBHzX6VgJ8CY11/mMonwP4bjmMMO7EFAYx4CNC8CKcCrQVToFFchwGginsyiFRZAdwqSlEbLLsD/4NE1CjgHQLMqgNLOPfa/Pju88sHEO8DUT+l5sI4jQHjJaGgQqUBA\",\"dtype\":\"float64\",\"shape\":[90]}},\"selected\":{\"id\":\"b6e5b1cd-5027-43e0-bbcd-21b12cddfb98\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"28f202ff-4ff3-4908-8881-242f880d380f\",\"type\":\"UnionRenderers\"}},\"id\":\"024379a1-fd21-4dd2-b715-dfb296d7e044\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"data_source\":{\"id\":\"09cd2f5d-a2e5-461b-a5ae-763b5682b3a7\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"2148dbde-ad02-4142-9239-9de7ea263ed6\",\"type\":\"Circle\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"433ed932-afd7-4e0f-be6c-491087a7c97b\",\"type\":\"Circle\"},\"selection_glyph\":null,\"view\":{\"id\":\"f5749626-f5b9-45b3-aca3-6cc155bb7359\",\"type\":\"CDSView\"}},\"id\":\"cbbce434-c409-4019-9255-e686122e7422\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"plot\":{\"id\":\"3b01317b-4560-4a65-ba6d-7e2742aa01c3\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"13528d4a-0f2e-4629-810d-2de948b9833f\",\"type\":\"BasicTicker\"}},\"id\":\"a6b68a1a-d626-4b97-9940-8b8f461e18e9\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"13528d4a-0f2e-4629-810d-2de948b9833f\",\"type\":\"BasicTicker\"},{\"attributes\":{},\"id\":\"b93b4687-55cb-4723-b5d6-eefbdbc61cb5\",\"type\":\"LinearScale\"},{\"attributes\":{\"callback\":null},\"id\":\"c0fa0dd3-f4e4-49e9-824b-04b94b09c30d\",\"type\":\"DataRange1d\"},{\"attributes\":{},\"id\":\"b59c2931-86f7-4e88-9cd4-edf5927916ba\",\"type\":\"Selection\"},{\"attributes\":{\"plot\":null,\"text\":\"PCA of Test data\"},\"id\":\"6279164d-1d6c-4095-854d-88f37333827c\",\"type\":\"Title\"}],\"root_ids\":[\"3b01317b-4560-4a65-ba6d-7e2742aa01c3\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.16\"}};\n",
+ " var render_items = [{\"docid\":\"9462ff19-ef8a-43b7-bc28-c2ae0b0ca382\",\"elementid\":\"ecbd62b0-3e13-4340-8ac1-4971af0727b5\",\"modelid\":\"3b01317b-4560-4a65-ba6d-7e2742aa01c3\"}];\n",
+ " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
+ "\n",
+ " }\n",
+ " if (root.Bokeh !== undefined) {\n",
+ " embed_document(root);\n",
+ " } else {\n",
+ " var attempts = 0;\n",
+ " var timer = setInterval(function(root) {\n",
+ " if (root.Bokeh !== undefined) {\n",
+ " embed_document(root);\n",
+ " clearInterval(timer);\n",
+ " }\n",
+ " attempts++;\n",
+ " if (attempts > 100) {\n",
+ " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n",
+ " clearInterval(timer);\n",
+ " }\n",
+ " }, 10, root)\n",
+ " }\n",
+ "})(window);"
+ ],
+ "application/vnd.bokehjs_exec.v0+json": ""
+ },
+ "metadata": {
+ "application/vnd.bokehjs_exec.v0+json": {
+ "id": "3b01317b-4560-4a65-ba6d-7e2742aa01c3"
+ }
+ },
+ "output_type": "display_data"
+ }
+ ],
"source": [
- "## exercise here"
+ "## exercise here\n",
+ "z_ts = pca.transform(x_ts)\n",
+ "vars = pca.explained_variance_ratio_\n",
+ "p = figure(plot_width=400, plot_height=400, title=\"PCA of Test data\")\n",
+ "p.circle(z_ts[y_ts==0, 0], z_ts[y_ts==0, 1], line_color=\"orange\", fill_color=\"orange\")\n",
+ "p.circle(z_ts[y_ts==1, 0], z_ts[y_ts==1, 1], line_color=\"blue\", fill_color=\"blue\")\n",
+ "p.xaxis.axis_label = \"PC1 (%.2f%%)\" % (100*vars[0])\n",
+ "p.yaxis.axis_label = \"PC2 (%.2f%%)\" % (100*vars[1])\n",
+ "show(p)"
]
},
{
@@ -761,7 +2148,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -774,21 +2161,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 30,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Cg8TpDATk3XI",
"outputId": "e9658389-474c-4bf5-f196-11d4518311b7"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',\n",
+ " metric_params=None, n_jobs=1, n_neighbors=10, p=2,\n",
+ " weights='uniform')"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"knn.fit(x_tr, y_tr)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -845,13 +2245,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "JISD2EVQ9Q9Z"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[27, 19],\n",
+ " [ 0, 90]])"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"conf = confusion_matrix(y_ts, y_pred_knn)\n",
@@ -870,13 +2282,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "pZVN8GKKdOhy"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "46"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"np.sum(y_ts==0) # total number of \"class 0\" samples in the test set"
]
@@ -893,13 +2316,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 35,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "1PVj7JbxdVk0"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "90"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"np.sum(y_ts==1) # total number of \"class 1\" samples in the test set"
]
@@ -928,13 +2362,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 36,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "-1-40TyQeAIt"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8602941176470589"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"(conf[0,0] + conf[1,1])/y_ts.shape[0] # y_ts.shape[0] is the sample size of the test set"
]
@@ -951,13 +2396,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 37,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "q0emRGAvfWi4"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.8602941176470589\n"
+ ]
+ }
+ ],
"source": [
"tp = conf[1,1]\n",
"tn = conf[0,0]\n",
@@ -984,13 +2437,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "a9JlR-LNe5ZI"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.0"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"conf[1,1] / (conf[1,1] + conf[1,0])"
]
@@ -1021,13 +2485,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 39,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "3KeLJcCbkSo6"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.8602941176470589"
+ ]
+ },
+ "execution_count": 39,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from sklearn.metrics import accuracy_score\n",
"accuracy_score(y_ts, y_pred_knn)"
@@ -1045,13 +2520,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 40,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "MgfhssjZmsg3"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1.0"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from sklearn.metrics import recall_score\n",
"recall_score(y_ts, y_pred_knn)"
@@ -1069,13 +2555,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 41,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "AKiUXIkPm-N3"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.8602941176470589\n",
+ "1.0\n"
+ ]
+ }
+ ],
"source": [
"from sklearn import metrics\n",
"print(metrics.accuracy_score(y_ts, y_pred_knn))\n",
@@ -1094,14 +2589,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 42,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "HXgvIJM2k3XQ",
"outputId": "0d2d0773-a292-40cb-d8e7-df6b6ee29ff2"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 0.59 0.74 46\n",
+ " 1 0.83 1.00 0.90 90\n",
+ "\n",
+ "avg / total 0.88 0.86 0.85 136\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"from sklearn import metrics\n",
"print(metrics.classification_report(y_ts, y_pred_knn))"
@@ -1159,14 +2668,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 43,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "OuoRfictk3XW",
"outputId": "9119acba-9d18-4076-eb3c-8346ba420579"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.6961630553262051\n"
+ ]
+ }
+ ],
"source": [
"print(metrics.matthews_corrcoef(y_ts, y_pred_knn))"
]
@@ -1241,7 +2758,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 44,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -1291,13 +2808,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 45,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "n12boA3k3Neo"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Accuracy = 0.750\n",
+ "MCC = 0.538\n"
+ ]
+ }
+ ],
"source": [
"from sklearn import metrics\n",
"knn = neighbors.KNeighborsClassifier(n_neighbors=10)\n",
@@ -1356,7 +2882,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 46,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -1390,13 +2916,70 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 48,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "-uoahY6yNcIv"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "### Fold 1 / 5 ###\n",
+ "TRAIN size: 108\n",
+ "-- class 0: 36 class 1: 72\n",
+ "TEST size: 28\n",
+ "-- class 0: 9 class 1: 19\n",
+ "\n",
+ "Model performance\n",
+ "Accuracy on TEST set: 0.929\n",
+ "MCC on TEST set: 0.839\n",
+ "\n",
+ "### Fold 2 / 5 ###\n",
+ "TRAIN size: 109\n",
+ "-- class 0: 36 class 1: 73\n",
+ "TEST size: 27\n",
+ "-- class 0: 9 class 1: 18\n",
+ "\n",
+ "Model performance\n",
+ "Accuracy on TEST set: 0.852\n",
+ "MCC on TEST set: 0.674\n",
+ "\n",
+ "### Fold 3 / 5 ###\n",
+ "TRAIN size: 109\n",
+ "-- class 0: 36 class 1: 73\n",
+ "TEST size: 27\n",
+ "-- class 0: 9 class 1: 18\n",
+ "\n",
+ "Model performance\n",
+ "Accuracy on TEST set: 0.815\n",
+ "MCC on TEST set: 0.567\n",
+ "\n",
+ "### Fold 4 / 5 ###\n",
+ "TRAIN size: 109\n",
+ "-- class 0: 36 class 1: 73\n",
+ "TEST size: 27\n",
+ "-- class 0: 9 class 1: 18\n",
+ "\n",
+ "Model performance\n",
+ "Accuracy on TEST set: 0.815\n",
+ "MCC on TEST set: 0.590\n",
+ "\n",
+ "### Fold 5 / 5 ###\n",
+ "TRAIN size: 109\n",
+ "-- class 0: 36 class 1: 73\n",
+ "TEST size: 27\n",
+ "-- class 0: 9 class 1: 18\n",
+ "\n",
+ "Model performance\n",
+ "Accuracy on TEST set: 0.926\n",
+ "MCC on TEST set: 0.837\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"## get the number of splitting operations\n",
"N = skf.get_n_splits(x_tr, y_tr)\n",
@@ -1412,7 +2995,7 @@
"## computing kNN accuracy & MCC on each test partition\n",
"i = 1\n",
"for (idx_tr, idx_ts) in skf.split(x_tr, y_tr):\n",
- " print(f\"### Fold {i+1} / {N:d} ###\")\n",
+ " print(f\"### Fold {i} / {N:d} ###\")\n",
" X_train, Y_train = x_tr[idx_tr], y_tr[idx_tr]\n",
" X_test, Y_test = x_tr[idx_ts], y_tr[idx_ts]\n",
" print(\"TRAIN size:\", X_train.shape[0])\n",
@@ -1451,9 +3034,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 49,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Average cross-validation accuracy: 0.867\n",
+ "Average cross-validation MCC: 0.701\n"
+ ]
+ }
+ ],
"source": [
"## note: we need to convert the lists to numpy arrays before computing the means\n",
"acc_avg = np.mean(np.array(acc_list))\n",
@@ -1472,13 +3064,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 50,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "-uoahY6yNcIv"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "### Iteration 1 ###\n",
+ "### Iteration 2 ###\n",
+ "### Iteration 3 ###\n",
+ "### Iteration 4 ###\n",
+ "### Iteration 5 ###\n",
+ "### Iteration 6 ###\n",
+ "### Iteration 7 ###\n",
+ "### Iteration 8 ###\n",
+ "### Iteration 9 ###\n",
+ "### Iteration 10 ###\n"
+ ]
+ }
+ ],
"source": [
"## how many repetitions?\n",
"N_CV = 10\n",
@@ -1519,13 +3128,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 52,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Average cross-validation accuracy: 0.854\n",
+ "Average cross-validation MCC: 0.670\n"
+ ]
+ }
+ ],
"source": [
"## note: we need to convert the lists to numpy arrays before computing the means\n",
- "# acc_avg = ...\n",
- "# mcc_avg = ...\n",
+ "acc_avg = np.mean(np.array(acc_list))\n",
+ "mcc_avg = np.mean(np.array(mcc_list))\n",
"\n",
"print(f\"Average cross-validation accuracy: {acc_avg:.3f}\")\n",
"print(f\"Average cross-validation MCC: {mcc_avg:.3f}\")"
@@ -1572,7 +3190,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.6.5"
}
},
"nbformat": 4,
diff --git a/chierici_practical_part2.ipynb b/chierici_practical_part2.ipynb
index a658b47..fbec1a2 100644
--- a/chierici_practical_part2.ipynb
+++ b/chierici_practical_part2.ipynb
@@ -35,7 +35,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -56,9 +56,298 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
Loading BokehJS ...\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/javascript": [
+ "\n",
+ "(function(root) {\n",
+ " function now() {\n",
+ " return new Date();\n",
+ " }\n",
+ "\n",
+ " var force = true;\n",
+ "\n",
+ " if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n",
+ " root._bokeh_onload_callbacks = [];\n",
+ " root._bokeh_is_loading = undefined;\n",
+ " }\n",
+ "\n",
+ " var JS_MIME_TYPE = 'application/javascript';\n",
+ " var HTML_MIME_TYPE = 'text/html';\n",
+ " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n",
+ " var CLASS_NAME = 'output_bokeh rendered_html';\n",
+ "\n",
+ " /**\n",
+ " * Render data to the DOM node\n",
+ " */\n",
+ " function render(props, node) {\n",
+ " var script = document.createElement(\"script\");\n",
+ " node.appendChild(script);\n",
+ " }\n",
+ "\n",
+ " /**\n",
+ " * Handle when an output is cleared or removed\n",
+ " */\n",
+ " function handleClearOutput(event, handle) {\n",
+ " var cell = handle.cell;\n",
+ "\n",
+ " var id = cell.output_area._bokeh_element_id;\n",
+ " var server_id = cell.output_area._bokeh_server_id;\n",
+ " // Clean up Bokeh references\n",
+ " if (id !== undefined) {\n",
+ " Bokeh.index[id].model.document.clear();\n",
+ " delete Bokeh.index[id];\n",
+ " }\n",
+ "\n",
+ " if (server_id !== undefined) {\n",
+ " // Clean up Bokeh references\n",
+ " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n",
+ " cell.notebook.kernel.execute(cmd, {\n",
+ " iopub: {\n",
+ " output: function(msg) {\n",
+ " var element_id = msg.content.text.trim();\n",
+ " Bokeh.index[element_id].model.document.clear();\n",
+ " delete Bokeh.index[element_id];\n",
+ " }\n",
+ " }\n",
+ " });\n",
+ " // Destroy server and session\n",
+ " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n",
+ " cell.notebook.kernel.execute(cmd);\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " /**\n",
+ " * Handle when a new output is added\n",
+ " */\n",
+ " function handleAddOutput(event, handle) {\n",
+ " var output_area = handle.output_area;\n",
+ " var output = handle.output;\n",
+ "\n",
+ " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n",
+ " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
+ " return\n",
+ " }\n",
+ "\n",
+ " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
+ "\n",
+ " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n",
+ " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n",
+ " // store reference to embed id on output_area\n",
+ " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
+ " }\n",
+ " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
+ " var bk_div = document.createElement(\"div\");\n",
+ " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
+ " var script_attrs = bk_div.children[0].attributes;\n",
+ " for (var i = 0; i < script_attrs.length; i++) {\n",
+ " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
+ " }\n",
+ " // store reference to server id on output_area\n",
+ " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " function register_renderer(events, OutputArea) {\n",
+ "\n",
+ " function append_mime(data, metadata, element) {\n",
+ " // create a DOM node to render to\n",
+ " var toinsert = this.create_output_subarea(\n",
+ " metadata,\n",
+ " CLASS_NAME,\n",
+ " EXEC_MIME_TYPE\n",
+ " );\n",
+ " this.keyboard_manager.register_events(toinsert);\n",
+ " // Render to node\n",
+ " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
+ " render(props, toinsert[toinsert.length - 1]);\n",
+ " element.append(toinsert);\n",
+ " return toinsert\n",
+ " }\n",
+ "\n",
+ " /* Handle when an output is cleared or removed */\n",
+ " events.on('clear_output.CodeCell', handleClearOutput);\n",
+ " events.on('delete.Cell', handleClearOutput);\n",
+ "\n",
+ " /* Handle when a new output is added */\n",
+ " events.on('output_added.OutputArea', handleAddOutput);\n",
+ "\n",
+ " /**\n",
+ " * Register the mime type and append_mime function with output_area\n",
+ " */\n",
+ " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
+ " /* Is output safe? */\n",
+ " safe: true,\n",
+ " /* Index of renderer in `output_area.display_order` */\n",
+ " index: 0\n",
+ " });\n",
+ " }\n",
+ "\n",
+ " // register the mime type if in Jupyter Notebook environment and previously unregistered\n",
+ " if (root.Jupyter !== undefined) {\n",
+ " var events = require('base/js/events');\n",
+ " var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
+ "\n",
+ " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
+ " register_renderer(events, OutputArea);\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " \n",
+ " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
+ " root._bokeh_timeout = Date.now() + 5000;\n",
+ " root._bokeh_failed_load = false;\n",
+ " }\n",
+ "\n",
+ " var NB_LOAD_WARNING = {'data': {'text/html':\n",
+ " \"\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
+ " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"- re-rerun `output_notebook()` to attempt to load from CDN again, or
\\n\"+\n",
+ " \"- use INLINE resources instead, as so:
\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"
\\n\"+\n",
+ " \"from bokeh.resources import INLINE\\n\"+\n",
+ " \"output_notebook(resources=INLINE)\\n\"+\n",
+ " \"\\n\"+\n",
+ " \"
\"}};\n",
+ "\n",
+ " function display_loaded() {\n",
+ " var el = document.getElementById(\"5cac4085-57f7-4935-8294-6fc71ea061bf\");\n",
+ " if (el != null) {\n",
+ " el.textContent = \"BokehJS is loading...\";\n",
+ " }\n",
+ " if (root.Bokeh !== undefined) {\n",
+ " if (el != null) {\n",
+ " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n",
+ " }\n",
+ " } else if (Date.now() < root._bokeh_timeout) {\n",
+ " setTimeout(display_loaded, 100)\n",
+ " }\n",
+ " }\n",
+ "\n",
+ "\n",
+ " function run_callbacks() {\n",
+ " try {\n",
+ " root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n",
+ " }\n",
+ " finally {\n",
+ " delete root._bokeh_onload_callbacks\n",
+ " }\n",
+ " console.info(\"Bokeh: all callbacks have finished\");\n",
+ " }\n",
+ "\n",
+ " function load_libs(js_urls, callback) {\n",
+ " root._bokeh_onload_callbacks.push(callback);\n",
+ " if (root._bokeh_is_loading > 0) {\n",
+ " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
+ " return null;\n",
+ " }\n",
+ " if (js_urls == null || js_urls.length === 0) {\n",
+ " run_callbacks();\n",
+ " return null;\n",
+ " }\n",
+ " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
+ " root._bokeh_is_loading = js_urls.length;\n",
+ " for (var i = 0; i < js_urls.length; i++) {\n",
+ " var url = js_urls[i];\n",
+ " var s = document.createElement('script');\n",
+ " s.src = url;\n",
+ " s.async = false;\n",
+ " s.onreadystatechange = s.onload = function() {\n",
+ " root._bokeh_is_loading--;\n",
+ " if (root._bokeh_is_loading === 0) {\n",
+ " console.log(\"Bokeh: all BokehJS libraries loaded\");\n",
+ " run_callbacks()\n",
+ " }\n",
+ " };\n",
+ " s.onerror = function() {\n",
+ " console.warn(\"failed to load library \" + url);\n",
+ " };\n",
+ " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
+ " document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
+ " }\n",
+ " };var element = document.getElementById(\"5cac4085-57f7-4935-8294-6fc71ea061bf\");\n",
+ " if (element == null) {\n",
+ " console.log(\"Bokeh: ERROR: autoload.js configured with elementid '5cac4085-57f7-4935-8294-6fc71ea061bf' but no matching script tag was found. \")\n",
+ " return false;\n",
+ " }\n",
+ "\n",
+ " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.16.min.js\"];\n",
+ "\n",
+ " var inline_js = [\n",
+ " function(Bokeh) {\n",
+ " Bokeh.set_log_level(\"info\");\n",
+ " },\n",
+ " \n",
+ " function(Bokeh) {\n",
+ " \n",
+ " },\n",
+ " function(Bokeh) {\n",
+ " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n",
+ " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n",
+ " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n",
+ " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n",
+ " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n",
+ " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n",
+ " }\n",
+ " ];\n",
+ "\n",
+ " function run_inline_js() {\n",
+ " \n",
+ " if ((root.Bokeh !== undefined) || (force === true)) {\n",
+ " for (var i = 0; i < inline_js.length; i++) {\n",
+ " inline_js[i].call(root, root.Bokeh);\n",
+ " }if (force === true) {\n",
+ " display_loaded();\n",
+ " }} else if (Date.now() < root._bokeh_timeout) {\n",
+ " setTimeout(run_inline_js, 100);\n",
+ " } else if (!root._bokeh_failed_load) {\n",
+ " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
+ " root._bokeh_failed_load = true;\n",
+ " } else if (force !== true) {\n",
+ " var cell = $(document.getElementById(\"5cac4085-57f7-4935-8294-6fc71ea061bf\")).parents('.cell').data().cell;\n",
+ " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
+ " }\n",
+ "\n",
+ " }\n",
+ "\n",
+ " if (root._bokeh_is_loading === 0) {\n",
+ " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
+ " run_inline_js();\n",
+ " } else {\n",
+ " load_libs(js_urls, function() {\n",
+ " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n",
+ " run_inline_js();\n",
+ " });\n",
+ " }\n",
+ "}(window));"
+ ],
+ "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"\\n\"+\n \"
\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"
\\n\"+\n \"
\\n\"+\n \"- re-rerun `output_notebook()` to attempt to load from CDN again, or
\\n\"+\n \"- use INLINE resources instead, as so:
\\n\"+\n \"
\\n\"+\n \"
\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(\"5cac4085-57f7-4935-8294-6fc71ea061bf\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n }\n finally {\n delete root._bokeh_onload_callbacks\n }\n console.info(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(js_urls, callback) {\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = js_urls.length;\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var s = document.createElement('script');\n s.src = url;\n s.async = false;\n s.onreadystatechange = s.onload = function() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.log(\"Bokeh: all BokehJS libraries loaded\");\n run_callbacks()\n }\n };\n s.onerror = function() {\n console.warn(\"failed to load library \" + url);\n };\n console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.getElementsByTagName(\"head\")[0].appendChild(s);\n }\n };var element = document.getElementById(\"5cac4085-57f7-4935-8294-6fc71ea061bf\");\n if (element == null) {\n console.log(\"Bokeh: ERROR: autoload.js configured with elementid '5cac4085-57f7-4935-8294-6fc71ea061bf' but no matching script tag was found. \")\n return false;\n }\n\n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.16.min.js\"];\n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n \n function(Bokeh) {\n \n },\n function(Bokeh) {\n console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.16.min.css\");\n console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.16.min.css\");\n console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.16.min.css\");\n }\n ];\n\n function run_inline_js() {\n \n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(\"5cac4085-57f7-4935-8294-6fc71ea061bf\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(js_urls, function() {\n console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));"
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
"source": [
"output_notebook()"
]
@@ -75,7 +364,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -85,7 +374,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -111,7 +400,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -145,7 +434,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -169,16 +458,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Mua2Ajr-LKKa"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(136, 52229)"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"x_tr = data_tr.values\n",
- "x_ts = data_ts.values"
+ "x_ts = data_ts.values\n",
+ "x_ts.shape"
]
},
{
@@ -193,7 +494,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -231,7 +532,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
@@ -246,7 +547,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -281,14 +582,22 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Qqc3TmFBLKKn",
"outputId": "d9ef6c64-9f18-4bea-9167-decaa0ca1820"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[LibSVM]"
+ ]
+ }
+ ],
"source": [
"## fit the model and get the predictions\n",
"svc.fit(x_tr, y_tr)\n",
@@ -307,14 +616,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "Ku0JSF_ALKKs",
"outputId": "94585c0e-534a-445d-d0ba-92a9bf3a9388"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556\n"
+ ]
+ }
+ ],
"source": [
"from sklearn import metrics\n",
"print('MCC = ', metrics.matthews_corrcoef(class_lab_ts, class_pred_ts))\n",
@@ -334,14 +653,28 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "whSZnHGALKKx",
"outputId": "2c471734-3504-4af7-8ebb-74e5a02be301"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.91 0.93 0.92 46\n",
+ " 1 0.97 0.96 0.96 90\n",
+ "\n",
+ "avg / total 0.95 0.95 0.95 136\n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"print(metrics.classification_report(class_lab_ts, class_pred_ts))"
]
@@ -358,15 +691,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 31,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "ZT6XjB20LKK0"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MCC = 0.8357487922705314\n",
+ "ACC = 0.9264705882352942\n",
+ "SENS = 0.9444444444444444\n"
+ ]
+ }
+ ],
"source": [
- "## space for exercise\n"
+ "## space for exercise\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "\n",
+ "clf = RandomForestClassifier(502)\n",
+ "clf.fit(x_tr, y_tr)\n",
+ "y_ts = clf.predict(x_ts)\n",
+ "\n",
+ "print('MCC = ', metrics.matthews_corrcoef(class_lab_ts, y_ts))\n",
+ "print('ACC = ', metrics.accuracy_score(class_lab_ts, y_ts))\n",
+ "print('SENS = ', metrics.recall_score(class_lab_ts, y_ts))"
]
},
{
@@ -393,7 +745,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"metadata": {
"colab": {},
"colab_type": "code",
@@ -401,7 +753,58 @@
"outputId": "099e6404-c7fd-414a-b49a-4092af095c57",
"scrolled": true
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "C = 1e-06\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/neeyanthkvk/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py:538: RuntimeWarning: invalid value encountered in double_scalars\n",
+ " mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MCC = 0.0\n",
+ "ACC = 0.6617647058823529\n",
+ "SENS = 1.0 \n",
+ "\n",
+ "C = 1e-05\n",
+ "MCC = 0.6310547428675068\n",
+ "ACC = 0.8308823529411765\n",
+ "SENS = 1.0 \n",
+ "\n",
+ "C = 0.0001\n",
+ "MCC = 0.9014492753623189\n",
+ "ACC = 0.9558823529411765\n",
+ "SENS = 0.9666666666666667 \n",
+ "\n",
+ "C = 0.001\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.01\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.1\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n"
+ ]
+ }
+ ],
"source": [
"## define the sequence of C values we want to use in the search of the best one\n",
"C_list = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]\n",
@@ -437,15 +840,167 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 33,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "BPtC-EBSLKK_"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "C = 1e-06 gamma = 0.001\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/neeyanthkvk/anaconda3/lib/python3.6/site-packages/sklearn/metrics/classification.py:538: RuntimeWarning: invalid value encountered in double_scalars\n",
+ " mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MCC = 0.0\n",
+ "ACC = 0.6617647058823529\n",
+ "SENS = 1.0 \n",
+ "\n",
+ "C = 1e-06 gamma = 0.01\n",
+ "MCC = 0.0\n",
+ "ACC = 0.6617647058823529\n",
+ "SENS = 1.0 \n",
+ "\n",
+ "C = 1e-06 gamma = 0.1\n",
+ "MCC = 0.0\n",
+ "ACC = 0.6617647058823529\n",
+ "SENS = 1.0 \n",
+ "\n",
+ "C = 1e-06 gamma = 1\n",
+ "MCC = 0.0\n",
+ "ACC = 0.6617647058823529\n",
+ "SENS = 1.0 \n",
+ "\n",
+ "C = 1e-05 gamma = 0.001\n",
+ "MCC = 0.6310547428675068\n",
+ "ACC = 0.8308823529411765\n",
+ "SENS = 1.0 \n",
+ "\n",
+ "C = 1e-05 gamma = 0.01\n",
+ "MCC = 0.6310547428675068\n",
+ "ACC = 0.8308823529411765\n",
+ "SENS = 1.0 \n",
+ "\n",
+ "C = 1e-05 gamma = 0.1\n",
+ "MCC = 0.6310547428675068\n",
+ "ACC = 0.8308823529411765\n",
+ "SENS = 1.0 \n",
+ "\n",
+ "C = 1e-05 gamma = 1\n",
+ "MCC = 0.6310547428675068\n",
+ "ACC = 0.8308823529411765\n",
+ "SENS = 1.0 \n",
+ "\n",
+ "C = 0.0001 gamma = 0.001\n",
+ "MCC = 0.9014492753623189\n",
+ "ACC = 0.9558823529411765\n",
+ "SENS = 0.9666666666666667 \n",
+ "\n",
+ "C = 0.0001 gamma = 0.01\n",
+ "MCC = 0.9014492753623189\n",
+ "ACC = 0.9558823529411765\n",
+ "SENS = 0.9666666666666667 \n",
+ "\n",
+ "C = 0.0001 gamma = 0.1\n",
+ "MCC = 0.9014492753623189\n",
+ "ACC = 0.9558823529411765\n",
+ "SENS = 0.9666666666666667 \n",
+ "\n",
+ "C = 0.0001 gamma = 1\n",
+ "MCC = 0.9014492753623189\n",
+ "ACC = 0.9558823529411765\n",
+ "SENS = 0.9666666666666667 \n",
+ "\n",
+ "C = 0.001 gamma = 0.001\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.001 gamma = 0.01\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.001 gamma = 0.1\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.001 gamma = 1\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.01 gamma = 0.001\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.01 gamma = 0.01\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.01 gamma = 0.1\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.01 gamma = 1\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.1 gamma = 0.001\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.1 gamma = 0.01\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.1 gamma = 0.1\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n",
+ "C = 0.1 gamma = 1\n",
+ "MCC = 0.8857501367027195\n",
+ "ACC = 0.9485294117647058\n",
+ "SENS = 0.9555555555555556 \n",
+ "\n"
+ ]
+ }
+ ],
"source": [
- "## space for exercise"
+ "## space for exercise\n",
+ "C_list = [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]\n",
+ "gamma_list = [0.001, 0.01, 0.1, 1]\n",
+ "for C in C_list:\n",
+ " for gamma in gamma_list:\n",
+ " print('C = ', C, ' gamma = ', gamma)\n",
+ " svc = svm.SVC(kernel = 'linear', C=C, gamma = gamma)\n",
+ " svc.fit(x_tr, class_lab_tr.values.ravel())\n",
+ " class_pred_ts = svc.predict(x_ts)\n",
+ " print('MCC = ', metrics.matthews_corrcoef(class_lab_ts, class_pred_ts))\n",
+ " print('ACC = ', metrics.accuracy_score(class_lab_ts, class_pred_ts))\n",
+ " print('SENS = ', metrics.recall_score(class_lab_ts, class_pred_ts), \"\\n\")"
]
},
{
@@ -460,14 +1015,25 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 34,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "utM1ALBfLKLC",
"outputId": "d96dc041-2f6f-4f1a-bca5-70310d1f79ee"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'C': 0.001, 'gamma': 0.001}"
+ ]
+ },
+ "execution_count": 34,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"from sklearn.model_selection import GridSearchCV\n",
"\n",
@@ -505,14 +1071,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 35,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "2lZAaTXJLKLH",
"outputId": "2155231c-e50c-4c06-82c4-6b6a5f7c4ee2"
},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
+ " max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
+ " min_impurity_decrease=0.0, min_impurity_split=None,\n",
+ " min_samples_leaf=1, min_samples_split=2,\n",
+ " min_weight_fraction_leaf=0.0, n_estimators=250, n_jobs=1,\n",
+ " oob_score=False, random_state=None, verbose=0,\n",
+ " warm_start=False)"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Build a forest and compute the feature importances\n",
"rf = RandomForestClassifier(n_estimators=250)\n",
@@ -531,14 +1114,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 36,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "rspvHmO0LKLK",
"outputId": "7b131d8f-ebc8-4d03-9f38-ad90de735367"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MCC = 0.8357487922705314\n",
+ "ACC = 0.9264705882352942\n",
+ "SENS = 0.9444444444444444\n"
+ ]
+ }
+ ],
"source": [
"class_pred_ts = rf.predict(x_ts)\n",
"print('MCC = ', metrics.matthews_corrcoef(class_lab_ts, class_pred_ts))\n",
@@ -558,14 +1151,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 37,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "7g9k5EHsLKLU",
"outputId": "aa26094b-0e4a-48f0-be91-ecd2874ab204"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Feature ranking (top 10 features):\n",
+ "1. feature 7448 (0.008498)\n",
+ "2. feature 17952 (0.007474)\n",
+ "3. feature 450 (0.006937)\n",
+ "4. feature 16266 (0.006608)\n",
+ "5. feature 5719 (0.006272)\n",
+ "6. feature 8357 (0.006259)\n",
+ "7. feature 21809 (0.006237)\n",
+ "8. feature 3556 (0.005993)\n",
+ "9. feature 12169 (0.005958)\n",
+ "10. feature 3112 (0.005879)\n"
+ ]
+ }
+ ],
"source": [
"importances = rf.feature_importances_\n",
"indices = np.argsort(importances)[::-1]\n",
@@ -588,14 +1199,31 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
"metadata": {
"colab": {},
"colab_type": "code",
"id": "2fSkitN7LKLY",
"outputId": "73191a71-9657-4582-ede6-7fb14cd3fc05"
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "VPS53.Gene_AceView\n",
+ "ERCC6L.Gene_AceView\n",
+ "CHD5.Gene_RefSeq\n",
+ "dawfloyby.Gene_AceView\n",
+ "LOC100287397.Gene_RefSeq\n",
+ "C4orf46.Gene_AceView\n",
+ "snawjarby.Gene_AceView\n",
+ "SNORD1C.Gene_AceView\n",
+ "cheymey.Gene_AceView\n",
+ "NRBP2.Gene_AceView\n"
+ ]
+ }
+ ],
"source": [
"columnsNamesArr = data_tr.columns.values\n",
"for i in range(10):\n",
@@ -732,7 +1360,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.6.8"
+ "version": "3.6.5"
}
},
"nbformat": 4,