In [42]:
from datetime import datetime

In [5]:
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "fundamentals"

def save_fig(fig_id, tight_layout=True):
    """Save the current matplotlib figure as a 300-dpi PNG.

    The file is written to ``<PROJECT_ROOT_DIR>/images/<CHAPTER_ID>/<fig_id>.png``.

    Parameters
    ----------
    fig_id : str
        File name without the ``.png`` extension.
    tight_layout : bool, default True
        When true, ``plt.tight_layout()`` is applied before saving.
    """
    images_dir = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
    # Create the target folder on first use so saving also works on a fresh
    # checkout where images/<chapter> does not exist yet.
    os.makedirs(images_dir, exist_ok=True)
    path = os.path.join(images_dir, fig_id + ".png")
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format='png', dpi=300)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

In [14]:
def prepare_country_stats(oecd_bli, gdp_per_capita):
    """Join life-satisfaction and GDP-per-capita figures per country.

    Parameters
    ----------
    oecd_bli : pandas.DataFrame
        Long-format table with at least the columns "INEQUALITY", "Country",
        "Indicator" and "Value". Only rows with INEQUALITY == "TOT" are used.
    gdp_per_capita : pandas.DataFrame
        Table with a "Country" column and a "2015" column; the latter is
        exposed as "GDP per capita" in the result.

    Returns
    -------
    pandas.DataFrame
        Indexed by country, sorted by "GDP per capita", with the columns
        "GDP per capita" and "Life satisfaction". Rows at positions
        0, 1, 6, 8, 33, 34 and 35 of the first 36 sorted rows are left out
        (presumably held out on purpose; confirm with the notebook author).

    Raises
    ------
    IndexError
        If the merged table has fewer than 33 rows, because the kept
        positions go up to 32.

    Notes
    -----
    Neither input frame is modified. The previous version renamed and
    re-indexed ``gdp_per_capita`` in place, so running the cell a second time
    failed because "Country" was no longer a column.
    """
    oecd_bli = oecd_bli[oecd_bli["INEQUALITY"]=="TOT"]
    oecd_bli = oecd_bli.pivot(index="Country", columns="Indicator", values="Value")
    # rename/set_index return new frames, so the caller's object stays intact
    # and the function can be called repeatedly on the same inputs.
    gdp_per_capita = (gdp_per_capita
                      .rename(columns={"2015": "GDP per capita"})
                      .set_index("Country"))
    full_country_stats = pd.merge(left=oecd_bli, right=gdp_per_capita,
                                  left_index=True, right_index=True)
    full_country_stats = full_country_stats.sort_values(by="GDP per capita")
    remove_indices = [0, 1, 6, 8, 33, 34, 35]
    # sorted() makes the row order explicit instead of relying on set iteration order
    keep_indices = sorted(set(range(36)) - set(remove_indices))
    return full_country_stats[["GDP per capita", 'Life satisfaction']].iloc[keep_indices]

In [83]:
# Remember the working directory; the dataset paths below are built from it
cur_directory = os.getcwd()

# Create the nested datasets/lifesat folders.
# os.path.join keeps the path portable (no hand-written '\\' separator) and
# exist_ok=True lets this cell be re-run without raising FileExistsError.
os.makedirs(os.path.join(cur_directory, 'datasets', 'lifesat'), exist_ok=True)

In [94]:
# Path of the lifesat dataset folder under the working directory.
# The empty final component makes os.path.join end the result with a path
# separator, so a file name can be appended directly to the string.
datapath = os.path.join(
    os.path.join(cur_directory, "datasets"),
    "lifesat",
    "",
)
datapath

'C:\\Users\\wangc\\Documents\\GitHub\\Machine-Learning-with-Scikit-Learn\\datasets\\lifesat\\'

In [96]:
# Code example
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.linear_model

# Load the data
# os.path.join gives the same path as before while no longer depending on
# datapath ending with a separator.
oecd_bli = pd.read_csv(os.path.join(datapath, "oecd_bli_2015.csv"), thousands=',')

In [97]:
# Preview the first five rows of the table loaded from oecd_bli_2015.csv
oecd_bli.head(5)

Unnamed: 0,LOCATION,Country,INDICATOR,Indicator,MEASURE,Measure,INEQUALITY,Inequality,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,Value,Flag Codes,Flags
0,AUS,Australia,HO_BASE,Dwellings without basic facilities,L,Value,TOT,Total,PC,Percentage,0,units,,,1.1,E,Estimated value
1,AUT,Austria,HO_BASE,Dwellings without basic facilities,L,Value,TOT,Total,PC,Percentage,0,units,,,1.0,,
2,BEL,Belgium,HO_BASE,Dwellings without basic facilities,L,Value,TOT,Total,PC,Percentage,0,units,,,2.0,,
3,CAN,Canada,HO_BASE,Dwellings without basic facilities,L,Value,TOT,Total,PC,Percentage,0,units,,,0.2,,
4,CZE,Czech Republic,HO_BASE,Dwellings without basic facilities,L,Value,TOT,Total,PC,Percentage,0,units,,,0.9,,


### Exploring the `os` module

In [19]:
# Uncomment the next line to list every name that the os module exposes:
# print(dir(os))

'C:\\Users\\wangc\\Documents\\GitHub\\Machine-Learning-with-Scikit-Learn'

In [20]:
# current directory
os.getcwd()

'C:\\Users\\wangc\\Documents\\GitHub\\Machine-Learning-with-Scikit-Learn'

In [21]:
# change directory: move up to the folder that contains the notebook's
# starting directory (derived from cur_directory instead of a hard-coded,
# machine-specific absolute path)
os.chdir(os.path.dirname(cur_directory))

In [23]:
# go back to the notebook's starting directory (captured earlier in
# cur_directory) rather than a hard-coded, machine-specific absolute path
os.chdir(cur_directory)

In [24]:
# show the current working directory again to confirm where we are
os.getcwd()

'C:\\Users\\wangc\\Documents\\GitHub\\Machine-Learning-with-Scikit-Learn'

In [26]:
# list files in the folder
os.listdir()

['.git',
 '.ipynb_checkpoints',
 'chp1 linear models.ipynb',
 'Classification.ipynb',
 'datasets',
 'oecd_bli_2015.csv',
 'README.md']

In [46]:
# Create a single scratch folder inside the working directory ...
os.mkdir('OS-Demo-2')
# ... and remove it again. For a one-level relative name like this,
# os.rmdir has the same effect as os.removedirs, which would additionally
# prune empty parent folders of a nested path.
os.rmdir('OS-Demo-2')

# rename
#os.rename('test.txt','demo.txt')

In [45]:
# get the file information with a single stat call
# (os.stat raises FileNotFoundError if demo.txt does not exist)
file_info = os.stat('demo.txt')
# st_mtime is the last-modification time as a POSIX timestamp (seconds)
mod_time = file_info.st_mtime

# print the readable time format from timestamp
print(datetime.fromtimestamp(mod_time))

2020-01-10 09:07:37.428385


In [50]:
# Walk the folder that contains the working directory, top-down, and print
# every directory together with its sub-directories and files. The root is
# derived from os.getcwd() instead of a hard-coded, machine-specific path.
# NOTE: this visits every nested folder (including .git internals), so the
# printed output can be very long.
for dirpath, dirnames, filenames in os.walk(os.path.dirname(os.getcwd())):
    print('Current Path:', dirpath)
    print('Directories:', dirnames)
    print('Files:', filenames)
    print('###################################################')

Current Path: C:\Users\wangc\Documents\GitHub
Directories: ['datastructure-and-algorithm', 'Machine-Learning-with-Scikit-Learn', 'Machine_Learning_R', 'pydata-book', 'python-data-analysis', 'python-mosh-coding']
Files: []
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\datastructure-and-algorithm
Directories: ['.git', 'code', 'Moshcode']
Files: ['README.md']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\datastructure-and-algorithm\.git
Directories: ['hooks', 'info', 'logs', 'objects', 'refs']
Files: ['COMMIT_EDITMSG', 'config', 'description', 'FETCH_HEAD', 'HEAD', 'index', 'ORIG_HEAD', 'packed-refs']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\datastructure-and-algorithm\.git\hooks
Directories: []
Files: ['applypatch-msg.sample', 'commit-msg.sample', 'fsmonitor-watchman.sample', 'post-update.sample', 'pre-applypatch.sample', 'pre-co

###################################################
Current Path: C:\Users\wangc\Documents\GitHub\datastructure-and-algorithm\.git\objects\80
Directories: []
Files: ['49df4e272d4bb8dc148f9e084bb1a5a7f795d7', 'bde963170dfe7908462202c65d529f0660c322']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\datastructure-and-algorithm\.git\objects\81
Directories: []
Files: ['e1590ffd39b6012783f65c01e6f4107ef8c83f']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\datastructure-and-algorithm\.git\objects\82
Directories: []
Files: ['2f60b8fef3d846f624af6c5d759f663d48f83d', '5dcfb4ba626159ac3f480e2120db1b51f05fac', 'b9d213f8e00d7fc9c12d497db6f7591da3d9d3', 'e3710dbfe2f6daa6faa0b4b1aa145efd1dbc4a']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\datastructure-and-algorithm\.git\objects\83
Directories: []
Files: ['11a3844993efc4591af68b7f68eaae2f9fce95']


Directories: ['DataStructures']
Files: ['._DataStructures']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\datastructure-and-algorithm\code\__MACOSX\Data Structures - Part 2\DataStructures\out\production\DataStructures
Directories: ['com', 'META-INF']
Files: ['._com', '._META-INF']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\datastructure-and-algorithm\code\__MACOSX\Data Structures - Part 2\DataStructures\out\production\DataStructures\com
Directories: ['codewithmosh']
Files: ['._codewithmosh']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\datastructure-and-algorithm\code\__MACOSX\Data Structures - Part 2\DataStructures\out\production\DataStructures\com\codewithmosh
Directories: []
Files: ['._Array.class', '._ArrayQueue.class', '._AVLTree$AVLNode.class', '._AVLTree.class', '._CharFinder.class', '._Demo.class', '._Expression.class', 

Directories: []
Files: ['movies.dat', 'ratings.dat', 'README', 'users.dat']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\pydata-book\datasets\mta_perf
Directories: []
Files: ['parse.py', 'Performance_LIBUS.xml', 'Performance_LIBUS.xsd', 'Performance_LIRR.xml', 'Performance_LIRR.xsd', 'Performance_MNR.xml', 'Performance_MNR.xsd', 'Performance_MTABUS.xml', 'Performance_MTABUS.xsd', 'Performance_NYCT.xml', 'Performance_NYCT.xsd', 'Performance_TBTA.xml', 'Performance_TBTA.xsd']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\pydata-book\datasets\titanic
Directories: []
Files: ['genderclassmodel.csv', 'gendermodel.csv', 'test.csv', 'train.csv']
###################################################
Current Path: C:\Users\wangc\Documents\GitHub\pydata-book\datasets\usda_food
Directories: []
Files: ['database.json']
###################################################
Current Path: C:\Users\wang

In [71]:
# Look up the APPDATA environment variable; .get() returns None instead of
# raising KeyError when the variable is not set
os.environ.get('APPDATA')

'C:\\Users\\wangc\\AppData\\Roaming'

In [72]:
# Build both example paths with os.path.join so the separator is never
# written by hand.
# file_path1: inside APPDATA when that variable is set, otherwise inside the
# user's home folder (avoids "None + str" raising TypeError).
file_path1 = os.path.join(os.environ.get('APPDATA', os.path.expanduser('~')), 'test.txt')
# file_path2: inside the folder that contains the working directory, derived
# at run time instead of a hard-coded, machine-specific absolute path.
file_path2 = os.path.join(os.path.dirname(os.getcwd()), 'test.txt')

In [73]:
# Tour of os.path helpers on a Windows-style relative path ('\\' is only a
# path separator on Windows).
# Only the value of the last expression is echoed as the cell output; the
# results of the earlier calls are computed and then discarded.
os.path.basename('temp\\test.txt') # final component of the path
os.path.dirname('temp\\test.txt') # everything before the final component
os.path.exists('temp\\test.txt') # see if the path exists
os.path.isdir('temp\\test.txt') # see if this is an existing directory
os.path.isfile('temp\\test.txt') # see if this is an existing regular file
os.path.splitext('temp\\test.txt') # split into (root, extension)

('temp\\test', '.txt')

In [70]:
# dir(os.path) # list all methods