## Working with files and folders ##

built in `os` module gives handy ways of wroking with files and folders.

In [1]:
import os

In [None]:
os.mkdir("test") #create directory

In [None]:
os.makedirs("test/nested/hello/x") # create full path if it does not exist

In [None]:
os.rmdir("test/nested/hello/x") #removes directory 

In [None]:
os.listdir(".ipynb_checkpoints/") #lists contents of directory

In [None]:
os.removedirs("test/nested/hello/") #removes complete path if all subdirectories are empty

In [None]:
os.getcwd()

In [None]:
os.mkdir("test")

In [None]:
os.chdir("test/")

In [None]:
os.getcwd()

In [None]:
with open("hello.py","w")as f:
    f.write("print('hello world!')")

In [None]:
os.chdir("../")

In [None]:
for path, dirs, files in os.walk("."):
    for f in files:
        print(os.path.join(path,f))

In [None]:
os.path.exists("hello.py")

In [None]:
os.path.basename("../advanced/testing.ipynb")

In [None]:
os.path.basename("/home/vikrant/programming")

In [None]:
os.path.dirname("/home/vikrant/programming")

In [None]:
os.path.isfile("hello.py")

In [None]:
os.path.isfile("test/")

In [None]:
os.path.isdir("test/")

### working with permissions ###

In [None]:
os.access("test/", os.R_OK)

In [None]:
import stat
os.chmod("test/",stat.S_IRUSR | stat.S_IWUSR )

In [None]:
os.access("test/", os.R_OK)

In [None]:
os.access("test/", os.W_OK)

In [None]:
os.access("test/", os.X_OK)

In [7]:
os.chmod("test/",stat.S_IREAD|stat.S_IEXEC)

In [None]:
!ls -ld test

In [None]:
os.lstat("test/")

In [None]:
stat.S_IMODE(os.lstat("test/").st_mode)

### print directory tree ###

In [45]:
import os
def dirtree(dirname, level=0):
    print("| "*level +  "|--" + dirname)
    for item in os.listdir(dirname):
        p = os.path.join(dirname, item)
        if os.path.isdir(p):
            dirtree(p, level+1)
        else:
            print("| "*(level+1)+ "|--" +item)
        

In [46]:
dirtree(".")

|--.
| |--./.ipynb_checkpoints
| | |--files_folders-checkpoint.ipynb
| | |--external_applications-checkpoint.ipynb
| | |--warmup-checkpoint.ipynb
| |--warmup.ipynb
| |--tasks.py
| |--outline.org
| |--tasks.py~
| |--c.conf
| |--person.yaml
| |--echo.py
| |--training.py~
| |--hello.py
| |--files_folders.ipynb
| |--fabfile.py
| |--external_applications.ipynb
| |--debug.py~
| |--fabfile.py~
| |--debug.py
| |--training.py
| |--x.conf
| |--fab.py~


In [None]:
os.path.getsize("./test/hello.py")

In [None]:
os.path.getsize(".")

In [None]:
for p,dirs,files in os.walk("."):
    for d in dirs:
        print(os.path.join(p,d))

** problem **
- Write a function to compute size of a directory. This should consider recursively size of all the files inside that directory.
- Write a function to find all ".log" files from given directory

In [None]:
def dirsize(dirname):
    s = 0#os.path.getsize(dirname)
    for p, dirs, files in os.walk(dirname):
        #s += sum(dirsize(os.path.join(p, d)) for d in dirs)
        s += sum(os.path.getsize(os.path.join(p, f)) for f in files)
    return s

In [None]:
dirsize("../")/1024/1024

In [22]:
def find(dirpath, ext="log"):
    for path, dirs, files in os.walk(dirpath):
        for f in files:
            if f.endswith("."+ext):
                yield os.path.join(path, f)
list(find("/var/log"))

['/var/log/auth.log',
 '/var/log/gpu-manager.log',
 '/var/log/Xorg.1.log',
 '/var/log/kern.log',
 '/var/log/boot.log',
 '/var/log/alternatives.log',
 '/var/log/Xorg.2.log',
 '/var/log/dpkg.log',
 '/var/log/Xorg.0.log',
 '/var/log/fontconfig.log',
 '/var/log/bootstrap.log',
 '/var/log/mintsystem.log',
 '/var/log/apt/term.log',
 '/var/log/apt/history.log',
 '/var/log/lightdm/x-0.log',
 '/var/log/lightdm/x-2.log',
 '/var/log/lightdm/seat0-greeter.log',
 '/var/log/lightdm/x-1.log',
 '/var/log/lightdm/lightdm.log',
 '/var/log/installer/casper.log']

### Watching files for modifications ###

In [4]:
os.path.getmtime("files_folders.ipynb")

1537799455.9950037

In [5]:
import time

In [6]:
time.ctime(os.path.getmtime("files_folders.ipynb"))

'Mon Sep 24 20:00:55 2018'

** problem **
- Write a function to find files modified within given timeframe in seconds _*hint: use time.time() to get current time.*_


In [20]:
import time
def modifiedwithin(dirpath, seconds):
    now = time.time()
    for path, dirs, files in os.walk(dirpath):
        for f in files:
            filepath = os.path.join(path, f)
            t = os.path.getmtime(filepath)
            if (now - t) <= seconds:
                yield filepath

In [18]:
list(modifiedwithin(".", 100))

['./outline.org', './files_folders.ipynb']

In [24]:
import time
def modifiedwithin_(files, seconds):
    now = time.time()
    for f in files:
        t = os.path.getmtime(f)
        if (now - t) <= seconds:
            yield f

find all log files in /var/log, modified in last 1000 seconds

In [25]:
logfiles = find("/var/log/","log")
list(modifiedwithin_(logfiles, 1000))

['/var/log/auth.log', '/var/log/kern.log']

### copying and moving files ###

In [3]:
import shutil

In [None]:
shutil.copy("./test/hello.py","/tmp/")

In [None]:
os.path.exists("/tmp/hello.py")

In [None]:
time.ctime(os.path.getmtime("/tmp/hello.py"))

In [None]:
shutil.copy2("./test/hello.py","/tmp/")

In [None]:
time.ctime(os.path.getmtime("/tmp/hello.py"))

In [13]:
shutil.copytree(".","/tmp/tree2")

'/tmp/tree2'

In [12]:
import stat
os.chmod("./test/hello.py",stat.S_IREAD)

In [16]:
dirtree("/tmp/tree2/")

|--/tmp/tree2/
| |--/tmp/tree2/test
| | |--hello.py
| |--outline.org
| |--external_applications.ipynb
| |--hello.py
| |--/tmp/tree2/.ipynb_checkpoints
| | |--warmup-checkpoint.ipynb
| | |--files_folders-checkpoint.ipynb
| |--files_folders.ipynb
| |--warmup.ipynb


### Comparing files ###

In [17]:
import filecmp
filecmp.cmp("./test/hello.py" ,"/tmp/tree2/hello.py")

True

In [20]:
filecmp.cmpfiles(".","/tmp/tree1", common=["hello.py"])

(['hello.py'], [], [])

** problem **
- Write a function to backup given folder. Assume that folder has huge data, so make sure to backup only new or modified files. don't touch already backed up files but not modified. backup function will be used to take system backup at periodic interval.

In [21]:
def backup(source, dest):
    pass

In [25]:
import fnmatch
fnmatch.filter(["test.py","hello.py","abc.txt"],"*.py")

['test.py', 'hello.py']

In [26]:
fnmatch.fnmatch("heelo.py","h*.py")

True

### Working with archives ###

In [27]:
shutil.get_archive_formats()

[('bztar', "bzip2'ed tar-file"),
 ('gztar', "gzip'ed tar-file"),
 ('tar', 'uncompressed tar file'),
 ('xztar', "xz'ed tar-file"),
 ('zip', 'ZIP file')]

In [33]:
shutil.make_archive("X","zip",root_dir="/tmp")

'/home/vikrant/programming/work/github/python-trainings/practical_python_master/X.zip'

###  Working various file formats ###

In [1]:
import yaml

In [11]:
person = {"name":"vmware","email":"person@vmware.com", "teams":["infra","cloud","HR"],"d":(1,2,3)}

In [22]:
with open("person.yaml","w") as f:
    f.write(yaml.dump(person))

In [13]:
!cat person.yaml

d: !!python/tuple [1, 2, 3]
email: person@vmware.com
name: vmware
teams: [infra, cloud, HR]


In [21]:
with open("person.yaml") as f:
    p = yaml.load(f)
    print(p)

{'d': (1, 2, 3), 'email': 'person@vmware.com', 'name': 'vmware', 'teams': ['infra', 'cloud', 'HR']}


In [15]:
import json

In [17]:
sp = json.dumps(person)

In [18]:
sp

'{"name": "vmware", "email": "person@vmware.com", "teams": ["infra", "cloud", "HR"], "d": [1, 2, 3]}'

In [19]:
json.loads(sp)

{'d': [1, 2, 3],
 'email': 'person@vmware.com',
 'name': 'vmware',
 'teams': ['infra', 'cloud', 'HR']}

In [24]:
import configparser

In [27]:
%%file x.conf
[section1]
a = 2
b = hello
c = 1.2

Overwriting x.conf


In [29]:
conf = configparser.ConfigParser()

In [30]:
conf.read("x.conf")

['x.conf']

In [32]:
conf.sections()

['section1']

In [36]:
section = conf['section1']

In [39]:
for i in section.items():
    print(i)

('a', '2')
('b', 'hello')
('c', '1.2')


In [40]:
c= configparser.ConfigParser()

In [41]:
c['header1'] = {"x":1,"y":2}

In [43]:
c.write(open("c.conf", "w"))

In [44]:
!cat c.conf

[header1]
x = 1
y = 2

