In [2]:
import os
import logging
import tqdm
import tidydrive as td

from googleapiclient.discovery import build

logger = logging.getLogger()
handler = logging.StreamHandler()
formatter = logging.Formatter(
        '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)


auth.authenticate_user()
drive_service = build('drive', 'v3')

ImportError: No module named 'googleapiclient'

## Functions for downloading/uploading large files

In [0]:
import io
import sys
import logging

from apiclient.http import MediaIoBaseDownload, MediaFileUpload
from googleapiclient import errors


def retry_wrapper(default_retries=5, default_catch_codes=[403, 503]):
    '''
    A doble wrapper to make a function retry defautl_retries if a code error is found.
    (This is to prevent things from breaking if google resources are temporally unavailable.
    :param default_retries:
    :param default_catch_codes:
    :return:
    '''
    def wrap(service_function):
        def persistent_function(*args, **kwargs):
            if 'retries' in kwargs.keys():
                retries = kwargs['retries']
            else:
                retries = default_retries
            if 'codes' in kwargs.keys():
                catch_codes = default_catch_codes

            logger.debug('retries {}'.format(retries))
            for n in range(retries):
                try:
                    return_value = service_function(*args, **kwargs)
                    return return_value
                except errors.HttpError as err:
                    error = json.loads(err.content)
                    if error.get('code') in catch_codes:
                        logger.debug('Error {0}, waiting to retry {1}'.format(error.get('code'), n))
                        time.sleep(2**n)
                    else:
                        raise
            logger.warning('Critical error')
            return None
        return persistent_function
    return wrap
  
def md5(file_path):
    hash_md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
  

def get_file_item(service, file_id, retries=5):
    for n in range(retries):
        try:
            file_item = service.files().get(fileId=file_id, fields='id, name, size').execute()
            return file_item
        except errors.HttpError as e:
            error = json.loads(e.content)
            if error.get('code') == 403 or error.get('code') == 503:
                logger.warning('Error  {}'.format(error.get('errors')[0].get('reason')))
                time.sleep((2**n))
            else:
                raise
    logger.warning('Error in get_file_item')
    return None


def download_chunk(downloader, retries=5):
    for n in range(retries):
        try:
            status, done = downloader.next_chunk()
            return status, done
        except errors.HttpError as e:
            error = json.loads(e.content)
            if error.get('code') == 403 or error.get('code') == 503 or error.get('code') == 500:
                logger.warning('Error  {}'.format(error.get('errors')[0].get('reason')))
                time.sleep((2**n))
            else:
                raise

def download_file(service, file_id, dest_path, retries=5, chunk_size=1048576):
    file_obj = get_file_item(service, file_id)
    file_name = file_obj['name']
    print(file_obj)
    logger.info('Downloading file {} to folder {}'.format(file_name, dest_path))
    request = service.files().get_media(fileId=file_id)
    fh = io.FileIO(os.path.join(dest_path, file_name), 'wb')
    downloader = MediaIoBaseDownload(fh, request, chunksize=chunk_size)
    done = False
    total_chunks = int(file_obj['size'])//chunk_size
    sys.stdout.flush()
    progress_bar = tqdm.tqdm(total=total_chunks)
    while done is False:
        try:
            status, done = download_chunk(downloader, retries=retries);
            progress_bar.update(1);
        except errors.HttpError as err:
            print(err)
            if int(err.resp['status']) == 503:
                logger.warning('Sevice Unavailable, retrying')
            else:
                raise

    logger.info('Download complete')
    return True
  

In [3]:
def path_parts(path):
  path_parts = []
  while(True):
    path, folder = os.path.split(os.path.abspath(path))
    if (folder == ''):
      break
    path_parts.append(folder)
  return path_parts[::-1]

def make_query(query_par):
  q = ""
  if query_par['parents']:
    q += "'{}' in parents".format(query_par['parents'])
  del query_par['parents']
  
  if query_par['trashed']:
    q += " and trashed={}".format(query_par['trashed'])
  del query_par['trashed']
  
  q_terms = ([" and {0}='{1}'".format(k, v) for k, v in query_par.items() if v])
  return {'q': ''.join([q] + q_terms)}
    

class TidyDrive:
  def __init__(self, service):
    self.service = service
    self.query_fields = "files(id, name, mimeType, parents, size)"
    self.default_query_par = {'parents': None,
                         'trashed': 'false',
                         'title': None,
                          'type': None,
                          'pattern': None,
                          'id': None
                          }
 
  def get_single_child(self, name, parent_id='root'):
    query = "parents='{0}' and trashed=false and name='{1}'".format(parent_id, name)
    results = self.service.files().list(q=query, 
                                     fields=self.query_fields).execute()
    list_found = items = results.get('files', [])
    if len(list_found) > 1:
      raise RuntimeError('Found more than 1 file with the name')
    return list_found
  
  def get_by_path(self, path, parent_id='root'):
  # gets a path relative to a parent_id
  # returns a query result
    path_list = path_parts(os.path.abspath(path))
  
    for name in path_list:
      #print(parent_id)
      found_in_parent = self.get_single_child(name, parent_id)
      # check that the size of found is exactly 1
      parent_id = found_in_parent[0]['id']

    #print(found_in_parent[0]['id'])
    return found_in_parent[0]

  def isdir(self, file_obj):
    return file_obj['mimeType'] == 'application/vnd.google-apps.folder'
  
  def size(self, file_obj):
    raise NotImplementedError
    return file_obj['fileSize']
    
  def listdir(self, file_obj):
    if not (self.isdir(file_obj) or file_obj['id'] == 'root'):
      raise ValueError('Not a dir {}'.format(file_obj))
    query = "parents='{0}' and trashed=false".format(file_obj['id'])
    results = self.service.files().list(q=query, 
                                     fields=self.query_fields).execute()
    list_found = items = results.get('files', [])
    #return list_found
    return list(map(lambda x: x['name'], list_found))
  
  def download(self, file_obj, dest_path):
    return download_file(self.gauth.service, file_obj['id'], dest_path, retries=5)
  
  def mkdir(self, parent_file_obj, name):
    if not (self.isdir(parent_file_obj) or parent_file_obj['id'] == 'root'):
      raise ValueError('Not a dir {}'.format(parent_file_obj))
    
    if len(self.get_single_child(name, parent_id=parent_file_obj['id'])) > 0:
      raise RuntimeError('File already exists {}'.format(name))
    
    fold_meta = {'name': name,
           'parents': [parent_file_obj['id']],
            'mimeType': 'application/vnd.google-apps.folder'
           }

    new_file = self.service.files().create(body=fold_meta, fields='id').execute()
    return new_file
  
  def move(self, source_file_obj, dest_file_obj):
    if not (self.isdir(dest_file_obj) or dest_file_obj['id'] == 'root'):
      raise ValueError('Not a dir {}'.format(dest_file_obj))
    file = self.service.files().get(fileId=source_file_obj['id'],
                                 fields='parents').execute();
    previous_parents = ",".join(file.get('parents'))
    file = self.service.files().update(fileId=source_file_obj['id'],
                                        addParents=dest_file_obj['id'],
                                        removeParents=previous_parents,
                                        fields='id, parents').execute()
    return file
   
one_drive = TidyDrive(drive_service)
plist = one_drive.get_single_child('stim_data', '0BxJutEqkpkV4dkFkNjVmbFgwX2M')

#one_drive.drive_find(parents='root', type='folder')

2018-02-08 22:08:56,756 googleapiclient.discovery INFO     URL being requested: GET https://www.googleapis.com/drive/v3/files?q=parents%3D%270BxJutEqkpkV4dkFkNjVmbFgwX2M%27+and+trashed%3Dfalse+and+name%3D%27stim_data%27&fields=files%28id%2C+name%2C+mimeType%2C+parents%2C+size%29&alt=json
2018-02-08 22:08:56,760 google_auth_httplib2 DEBUG    Making request: POST https://accounts.google.com/o/oauth2/token


In [5]:
one_drive.mkdir(plist[0], 'tuvieja2')
one_drive.listdir(plist[0])

2018-02-08 22:32:22,310 googleapiclient.discovery INFO     URL being requested: GET https://www.googleapis.com/drive/v3/files?q=parents%3D%270BxJutEqkpkV4bXZNaklSbm1wLUk%27+and+trashed%3Dfalse+and+name%3D%27tuvieja2%27&fields=files%28id%2C+name%2C+mimeType%2C+parents%2C+size%29&alt=json
2018-02-08 22:32:22,537 googleapiclient.discovery INFO     URL being requested: POST https://www.googleapis.com/drive/v3/files?fields=id&alt=json
2018-02-08 22:32:23,067 googleapiclient.discovery INFO     URL being requested: GET https://www.googleapis.com/drive/v3/files?q=parents%3D%270BxJutEqkpkV4bXZNaklSbm1wLUk%27+and+trashed%3Dfalse&fields=files%28id%2C+name%2C+mimeType%2C+parents%2C+size%29&alt=json


['tuvieja2', 'tuvieja', 'z017', 'z020', 'z007']

In [6]:
one_elem = one_drive.get_by_path('/colab_play/folder_a/raw.rhd')
download_file(one_drive.service, one_elem['id'], os.path.abspath('.cache'))

os.listdir(os.path.abspath('.cache'))

2018-02-08 22:32:32,260 googleapiclient.discovery INFO     URL being requested: GET https://www.googleapis.com/drive/v3/files?q=parents%3D%27root%27+and+trashed%3Dfalse+and+name%3D%27colab_play%27&fields=files%28id%2C+name%2C+mimeType%2C+parents%2C+size%29&alt=json
2018-02-08 22:32:32,462 googleapiclient.discovery INFO     URL being requested: GET https://www.googleapis.com/drive/v3/files?q=parents%3D%271ByVIQVo9ZEEVzAj8GKXbuphyH5ropcej%27+and+trashed%3Dfalse+and+name%3D%27folder_a%27&fields=files%28id%2C+name%2C+mimeType%2C+parents%2C+size%29&alt=json
2018-02-08 22:32:32,634 googleapiclient.discovery INFO     URL being requested: GET https://www.googleapis.com/drive/v3/files?q=parents%3D%271o2sa2WwUeJuzL7bRGuvVO_Gqaeuhnc0d%27+and+trashed%3Dfalse+and+name%3D%27raw.rhd%27&fields=files%28id%2C+name%2C+mimeType%2C+parents%2C+size%29&alt=json
2018-02-08 22:32:32,799 googleapiclient.discovery INFO     URL being requested: GET https://www.googleapis.com/drive/v3/files/1lLeLxvxlSExetN3BJ3o8YV

{'id': '1lLeLxvxlSExetN3BJ3o8YVpVIm7tKhLT', 'name': 'raw.rhd', 'size': '17948002'}


18it [00:12,  1.47it/s]                        2018-02-08 22:32:45,348 root         INFO     Download complete


['raw.rhd', 'pip', 'matplotlib']

In [7]:
def upload_file(service, parent_obj, file_path, retries=5, chunk_size=1048576):
  
  file_size = os.path.getsize(file_path)

  file_metadata = {'name': os.path.split(file_path)[-1], 
                  'parents': [parent_obj['id']],
                  'content-length':chunk_size,
                  'content-range': '*/{}'.format(file_size)}
                            
  media = MediaFileUpload(local_file,
                         resumable=True,
                         chunksize=chunk_size)

  request = one_drive.service.files().create(body=file_metadata, 
                                             media_body=media)
  done = False
  total_chunks = int(file_obj['size']) // chunk_size
    sys.stdout.flush()
    progress_bar = tqdm.tqdm(total=total_chunks)
  bytes_in = 0
  response = None
  sys.stdout.flush()
  print('file size {}'.format(file_size))
  #progress_bar = tqdm.tqdm(total=file_size)
  while response is None:
    try:
      print('byts in {}'.format(bytes_in))
      file_metadata['content_range'] = '{0}-{1}/{2}'.format(bytes_in, 
                                                            bytes_in + chunk_size,
                                                           file_size)
      file_metadata['content-length'] = '{0}'.format(chunk_size)
      status, response = request.next_chunk()
      bytes_in = bytes_in + chunk_size
      chunk_size = min(chunk_size, file_size - bytes_in)
      #progres_bar.update(chunk_size)
    except:
      #raise
      print('aborted')
      return request, status, response
  
  print("Upload Complete!")
  return request, status, response



local_file = '.cache/raw.rhd'
parent_obj = one_drive.get_by_path('/colab_play')

req, st, resp = upload_file(one_drive.service, parent_obj, local_file)


2018-02-08 22:33:07,453 googleapiclient.discovery INFO     URL being requested: GET https://www.googleapis.com/drive/v3/files?q=parents%3D%27root%27+and+trashed%3Dfalse+and+name%3D%27colab_play%27&fields=files%28id%2C+name%2C+mimeType%2C+parents%2C+size%29&alt=json
2018-02-08 22:33:07,664 googleapiclient.discovery INFO     URL being requested: POST https://www.googleapis.com/upload/drive/v3/files?alt=json&uploadType=resumable


file size 17948002
byts in 0
byts in 1048576
byts in 2097152
byts in 3145728
byts in 4194304
byts in 5242880
byts in 6291456
byts in 7340032
byts in 8388608
byts in 9437184
byts in 10485760
byts in 11534336
byts in 12582912
byts in 13631488
byts in 14680064
byts in 15728640
byts in 16777216
byts in 17825792
Upload Complete!


In [23]:
req.to_json()

'{"uri": "https://www.googleapis.com/upload/drive/v3/files?alt=json&uploadType=resumable", "method": "POST", "body": "{\\"name\\": \\"raw.rhd\\", \\"parents\\": [\\"1ByVIQVo9ZEEVzAj8GKXbuphyH5ropcej\\"], \\"content-length\\": 1048576, \\"content-range\\": \\"*/17948002\\"}", "headers": {"accept": "application/json", "accept-encoding": "gzip, deflate", "user-agent": "google-api-python-client/1.6.5 (gzip)", "content-type": "application/json"}, "methodId": "drive.files.create", "resumable": "{\\"_filename\\": \\".cache/raw.rhd\\", \\"_mimetype\\": \\"application/octet-stream\\", \\"_chunksize\\": 1048576, \\"_resumable\\": true, \\"_size\\": 17948002, \\"_class\\": \\"MediaFileUpload\\", \\"_module\\": \\"googleapiclient.http\\"}", "response_callbacks": [], "_in_error_state": true, "body_size": 127, "resumable_uri": "https://www.googleapis.com/upload/drive/v3/files?alt=json&uploadType=resumable&upload_id=AEnB2Ur3dYaoS00N4bsu9rxj8lOTDgmT8daGZhUOseN2kDnlZZHwv3K_P5o6wEP5jybkRI-dAofGwQ9WjLAVS

In [37]:
fls = one_drive.service.files().create()

2018-01-27 03:14:46,713 googleapiclient.discovery INFO     URL being requested: POST https://www.googleapis.com/drive/v3/files?alt=json


In [9]:
file_metadata = {'content-length':0,
                'content-range': '*/{}'.format(17948002)}
fls = one_drive.service.files().create(body=file_metadata)
fls.resumable_uri = req.resumable_uri
fls.to_json()
r = fls.execute()

2018-01-27 03:22:01,452 googleapiclient.discovery INFO     URL being requested: POST https://www.googleapis.com/drive/v3/files?alt=json


In [10]:
r

{'id': '1GzyTfYotcuZ2_Pnql54-hp0RNz3DR0zk',
 'kind': 'drive#file',
 'mimeType': 'application/octet-stream',
 'name': 'Untitled'}

In [77]:
file_metadata = {'name': 'tuvieja.wav', 
                'parents': [parent_obj['id']],
                'content-length':local_file_size,
                'content-range': '*/{}'.format(local_file_size)}
                            
media = MediaFileUpload(local_file,
                       resumable=True,
                       chunksize=chunk_size)

request = one_drive.service.files().create(body=file_metadata, 
                                           media_body=media)

2018-01-27 02:05:26,356 googleapiclient.discovery INFO     URL being requested: POST https://www.googleapis.com/upload/drive/v3/files?alt=json&uploadType=resumable


In [0]:
fls.create?

In [0]:
re_request = one_drive.service.files().create(body={}, 
                                           media_body=media)