In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
try:
    from . import constants_GDrive
except ImportError:
    import constants_GDrive


In [3]:
from pathlib import Path
from json import JSONDecodeError
import logging
# from time import time
from os import path
# import json
# from datetime import datetime, timezone
# from dateutil import parser
from functools import wraps


from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.auth import exceptions as google_exceptions
from ratelimit import limits, sleep_and_retry


In [4]:
logger = logging.getLogger(__name__)

In [5]:
class GDriveError(Exception):
    pass

In [6]:
# # wrap any call to the service in this decorator 
# # see: https://stackoverflow.com/a/36944992/5530152
# def credential_wrapper(method):
#     @wraps(method)
#     def _impl(self, *method_args, **method_kwargs):
#         # look at the example from google and rewrite this mess
#         if self.credentials.expired and self.credentials.refresh_token:
#             self.credentials.refresh(Request())
#         else:
#             flow = InstalledAppFlow.from_client_secrets_file(self.secrets, self.scopes)
#             creds = flow.run_local_server(port=0)
#             self.credentials = creds
#         # Save the credentials for the next run
#             try:
#                 with open('token.json', 'w') as token:
#                     token.write(creds.to_json())
#             except OSError as e:
#                 raise GDRiveError(f'error writing token file: {token} - {e}')
            
            
#         method_output = method(self, *method_args, **method_kwargs)
#         return method_output
#     return _impl

In [7]:
# wrap any call to the service in this decorator 
# see: https://stackoverflow.com/a/36944992/5530152
def credential_wrapper(method):
    @wraps(method)
    def _impl(self, *method_args, **method_kwargs):
        if not self.credentials or not self.credentials.valid:
            if self.credentials and self.credentials.expired and self.credentials.refresh_token:
                self.credentials.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(self.secrets, self.scopes)
                self.credentials = flow.run_local_server(port=0)
                
                # save the credentials for the next run
                try:
                    with open(self.token, 'w') as token_file:
                        token_file.write(self.credentials.to_json())
                except OSError as e:
                    raise GDriveError(f'error writing token file: {self.token} - {e}')
            self.build_service(self.credentials)
        method_output = method(self, *method_args, **method_kwargs)
        return method_output
    return _impl                    

In [16]:
class GDrive():
    def __repr__(self):
        return 'GDrive()'
    
    def __str__(self):
        return f'GDrive()'
    
    def __init__(self, secrets, scopes, cache='./', token='./token.json'):
        '''create a google drive interface for searching and returning file/folder information
        
        Args:
            secrets(Path): secrets json file obtained from https://console.cloud.google.com/cloud-resource-manager
            token(Path): file to cache auth information (typically within cache path)
        '''
        self.secrets = secrets
        self.scopes = scopes        
        self.token = token
        self.credentials = self.set_credentials(secrets=self.secrets, 
                                            scopes=self.scopes, 
                                            token=self.token)
        self.service = self.build_service(self.credentials)
        self.MIMETYPES = constants_GDrive.MIMETYPES
        self.CORPORA = constants_GDrive.CORPORA
        self.FILE_FIELDS = constants_GDrive.FILE_FIELDS
        self.FIELDS_DEFAULT = constants_GDrive.FIELDS_DEFAULT
        self.PAGESIZE = constants_GDrive.PAGESIZE
        
    
    @staticmethod
    def set_credentials(secrets, token, scopes, force=False):
        token = Path(token).expanduser()
        secrets = Path(secrets).expanduser()
        creds = None

        if token.exists():
            creds = Credentials.from_authorized_user_file(token, scopes)


        if not creds or not creds.valid or force:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())    
            else:
                flow = InstalledAppFlow.from_client_secrets_file(secrets, scopes)
                creds = flow.run_local_server(port=0)
            # Save the credentials for the next run
            try:
                with open('token.json', 'w') as token:
                    token.write(creds.to_json())
            except OSError as e:
                raise GDRiveError(f'error writing token file: {token} - {e}')


        return creds    
# 
    
    @staticmethod
    def build_service(credentials):
        try:
            service  = build('drive', 'v3', credentials=credentials)
        except google_exceptions.GoogleAuthError as e:
            raise GDriveError(f'error building credentials: {e}')
        return service
    
                
    @property
    def token(self):
        '''token file'''
        return self._token
        
    @token.setter
    def token(self, t_path):
        t_path = Path(t_path)        
        self._token = t_path
        
        
    def _interface(self, name=None, trashed=False, mimeType=None, fuzzy=True, 
               modifiedTime=None, parents=None, dopperator='>',
               fields = [], forcefields=False,
               corpora='user', orderBy='createdTime', driveId='',):
        
        query_build = {
            'name': (name, f'name {"contains" if fuzzy else "="} "{name}"'),
            'trashed': (trashed, f'trashed={trashed}'),
            'mimeType': (mimeType, f'mimeType="{self.MIMETYPES[mimeType] if mimeType in self.MIMETYPES else ""}"'),
            'parents': (parents, f'"{parents}" in parents'),
            'modifiedTime': (modifiedTime, f'modifiedTime{dopperator}"{modifiedTime}"')
        }
        
        query = [v[1] for k, v in query_build.items() if v[0]]
        
        if len(fields) < 1:
            fields = self.FIELDS_DEFAULT
        fields = set(fields)
        known_fields = []
        for f in fields:
            if f not in self.FILE_FIELDS:
                if forcefields:
                    logger.warning(f'unknown return field: {f}')
                    known_fields.append(f)
                else:
                    raise GDriveError(f'unknown return field: {f}')
            else:
                known_fields.append(f)
        fields_string = f'nextPageToken, files({",".join(known_fields)})'
        
        if driveId:
            corpora = 'drive'
        if corpora not in self.CORPORA:
            raise GDriveError(f'unknown `corpora` value: {corpora}')
        else:
            includeItemsFromAllDrives = self.CORPORA[corpora]['params']['includeItemsFromAllDrives']
            supportsAllDrives = self.CORPORA[corpora]['params']['supportsAllDrives']
            
        q = ' and '.join(query)
        logger.debug(f'QUERY STRING: {q}')
        
        return {'q': q,
                'corpora': corpora,
                'includeItemsFromAllDrives': includeItemsFromAllDrives,
                'supportsAllDrives': supportsAllDrives,
                'fields_string': fields_string,
                'driveId': driveId,
                }
    
    @credential_wrapper
    @sleep_and_retry
    @limits(calls=constants_GDrive.CALL_LIMIT, period=constants_GDrive.CALL_PERIOD)
    def _list(self, q='', corpora='', includeItemsFromAllDrives=False,
             supportsAllDrives='', fields_string='', driveId='', 
              pageToken='', pageSize=constants_GDrive.PAGESIZE):
        logger.debug(f'fettching page of {pageSize} results with query {q}')
        try:
            results = self.service.files().list(q=q,
                                                corpora=corpora,
                                                includeItemsFromAllDrives=includeItemsFromAllDrives,
                                                supportsAllDrives=supportsAllDrives,
                                                fields=fields_string,
                                                driveId=driveId,
                                                pageSize=pageSize,
                                                pageToken=pageToken
                                                ).execute()
        except HttpError as e:
            raise GDriveError(f'error searching: {type(e)}: {e}')

        return results

        
    def search(self, name=None, trashed=False, mimeType=None, fuzzy=True, 
               modifiedTime=None, parents=None, dopperator='>',
               fields = [], forcefields=False,
               corpora='user', orderBy='createdTime', driveId='',
               pageSize=constants_GDrive.PAGESIZE, complete=True,
               pageToken=''):
        '''search for objects in google drive by name

        Args:
            name(str): string to search for
            trashed(bool): search in trash when true
            mimeType(str): short mimeType (see MIMETYPES property)
            fuzzy(bool): true: `name contains "value"` false: `name = "value"`
            modifiedTime(str): yyyy-mm-dd string
            dopperator(str): >, < for use with modifiedTime
            parents(str): folder to search within
            fields(list of str): fields to return (see FILE_FIELDS property)
            forcefields(bool): true: use unknown fields, false: reject fields not in FILE_FIELDS
            corpora(str): locations to search (see CORPORA property)
            orderBy(str): order results by (see https://developers.google.com/drive/api/v3/reference/files/list)
            driveId(str): search this shared drive
            pageSize(int): number of results to return per page (default 300)
            complete(bool): true: exhaust all nextPageTokens

        Retruns dict of resutls
            '''

        
        
        
#         query_build = {
#             'name': (name, f'name {"contains" if fuzzy else "="} "{name}"'),
#             'trashed': (trashed, f'trashed={trashed}'),
#             'mimeType': (mimeType, f'mimeType="{self.MIMETYPES[mimeType] if mimeType in self.MIMETYPES else ""}"'),
#             'parents': (parents, f'"{parents}" in parents'),
#             'modifiedTime': (modifiedTime, f'modifiedTime{dopperator}"{modifiedTime}"')
#         }

#         query = [v[1] for k, v in query_build.items() if v[0]]

#         if len(fields) < 1:
#             fields = self.FIELDS_DEFAULT
#         fields = set(fields)

#         known_fields = []
#         for f in fields:
#             if f not in self.FILE_FIELDS:
#                 if forcefields:
#                     logger.warning(f'unknown return field: {f}')
#                     known_fields.append(f)
#                 else:
#                     raise GDriveError(f'unknown return field: {f}')
#             else:
#                 known_fields.append(f)

#         fields_string = f'nextPageToken, files({",".join(known_fields)})'


#         if driveId:
#             corpora = 'drive'

#         if corpora not in self.CORPORA:
#             raise GDriveError(f'unknown `corpora` value: {corpora}')
#         else:
#             includeItemsFromAllDrives = self.CORPORA[corpora]['params']['includeItemsFromAllDrives']
#             supportsAllDrives = self.CORPORA[corpora]['params']['supportsAllDrives']

#         q = ' and '.join(query)
#         logger.debug(f'QUERY STRING: {q}')
        
        
    
        interface = self._interface(name=name, trashed=trashed, mimeType=mimeType, fuzzy=fuzzy, 
               modifiedTime=modifiedTime, parents=parents, dopperator=dopperator,
               fields = fields, forcefields=forcefields,
               corpora=corpora, orderBy=orderBy, driveId=driveId)
        
        file_list = []
        search_result = self._list(pageToken=pageToken, **interface)
                    
        token = search_result.get('nextPageToken', False)
        file_list.extend(search_result.get('files', []))
        
        while token and complete:
            logger.debug(f'processing additional pages of results')
            search_result = self._list(pageToken=token, **interface) # need to pass pagetoken=token and **interface
            token = search_result.get('nextPageToken', False)
            file_list.extend(search_result.get('files', []))


        logger.debug(f'{len(file_list)} total matches returned')
        


        return file_list
        
    def ls(self, *args, **kwargs):
        '''print lis of files in a google drive using any of the search properties'''

        result = self.search(*args, **kwargs)
        for file in result.get('files', []):
            print(('name: {f[name]}, ID:{f[id]}, mimeType:{f[mimeType]}'.format(f=file)))

        return result
    
    def add(self, name, parents=None, fields=['webViewLink, mimeType, id']):
        '''add a file to google drive
        Args:
            name(str): name of file
            parents(str): parent folder to place item in
            fields(list of str): fields to return after uploading file
            
        Returns:
            list of str upon success
        '''
        
        if not parents:
            parents = ''
        

In [17]:
import constants

In [18]:
sec = '../secrets/client_secret_910311278281-bh8qk3kmgk0veri3v8en260e76ipafpj.apps.googleusercontent.com.json'
d = GDrive(secrets=sec, scopes=constants.SCOPES)

INFO:googleapiclient.discovery_cache:file_cache is only supported with oauth2client<4.0.0


In [None]:
iface = d._interface(parents='0B9WTleJ1MzaYT2pieWNXYkZtZm8', trashed=False, fields=['parents', 'id', 'name', 'mimeType'])

In [19]:
r = d.search(name='Job Description', fields=['parents', 'id', 'name', 'mimeType'], pageSize=300)

DEBUG:__main__:QUERY STRING: name contains "Job Description"
DEBUG:__main__:fettching page of 300 results with query name contains "Job Description"
DEBUG:__main__:7 total matches returned


In [20]:
r

[{'id': '1tu9MiR2744cskSukHFn4zW_axxVw7tIXDzllvguSW_A',
  'name': 'IT Security Manager Job Description',
  'mimeType': 'application/vnd.google-apps.document',
  'parents': ['0B9WTleJ1MzaYMFBvNko3M0RiY0E']},
 {'id': '1ROfMqpg2p6b44_NvLm-TU7Qdi_6Fuwh-fZyNSSUb_jc',
  'name': 'Notes IT Security Job Description',
  'mimeType': 'application/vnd.google-apps.document',
  'parents': ['0B9WTleJ1MzaYMFBvNko3M0RiY0E']},
 {'id': '1KtyNP26Zh2P3XifmT7eCevUaV45P-6E4',
  'name': 'HS International Baccalaureate Coordinator 2020-2021 Job Description.pdf',
  'mimeType': 'application/pdf',
  'parents': ['1It1PDdpJXRFBx_7nuHmPflYYFqUmaBW_']},
 {'id': '0Byt14NjSl0Z7d1R4YXozc3BjdmpzLXMtdXVoU3dnOXkwUGhV',
  'name': 'EDEP list job descriptions 19-20.pdf',
  'mimeType': 'application/pdf'},
 {'id': '14-a9vySk4Vpp-Yj1fBGXVHsA2f8nmuUqGLVonvs9Pn8',
  'name': 'Proposal - ECC Techology Integrator Job Description 2017/18 ',
  'mimeType': 'application/vnd.google-apps.document'},
 {'id': '0B3JoU6opCIINTWxhM3lNemdvWFE',
 

In [None]:
r = d._list(**iface)

In [None]:
len(r['files'])

In [None]:
def recurse_folders(drive, parents, 
                    fields=['parents', 'id', 'name', 'mimeType', 'owners', 'modifiedTime', 'webViewLink', 'parents'], 
                    file_list=[], skipped=[], depth=0):
    '''recursively find all files in a google drive folder'''
    if depth == 0:
        file_list = []
        skipped = []
    logger.info(f'recursion depth: {depth}')
    try:
        result = drive.search(parents=parents, fields=fields)
    except GDriveError as e:
        logger.error(f'error accessing google drive: {e}')
        skipped.append(parents)
        result = {}
        
    for f in result:
        if drive.MIMETYPES['folder'] == f.get('mimeType'):
            return_files, return_skipped = recurse_folders(drive=drive, parents=f['id'], 
                                         fields=fields, 
                                         file_list=file_list,
                                         skipped=skipped,
                                         depth=depth+1)
            file_list + return_files
            skipped + return_files
        else:
            file_list.append(f)
    return (file_list, skipped)
            

In [None]:
l = recurse_folders(d, "0B9WTleJ1MzaYMFBvNko3M0RiY0E")

In [None]:
l

In [None]:
# def set_credentials(secret, token, scopes):
#     token = Path(token).expanduser()
#     secret = Path(secret).expanduser()
#     creds = None
    
#     if token.exists():
#         creds = Credentials.from_authorized_user_file(token, scopes)
        
    
#     if not creds or not creds.valid:
#         if creds and creds.expired and creds.refresh_token:
#             creds.refresh(Request())    
#         else:
#             flow = InstalledAppFlow.from_client_secrets_file(secret)
#             creds = flow.run_local_server(port=0)
#         # Save the credentials for the next run
#         try:
#             with open('token.json', 'w') as token:
#                 token.write(creds.to_json())
#         except OSError as e:
#             raise GDRiveError(f'error writing token file: {token} - {e}')
            
    
#     return creds
            

In [None]:
logging.basicConfig(level=logging.DEBUG)
logger.debug('foo')

In [None]:
class DC():
    '''dummy class for developing class functions'''
    pass
self = DC()
# self.mimetypes = constants_GDrive.MIMETYPES

In [None]:
# def recurseFolders(myDrive, parents="", fieldNames='parents, id, name', fileList=[], skipped=[], depth=0):
#     if depth == 0:
#         fileList = []
#         skipped = []
#     logger.info('depth: ', depth)
#     try:
#         result = myDrive.search(parents=parents, fields=fieldNames)
#     except GDriveError as e:
#         logger.error(e)
#         skipped.append(parents)
#     for file in result['files']:
#         if file['mimeType'] == 'application/vnd.google-apps.folder':
#             returnVals = recurseFolders(myDrive=myDrive, parents=file['id'], fieldNames=fieldNames, fileList=fileList, 
#                                         skipped=skipped, depth=depth+1)
#             fileList + returnVals[0]
#             skipped + returnVals[1]
#         else:
#             fileList.append(file)
    
#     return(fileList, skipped)

In [None]:
# def check_auth(f):
#     def wrapper(*args):
#         print('args: ', args)
#         print(dir(args))
#         return f(*args)
#     return wrapper

# class GD(object):
#     def __init__(self):
#         self.authorized = False
    
#     @check_auth
#     def get(self):
#         print( 'get')


In [None]:
# from functools import wraps

# def wrapper(method):
#     @wraps(method)
#     def _impl(self, *method_args, **method_kwargs):
#         if self.auth == False:
#             print('wrapper: not authorized!')
#             self.auth = 77
#         method_output = method(self, *method_args, **method_kwargs)
#         return method_output
#     return _impl

# class Foo:
#     def __init__(self):
#         self.auth = False
#         self.list = []
        
#     @wrapper
#     def bar(self, word):
#         self.list.append(word)
#         return self.list

# f = Foo()
# f.bar("kitty")


In [None]:
def foof(a, **kwargs):
    print(a)
foof(**{'a': 'snek','b': 'trash panda'})

In [10]:
logging.basicConfig(level=logging.DEBUG)
logging.root.setLevel('INFO')
logger = logging.getLogger(__name__)
logger.setLevel('DEBUG')

In [11]:
logger.debug('foo')

DEBUG:__main__:foo


In [None]:
!jupyter-nbconvert --to python --template python_clean GDrive.ipynb