In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
try:
    from . import constants_GDrive
except ImportError:
    import constants_GDrive


In [13]:
from pathlib import Path
from json import JSONDecodeError
import logging
# from time import time
from os import path
import json
from datetime import datetime, timezone
from dateutil import parser

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google.auth import exceptions as google_exceptions
from ratelimit import limits, sleep_and_retry


In [9]:
logger = logging.getLogger(__name__)

In [10]:
class GDriveError(Exception):
    pass

In [232]:
# wrap any call to the service in this decorator 
def credential_wrapper(method):
    @wraps(method)
    def _impl(self, *method_args, **method_kwargs):
        if self.credentials.expired and self.credentials.refresh_token:
            creds.refresh(Request())    
        else:
            flow = InstalledAppFlow.from_client_secrets_file(secrets)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        try:
            with open('token.json', 'w') as token:
                token.write(creds.to_json())
        except OSError as e:
            raise GDRiveError(f'error writing token file: {token} - {e}')
            
        method_output = method(self, *method_args, **method_kwargs)
        return method_output
    return _impl

In [233]:
class GDrive():
    def __repr__(self):
        return 'GDrive()'
    
    def __str__(self):
        return f'GDrive()'
    
    def __init__(self, secrets, scopes, cache='./', token='./token.json'):
        '''create a google drive interface for searching and returning file/folder information
        
        Args:
            secrets(Path): secrets json file obtained from https://console.cloud.google.com/cloud-resource-manager
            token(Path): file to cache auth information (typically within cache path)
        '''
        self.secrets = secrets
        self.scopes = scopes        
        self.token = token
        self.credentials = self.set_credentials(secrets=self.secrets, 
                                            scopes=self.scopes, 
                                            token=self.token)
        self.service = self.build_service(self.credentials)
        self.MIMETYPES = constants_GDrive.MIMETYPES
        self.CORPORA = constants_GDrive.CORPORA
        self.FILE_FIELDS = constants_GDrive.FILE_FIELDS
        self.FIELDS_DEFAULT = constants_GDrive.FIELDS_DEFAULT
        self.PAGESIZE = constants_GDrive.PAGESIZE
        
    
    @staticmethod
    def set_credentials(secrets, token, scopes):
        token = Path(token).expanduser()
        secrets = Path(secrets).expanduser()
        creds = None

        if token.exists():
            creds = Credentials.from_authorized_user_file(token, scopes)


        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())    
            else:
                flow = InstalledAppFlow.from_client_secrets_file(secrets)
                creds = flow.run_local_server(port=0)
            # Save the credentials for the next run
            try:
                with open('token.json', 'w') as token:
                    token.write(creds.to_json())
            except OSError as e:
                raise GDRiveError(f'error writing token file: {token} - {e}')


        return creds    
# 
    
    @staticmethod
    def build_service(credentials):
        try:
            service  = build('drive', 'v3', credentials=credentials)
        except google_exceptions.GoogleAuthError as e:
            raise GDriveError(f'error building credentials: {e}')
        return service
    
                
    @property
    def token(self):
        '''token file'''
        return self._token
        
    @token.setter
    def token(self, t_path):
        t_path = Path(t_path)        
        self._token = t_path

    @credential_wrapper
    def search(self, name=None, trashed=False, mimeType=None, fuzzy=True, 
               modifiedTime=None, parents=None, dopperator='>',
               fields = [], forcefields=False,
               corpora='user', orderBy='createdTime', driveId='',
               pageSize=constants_GDrive.PAGESIZE, complete=True,
               pageToken=''):
        '''search for objects in google drive by name

        Args:
            name(str): string to search for
            trashed(bool): search in trash when true
            mimeType(str): short mimeType (see MIMETYPES property)
            fuzzy(bool): true: `name contains "value"` false: `name = "value"`
            modifiedTime(str): yyyy-mm-dd string
            dopperator(str): >, < for use with modifiedTime
            parents(str): folder to search within
            fields(list of str): fields to return (see FILE_FIELDS property)
            forcefields(bool): true: use unknown fields, false: reject fields not in FILE_FIELDS
            corpora(str): locations to search (see CORPORA property)
            orderBy(str): order results by (see https://developers.google.com/drive/api/v3/reference/files/list)
            driveId(str): search this shared drive
            pageSize(int): number of results to return per page (default 300)
            complete(bool): true: exhaust all nextPageTokens

        Retruns dict of resutls
            '''
        
        @sleep_and_retry
        @limits(calls=constants_GDrive.CALL_LIMIT, period=constants_GDrive.CALL_PERIOD)
        def _list(pageToken=''):
            logger.debug(f'fettching page of {pageSize} results')
            try:
                results = self.service.files().list(q=q,
                                                    corpora=corpora,
                                                    includeItemsFromAllDrives=includeItemsFromAllDrives,
                                                    supportsAllDrives=supportsAllDrives,
                                                    fields=fields_string,
                                                    driveId=driveId,
                                                    pageSize=pageSize,
                                                    pageToken=pageToken
                                                    ).execute()
            except HttpError as e:
                raise GDriveError(f'error searching: {type(e)}: {e}')
                
            return results
        
        
        
        query_build = {
            'name': (name, f'name {"contains" if fuzzy else "="} "{name}"'),
            'trashed': (trashed, f'trashed={trashed}'),
            'mimeType': (mimeType, f'mimeType="{self.MIMETYPES[mimeType] if mimeType in self.MIMETYPES else ""}"'),
            'parents': (parents, f'"{parents}" in parents'),
            'modifiedTime': (modifiedTime, f'modifiedTime{dopperator}"{modifiedTime}"')
        }

        query = [v[1] for k, v in query_build.items() if v[0]]

        if len(fields) < 1:
            fields = self.FIELDS_DEFAULT
        fields = set(fields)

        known_fields = []
        for f in fields:
            if f not in self.FILE_FIELDS:
                if forcefields:
                    logger.warning(f'unknown return field: {f}')
                    known_fields.append(f)
                else:
                    raise GDriveError(f'unknown return field: {f}')
            else:
                known_fields.append(f)

        fields_string = f'nextPageToken, files({",".join(known_fields)})'


        if driveId:
            corpora = 'drive'

        if corpora not in self.CORPORA:
            raise GDriveError(f'unknown `corpora` value: {corpora}')
        else:
            includeItemsFromAllDrives = self.CORPORA[corpora]['params']['includeItemsFromAllDrives']
            supportsAllDrives = self.CORPORA[corpora]['params']['supportsAllDrives']

        q = ' and '.join(query)
        logger.debug(f'QUERY STRING: {q}')
        
        file_list = []
        search_result = _list(pageToken=pageToken)
                    
        token = search_result.get('nextPageToken', False)
        file_list.extend(search_result.get('files', []))
        
        while token and complete:
            logger.debug(f'processing additional pages of results')
            search_result = _list(token)
            token = search_result.get('nextPageToken', False)
            file_list.extend(search_result.get('files', []))


        logger.debug(f'{len(file_list)} total matches returned')
        


        return file_list
        
    def ls(self, *args, **kwargs):
        '''print lis of files in a google drive using any of the search properties'''

        result = self.search(*args, **kwargs)
        for file in result.get('files', []):
            print(('name: {f[name]}, ID:{f[id]}, mimeType:{f[mimeType]}'.format(f=file)))

        return result
        

In [169]:
import constants

In [234]:
sec = '../secrets/client_secret_910311278281-bh8qk3kmgk0veri3v8en260e76ipafpj.apps.googleusercontent.com.json'
d = GDrive(secrets=sec, scopes=constants.SCOPES)

INFO:googleapiclient.discovery_cache:file_cache is only supported with oauth2client<4.0.0


In [241]:
d.credentials

<google.oauth2.credentials.Credentials at 0x10a0e25b0>

In [229]:
r = d.search(parents='0B9WTleJ1MzaYT2pieWNXYkZtZm8', fields=['parents', 'id', 'name', 'mimeType'], pageSize=300)

DEBUG:__main__:QUERY STRING: "0B9WTleJ1MzaYT2pieWNXYkZtZm8" in parents
DEBUG:__main__:fettching page of 300 results
DEBUG:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/drive/v3/files?q=%220B9WTleJ1MzaYT2pieWNXYkZtZm8%22+in+parents&corpora=user&includeItemsFromAllDrives=false&supportsAllDrives=false&fields=nextPageToken%2C+files%28id%2Cname%2CmimeType%2Cparents%29&driveId=&pageSize=300&pageToken=&alt=json
DEBUG:__main__:processing additional pages of results
DEBUG:__main__:fettching page of 300 results
DEBUG:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/drive/v3/files?q=%220B9WTleJ1MzaYT2pieWNXYkZtZm8%22+in+parents&corpora=user&includeItemsFromAllDrives=false&supportsAllDrives=false&fields=nextPageToken%2C+files%28id%2Cname%2CmimeType%2Cparents%29&driveId=&pageSize=300&pageToken=~%21%21~AI9FV7TO5Q1WfsvAwaZUi2-jfk3dmm-9zYEYplTb9gGPKQNXlv-KS9aQ-WdPeMH5MjI3QIpIUWEcZQP5uyu3YHl9XH_02eDhl5ylfa67ugJJny02jbxxfV0qSI-uyCXvWYf1rVw3gm8

In [230]:
r

[{'id': '1JyBd_Z1dYPMJ2HPu8WyscLNEL43Wes6v',
  'name': 'SEARCH Testing',
  'mimeType': 'application/vnd.google-apps.folder',
  'parents': ['0B9WTleJ1MzaYT2pieWNXYkZtZm8']},
 {'id': '1CC-H_cGpnpkQ6YymxSH8QRGzDeFMR2-aoRLfXyCjrS4',
  'name': 'Untitled spreadsheet',
  'mimeType': 'application/vnd.google-apps.spreadsheet',
  'parents': ['0B9WTleJ1MzaYT2pieWNXYkZtZm8']},
 {'id': '1c0xe_L2csR10NOfny6LnmDQYkpzohaWqHAZnekVTBfE',
  'name': 'FCD Google Meet Links',
  'mimeType': 'application/vnd.google-apps.spreadsheet',
  'parents': ['0B9WTleJ1MzaYT2pieWNXYkZtZm8']},
 {'id': '1U6NuGcEvrQSdTycR5sZGlaGOzZatPauH',
  'name': 'Never look at this again',
  'mimeType': 'application/vnd.google-apps.folder',
  'parents': ['0B9WTleJ1MzaYT2pieWNXYkZtZm8']},
 {'id': '1UMVMWcETclF9pKPqnsqslHmllXVEteNW',
  'name': 'Untitled Diagram.drawio',
  'mimeType': 'application/vnd.jgraph.mxfile',
  'parents': ['0B9WTleJ1MzaYT2pieWNXYkZtZm8']},
 {'id': '1GyrpP6HqfqpMKwo74GpPrOE0fpJdAB3oCLgsSBoz19k',
  'name': '2021 22 S

In [160]:
def set_credentials(secret, token, scopes):
    token = Path(token).expanduser()
    secret = Path(secret).expanduser()
    creds = None
    
    if token.exists():
        creds = Credentials.from_authorized_user_file(token, scopes)
        
    
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())    
        else:
            flow = InstalledAppFlow.from_client_secrets_file(secret)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        try:
            with open('token.json', 'w') as token:
                token.write(creds.to_json())
        except OSError as e:
            raise GDRiveError(f'error writing token file: {token} - {e}')
            
    
    return creds
            

In [149]:
logging.basicConfig(level=logging.DEBUG)
logger.debug('foo')

DEBUG:__main__:foo


In [None]:
class DC():
    '''dummy class for developing class functions'''
    pass
self = DC()
# self.mimetypes = constants_GDrive.MIMETYPES

In [21]:
def recurseFolders(myDrive, parents="", fieldNames='parents, id, name', fileList=[], skipped=[], depth=0):
    if depth == 0:
        fileList = []
        skipped = []
    logger.info('depth: ', depth)
    try:
        result = myDrive.search(parents=parents, fields=fieldNames)
    except GDriveError as e:
        logger.error(e)
        skipped.append(parents)
    for file in result['files']:
        if file['mimeType'] == 'application/vnd.google-apps.folder':
            returnVals = recurseFolders(myDrive=myDrive, parents=file['id'], fieldNames=fieldNames, fileList=fileList, 
                                        skipped=skipped, depth=depth+1)
            fileList + returnVals[0]
            skipped + returnVals[1]
        else:
            fileList.append(file)
    
    return(fileList, skipped)

In [29]:
rest = d.search(parents="1JyBd_Z1dYPMJ2HPu8WyscLNEL43Wes6v", fields=['parents', 'id', 'name', 'mimeType', 'owners', 'modifiedTime'])

DEBUG:__main__:QUERY STRING: "1JyBd_Z1dYPMJ2HPu8WyscLNEL43Wes6v" in parents
DEBUG:__main__:fettching next page of 300
DEBUG:__main__:5 total matches returned


In [43]:
d.FILE_FIELDS

{'Methods',
 'appProperties',
 'capabilities',
 'contentHints',
 'contentRestrictions',
 'copyCreates',
 'copyRequiresWriterPermission',
 'createdTime',
 'description',
 'driveId',
 'explicitlyTrashed',
 'exportLinks',
 'fileExtension',
 'folderColorRgb',
 'fullFileExtension',
 'hasAugmentedPermissions',
 'hasThumbnail',
 'headRevisionId',
 'iconLink',
 'id',
 'imageMediaMetadata',
 'isAppAuthorized',
 'kind',
 'lastModifyingUser',
 'linkShareMetadata',
 'md5Checksum',
 'mimeType',
 'modifiedByMe',
 'modifiedByMeTime',
 'modifiedTime',
 'name',
 'originalFilename',
 'ownedByMe',
 'owners',
 'parents',
 'permissionIds',
 'permissions',
 'properties',
 'quotaBytesUsed',
 'resourceKey',
 'shared',
 'sharedWithMeTime',
 'sharingUser',
 'shortcutDetails',
 'size',
 'spaces',
 'starred',
 'teamDriveId',
 'thumbnailLink',
 'thumbnailVersion',
 'trashed',
 'trashedTime',
 'trashingUser',
 'version',
 'videoMediaMetadata',
 'viewedByMe',
 'viewedByMeTime',
 'viewersCanCopyContent',
 'webContent

In [55]:
def recurse_folders(drive, parents, 
                    fields=['parents', 'id', 'name', 'mimeType', 'owners', 'modifiedTime', 'webViewLink', 'parents'], 
                    file_list=[], skipped=[], depth=0):
    '''recursively find all files in a google drive folder'''
    if depth == 0:
        file_list = []
        skipped = []
    logger.info(f'recursion depth: {depth}')
    try:
        result = drive.search(parents=parents, fields=fields)
    except GDriveError as e:
        logger.error(f'error accessing google drive: {e}')
        skipped.append(parents)
        result = {}
        
    for f in result:
        if drive.MIMETYPES['folder'] == f.get('mimeType'):
            return_files, return_skipped = recurse_folders(drive=drive, parents=f['id'], 
                                         fields=fields, 
                                         file_list=file_list,
                                         skipped=skipped,
                                         depth=depth+1)
            file_list + return_files
            skipped + return_files
        else:
            file_list.append(f)
    return (file_list, skipped)
            

In [56]:
r = recurse_folders(d, parents="1JyBd_Z1dYPMJ2HPu8WyscLNEL43Wes6v")

INFO:__main__:recursion depth: 0
DEBUG:__main__:QUERY STRING: "1JyBd_Z1dYPMJ2HPu8WyscLNEL43Wes6v" in parents
DEBUG:__main__:fettching next page of 300
DEBUG:__main__:5 total matches returned
INFO:__main__:recursion depth: 1
DEBUG:__main__:QUERY STRING: "1qqN00TwJif7Crc7HZHqk5OHY9_ZNayBT" in parents
DEBUG:__main__:fettching next page of 300
DEBUG:__main__:5 total matches returned
INFO:__main__:recursion depth: 2
DEBUG:__main__:QUERY STRING: "1RAG_1yFBVuijLnncO97dtcf3s45D-0x2" in parents
DEBUG:__main__:fettching next page of 300
DEBUG:__main__:2 total matches returned
INFO:__main__:recursion depth: 2
DEBUG:__main__:QUERY STRING: "1MZFT4lRP1EfRuZVd-A_2SH_qq7_9MfZp" in parents
DEBUG:__main__:fettching next page of 300
DEBUG:__main__:1 total matches returned
INFO:__main__:recursion depth: 1
DEBUG:__main__:QUERY STRING: "1ynPHhMLM53umRZfYQEi0JoPtEW2_R9bg" in parents
DEBUG:__main__:fettching next page of 300
DEBUG:__main__:2 total matches returned


In [53]:
r[0]

[{'id': '1I8UP8i4b7UtJMCHG57T0karZitiSz_jDX5-UmmHdreo',
  'name': 'Welcome  24 September PD Day',
  'mimeType': 'application/vnd.google-apps.presentation',
  'parents': ['1RAG_1yFBVuijLnncO97dtcf3s45D-0x2'],
  'webViewLink': 'https://docs.google.com/presentation/d/1I8UP8i4b7UtJMCHG57T0karZitiSz_jDX5-UmmHdreo/edit?usp=drivesdk',
  'modifiedTime': '2021-09-23T14:42:20.447Z',
  'owners': [{'kind': 'drive#user',
    'displayName': 'Aaron Ciuffo',
    'photoLink': 'https://lh3.googleusercontent.com/a-/AOh14GgR6LsmWyc0vAzoGXEa38AseFtJ8jcmBsbZ96ct=s64',
    'me': True,
    'permissionId': '14867985035974787748',
    'emailAddress': 'aciuffo@ash.nl'}]},
 {'id': '1LrbO5pS0IFqcOaSghkVsllzSWzSSNLQ8UhGrPyXWz6Y',
  'name': '2021 09 23 FCD Planning',
  'mimeType': 'application/vnd.google-apps.document',
  'parents': ['1RAG_1yFBVuijLnncO97dtcf3s45D-0x2'],
  'webViewLink': 'https://docs.google.com/document/d/1LrbO5pS0IFqcOaSghkVsllzSWzSSNLQ8UhGrPyXWz6Y/edit?usp=drivesdk',
  'modifiedTime': '2021-09-23

In [58]:
for i in r[0]:
    print(f'NAME: {i["name"]}\nFOLDER: https://drive.google.com/drive/folders/{i["parents"][0]}\nURL: {i["webViewLink"]}\n\n')


NAME: Welcome  24 September PD Day
FOLDER: https://drive.google.com/drive/folders/1RAG_1yFBVuijLnncO97dtcf3s45D-0x2
URL: https://docs.google.com/presentation/d/1I8UP8i4b7UtJMCHG57T0karZitiSz_jDX5-UmmHdreo/edit?usp=drivesdk


NAME: 2021 09 23 FCD Planning
FOLDER: https://drive.google.com/drive/folders/1RAG_1yFBVuijLnncO97dtcf3s45D-0x2
URL: https://docs.google.com/document/d/1LrbO5pS0IFqcOaSghkVsllzSWzSSNLQ8UhGrPyXWz6Y/edit?usp=drivesdk


NAME: HS PE HR Monitor Exports 2021 (2021.12.07)
FOLDER: https://drive.google.com/drive/folders/1MZFT4lRP1EfRuZVd-A_2SH_qq7_9MfZp
URL: https://docs.google.com/spreadsheets/d/1W5Qn5j4XmZUdtpNtMp0Ae2Isa1i920YtNi55XRN6gvo/edit?usp=drivesdk


NAME: Untitled form
FOLDER: https://drive.google.com/drive/folders/1qqN00TwJif7Crc7HZHqk5OHY9_ZNayBT
URL: https://docs.google.com/forms/d/1F_kVQ34DNdNLwohr5UbZBJHVYLXYdReslFY5eSaC3go/edit?usp=drivesdk


NAME: Untitled form
FOLDER: https://drive.google.com/drive/folders/1qqN00TwJif7Crc7HZHqk5OHY9_ZNayBT
URL: https://doc

In [22]:
recurseFolders(d, parents="1JyBd_Z1dYPMJ2HPu8WyscLNEL43Wes6v")

--- Logging error ---
Traceback (most recent call last):
  File "/Users/aciuffo/.pyenv/versions/3.10.1/lib/python3.10/logging/__init__.py", line 1098, in emit
    msg = self.format(record)
  File "/Users/aciuffo/.pyenv/versions/3.10.1/lib/python3.10/logging/__init__.py", line 942, in format
    return fmt.format(record)
  File "/Users/aciuffo/.pyenv/versions/3.10.1/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/Users/aciuffo/.pyenv/versions/3.10.1/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/Users/aciuffo/.pyenv/versions/3.10.1/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Users/aciuffo/.pyenv/versions/3.10.1/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/aciuffo/.local/share/virtualenvs/gdr

UnboundLocalError: local variable 'result' referenced before assignment

In [203]:
def check_auth(f):
    def wrapper(*args):
        print('args: ', args)
        print(dir(args))
        return f(*args)
    return wrapper

class GD(object):
    def __init__(self):
        self.authorized = False
    
    @check_auth
    def get(self):
        print( 'get')


In [220]:
from functools import wraps

def wrapper(method):
    @wraps(method)
    def _impl(self, *method_args, **method_kwargs):
        if self.auth == False:
            print('wrapper: not authorized!')
            self.auth = 77
        method_output = method(self, *method_args, **method_kwargs)
        return method_output
    return _impl

class Foo:
    def __init__(self):
        self.auth = False
        self.list = []
        
    @wrapper
    def bar(self, word):
        self.list.append(word)
        return self.list

f = Foo()
f.bar("kitty")


wrapper: not authorized!


['kitty']

In [225]:
f.auth = False

In [226]:
f.bar('floof')

wrapper: not authorized!


['kitty', 'snek', 'car', 'floof']

In [204]:
GD().get()

args:  (<__main__.GD object at 0x10ad3f4c0>,)
['__add__', '__class__', '__class_getitem__', '__contains__', '__delattr__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getnewargs__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__mul__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__rmul__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', 'count', 'index']
get


In [176]:
f = Foo()

In [177]:
f.stuff()

0


In [4]:
logging.basicConfig(level=logging.DEBUG)
logging.root.setLevel('INFO')
logger = logging.getLogger(__name__)
logger.setLevel('INFO')

In [None]:
!jupyter-nbconvert --to python --template python_clean GDrive.ipynb