Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
583 lines (566 sloc) 39.3 KB
# DAcollector collects information about a DeviantArt user and stores it in the deviantartdb.sqlite database (Gallery/Favourites folders and all deviation art info).
# It also creates two text files summarising the user's Gallery and Favourites.
# If the program is run for a different user than the one already stored, all previously collected data in the SQL database is deleted.
from requests_oauthlib import OAuth2Session
from oauthlib.oauth2 import BackendApplicationClient, TokenExpiredError
import requests
import time
import json
import sqlite3
import hidden
import re
from bs4 import BeautifulSoup
from datetime import timedelta
def token_saver(token):
    """Save the token (json/dict format) in deviantartdb.sqlite, table deviantart_session.

    The table is dropped and recreated on every call, so it only ever holds
    the most recently saved token.

    token_saver argument parameters:
    - token: dict-like token. Besides the fields returned by DeviantArt it must
      carry the 'expires_at' epoch timestamp that is read below, e.g.:
      {"expires_in": 3600, "status": "success", "access_token": "Alph4num3r1ct0k3nv4lu3",
       "token_type": "Bearer", "expires_at": 1500000000.0}

    Raises KeyError if the token has no 'expires_at' entry; this is checked
    before the database is touched so a bad token cannot wipe the stored one.
    """
    token_dump = json.dumps(token)
    # Human readable local time of the expiry, stored next to the raw JSON.
    expires_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(token['expires_at']))
    conn = sqlite3.connect('deviantartdb.sqlite')
    try:
        cur = conn.cursor()
        cur.executescript('''DROP TABLE IF EXISTS deviantart_session;
            CREATE TABLE deviantart_session( `id` INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,`token` TEXT, 'expires_at' varchar(64) )''')
        cur.execute('''INSERT INTO deviantart_session (token, expires_at) VALUES ( ?, ? ) ''', (token_dump, expires_at))
        conn.commit()
        cur.close()
    finally:
        # Closing the cursor alone keeps the database file handle open.
        conn.close()
def get_token():
"""Returns the token inside deviantartdb.sqlite. If no token exists on database or it is expired, fetch a new one from DeviantArt"""
token_url='https://www.deviantart.com/oauth2/token'
conn = sqlite3.connect('deviantartdb.sqlite')
cur = conn.cursor()
cur.execute('''SELECT token FROM deviantart_session''')
token=None
for row in cur: #if the table on token FROM deviantart_session is empty, this for loop step is just skipped
try:
token = json.loads(row[0])
print 'Adquiring token from deviantartdb.sqlite database...'
except:
print 'Adquiring token from deviantartdb.sqlite FAIL!'"\n"
if token==None:
print 'No token inside deviantartdb.sqlite'"\n"'Adquiring token from Deviantart...'
token = deviantart_session.fetch_token(token_url=token_url, client_id=client_id, client_secret=client_secret)
else:
timenow=time.time()
if timenow>token['expires_at']:
print 'The token on database is expired. Adquiring new access token from Deviantart...'
token = deviantart_session.fetch_token(token_url=token_url, client_id=client_id, client_secret=client_secret)
cur.close()
print 'Token:\n', json.dumps(token, indent=3)
print 'Token expires at:', time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(float(json.dumps(token['expires_at'])))),"\n"
token_saver(token)
return token
def createDAsession():
    """(Re)build the module-level DeviantArt session.

    Uses the Client Credentials access (Backend Application Flow from
    oauthlib.oauth2) together with OAuth2Session from Requests-OAuthlib.
    The globals deviantart_session and token are rebound as a side effect.
    """
    global client_id, client_secret, deviantart_client, deviantart_session, token
    # get_token() fetches through the global session, so a session bound to the
    # backend client has to exist before it is called.
    deviantart_session = OAuth2Session(client=deviantart_client)
    token = get_token()
    # Final session: carries the token on every request.
    deviantart_session = OAuth2Session(client_id=client_id, token=token)
def test_token():
"""Placebo Call to test Access Protected Resource and check if token is able to authenticate itself when making requests to deviantArt endpoints"""
test=deviantart_session.get("https://www.deviantart.com/api/v1/oauth2/placebo")
print"Test Access to Protected Resource using Token..."'\n'"Accessing... https://www.deviantart.com/api/v1/oauth2/placebo"
print test.content
return json.loads(test.content)
def find_user():
"""Requests a user input for a URL address and Returns a DeviantArt username/usersite and saves the user data to SQL database.
If a non-URL is given or a site without a DeviantArt user, returns None.
If URL has a DeviantArt user, returns a tuple of (username, usersite). Inputs should be like: username.deviantart.com/..."""
go_loop=True
while go_loop:
url = raw_input('Enter a URL address which contains a DeviantArt User (http://username.deviantart.com/...)\n'
' ...OR one URL of his/her Deviantion art to commence extract information\n'
'(or press Enter to exit)\nURL... : ')
if url=='':
username=None
go_loop=False
continue
elif re.search('.*www.deviantart.com.*', url): #Exclude search for user if given www.deviantart.com main/.* site
print 'No single user can be selected in:', url, '\nTry again...\n'
else:
if re.search('.*(deviantart.com).*', url):#Exclude search for user if given non deviantart site
#Exclude search for user if given Non-URL address using try/except
try:
if not (url.startswith('http://') or url.startswith('https://')):
url='http://'+url
print url
r=requests.get(url)
if r.status_code==requests.codes.ok:#<Response [200]> = url address works. Exclude search for user if given fake deviantart site
print 'Connecting... Response', r.status_code
soup=BeautifulSoup(r.content, "html.parser")
usernames = soup.find_all('a', {'class': re.compile("u .* username")})#{'class': "u regular username"} or {'class': "u premium username"}
for i, name in enumerate(usernames):
if i==0: #The 1st loop (i==0) has the username owner of the page
print 'DeviantArt user detected... user name: %s'"\n"'DeviantArt user site: %s' %(name.text, name.get("href"))
username=name.text
usersite=name.get("href")
go_loop=False
break
else:
print 'Connecting... Response', r.status_code, '\nRequest Fail in:', url, '\nTry again...\n'
username=None
except:
print'Bad Request on:', url, '\n Try again...\n'
username=None
else:
print 'Not a DeviantArt site on:', url, '\nTry again...\n'
username=None
if username!=None:
user_info=(username,usersite)
else:
user_info=(None,None)
print'Aborted by user...'
return user_info
def get_folder(folder):
'''Extract User's Gallery or Favourites information and save to SQL database.
Returns a dictionary with the folder information {key (numeral order of folder): value (foldername, folderid, Qty of files)}
The get_folder() argument parameters must be either folder="Gallery" or folder="Favourites"'''
global go_collect, deviantart_session, token
cur.execute('''SELECT id FROM user''')
user_id=cur.fetchone()[0]
offset=0
go_loop=True
if folder=='Gallery' and go_collect==1:
print "\nCommence %s Extraction...\nAccessing... https://www.deviantart.com/api/v1/oauth2/gallery/folders?"%(folder)
gallery={}#key(numeral order of folder): value (foldername, folderid, files)
if folder=='Favourites' and go_collect==1:
print "\nCommence %s Extraction...\nAccessing... https://www.deviantart.com/api/v1/oauth2/collections/folders?"%(folder)
favourites={}#key(numeral order of folder): value (foldername, folderid, files)
#Commence extraction of folder information: How many folders, name, folder ID, Qty of files,
while go_collect==1 and go_loop:
#This loop extracts all the folder information available for gallery/favourites
if folder=='Gallery':
url = "https://www.deviantart.com/api/v1/oauth2/gallery/folders?username=%s&calculate_size=1&offset=%s&limit=50"%(str(username),str(offset))
if folder=='Favourites':
url = "https://www.deviantart.com/api/v1/oauth2/collections/folders?username=%s&calculate_size=1&ext_preload=false&offset=%s&limit=50&mature_content=true"%(str(username),str(offset))
try:
deviantfolder=deviantart_session.get(url)
except TokenExpiredError:
#If url connection fails, tries to reconnect
conn.commit()#to save any pending database request and free access to database to save new token
print 'Request on DeviantArt API fail...\nCreating New DeviantArt session using The Client Credentials access...\n'
createDAsession()
deviantfolder=deviantart_session.get(url)
if deviantfolder.status_code!=requests.codes.ok:
#if the page status is not OK, it tries one more time after 1s
time.sleep(1)
deviantfolder=deviantart_session.get(url)
#print json.dumps(deviantfolder.json(), indent=2)
for i, info in enumerate(deviantfolder.json()["results"]):
folderid=info["folderid"]
foldername=info["name"]
files=info["size"]
if folder=='Gallery':
cur.execute('''INSERT OR IGNORE INTO gallery (user_id, folderid, name, files) VALUES ( ?, ?, ?, ? )''', ( user_id, folderid, foldername, files ) )
#Gallery table connections --> user.id=gallery.user_id
gallery[i]=(foldername, folderid, files)
if folder=='Favourites':
cur.execute('''INSERT OR IGNORE INTO favourites (user_id, folderid, name, files) VALUES ( ?, ?, ?, ? )''', ( user_id, folderid, foldername, files ) )
#Favourites table connections --> user.id=favourites.user_id
favourites[i]=(foldername, folderid, files)
if deviantfolder.json()["has_more"]:#if it has more pages: True, otherwise:False
offset=deviantfolder.json()["next_offset"]#New offset of index list of folders to get more pages of data
else:
go_loop=deviantfolder.json()["has_more"]
conn.commit()
if folder=='Gallery':
folder_info=gallery
cur.execute('''SELECT COUNT(files), SUM(files) FROM gallery''')
if folder=='Favourites':
folder_info=favourites
cur.execute('''SELECT COUNT(files), SUM(files) FROM favourites''')
size, files = cur.fetchone()
print "...Extracting folder information... %s %s: %s folders with %s files inside\n"%(username,folder,str(size),str(files))
return folder_info
def get_deviation(folder):
'''Extract each Deviation Art information from User's Gallery or Favourites and save it to SQL database.
Returns a dictionary with a list all the Deviation Art. All the details for each Deviation Art are nested as tuples.
Each entry of this dictionary represents a folder (this dictionary[key]=gallery[key] or favourites[key]), so we are able to find that folder details (foldername, folderid, Qty of files).
gallerydeviation or favouritesdeviation = {key (numeral order of folder): value [List of deviation as tuples]}
The Deviation Art tuple structure is: (No., title, author, category, published_time, filesize, webpage)
The get_deviation() argument parameters must be either folder="Gallery" or folder="Favourites"'''
global go_collect, deviantart_session, token, gallery, favourites
if go_collect==1:
print 'Commence Deviantion Art Extraction from %s ...'%(folder)
if folder=='Gallery':
print 'Accessing... https://www.deviantart.com/api/v1/oauth2/gallery/{folderid}?'
gallerydeviation={}#key (folder number order): value (deviationlist for that folder) / gallerydeviation[key]=gallery[key], to find the folder name
if folder=='Favourites':
print 'Accessing... https://www.deviantart.com/api/v1/oauth2/collections/{folderid}?'
favouritesdeviation={}#key (folder number order): value (deviationlist for that folder) / key from favouritesdeviation=key from favourites, to find the folder name
print 'Accessing... https://www.deviantart.com/api/v1/oauth2/deviation/metadata?'
print 'Accessing... http://www.deviantart.com/developers/console/comments/comments_deviation/{deviationid}?'
#Commence extraction of each deviation art information from gallery/favourites folder
if folder=='Gallery':
usefolder=gallery
if folder=='Favourites':
usefolder=favourites
#This loop selects which folder of Gallery/Favourites to use
for i in xrange(len(usefolder)):
deviationlist=[]#list of tuples with 8 entries (No., title, author, category, published_time, filesize, deviationid, webpage)
offset=0
go_loop=True
#
###Check on db save state if process has stopped on some point before it starts data gathering from zero
#1st Pick up variables from saved state (if any exists) and then, checks if folder gathering was completed up to what "i" loop state.
if folder=='Gallery':
try:
cur.execute('''SELECT i_state, j_state, offset, go_loop, number FROM Gallery_loop_state''')
i_state, j_state, offset_state, go_loop_state, number = cur.fetchone()
except:
i_state, j_state, number = -1, -1, -1 #number refers to the numeral order of the file. On each print is generated by (j+1+offset)
if folder=='Favourites':
try:
cur.execute('''SELECT i_state, j_state, offset, go_loop, number FROM Favourites_loop_state''')
i_state, j_state, offset_state, go_loop_state, number = cur.fetchone()
except:
i_state, j_state, number = -1, -1, -1
if i<i_state:
print "\n"'...Extracting information from %s, folder %s of %s -- Name: %s -- %s files...\n'%(folder, (i+1), len(usefolder), usefolder[i][0], usefolder[i][2])
print "...Reading on Database... all data from folder: %s Saved"%usefolder[i][0]
continue
###End of 1st step of state check
#
###Check on db save state if process has stopped on some point before it starts data gathering from zero
#2nd check is what offset page the url was using for the loop when gathering the deviation art
if i==i_state and offset<offset_state:
offset = offset_state
if offset!=0:
print "\n"'...Extracting information from %s, folder %s of %s -- Name: %s -- %s files...\n'%(folder, (i+1), len(usefolder), usefolder[i][0], usefolder[i][2])
print 'No. \t Title; \t Author; \t Category; \t Published time; \t Filesize (bytes)'
###End of 2nd step of state check
#
while go_collect==1 and go_loop:
#This loop commence extraction of each deviation art information by first accessing that folder information from Gallery/Favourites
if offset==0:#to avoid showing this message to show again when we batch another 24 pages of data for the same folder (offset=offset+24)
print "\n"'...Extracting information from %s, folder %s of %s -- Name: %s -- %s files...\n'%(folder, (i+1), len(usefolder), usefolder[i][0], usefolder[i][2])
print 'No. \t Title; \t Author; \t Category; \t Published time; \t Filesize (bytes)'
if folder=='Gallery':
folderid = gallery[i][1]
url = "https://www.deviantart.com/api/v1/oauth2/gallery/%s?username=%s&mode=newest&offset=%s&limit=24&mature_content=true"%( str(folderid),str(username),str(offset) )
if folder=='Favourites':
folderid = favourites[i][1]
url = "https://www.deviantart.com/api/v1/oauth2/collections/%s?username=%s&offset=%s&limit=24&mature_content=true"%( str(folderid),str(username),str(offset) )
try:
deviantfolder=deviantart_session.get(url)#connects to DeviantArt API to extract info
except TokenExpiredError:
#If url connection fails, tries to reconnect
conn.commit()#to save any pending database request and free access to database to save new token
print 'Request on DeviantArt API fail...\nCreating New DeviantArt session using The Client Credentials access...\n'
createDAsession()
deviantfolder=deviantart_session.get(url)
if deviantfolder.status_code!=requests.codes.ok:
#if the page status is not OK, it tries one more time after 1s
time.sleep(1)
deviantfolder=deviantart_session.get(url)
#print json.dumps(deviantfolder.json(), indent=2)
#This loop extracts all deviation art for the selected folder from Gallery/Favourites
for j, info in enumerate(deviantfolder.json()["results"]):
#
###Check on db save state if process has stopped on some point before it starts data gathering from zero
#3rd step of check is where deviation art gathering was completed up to what "j" loop state to the number of the deviation (j+1+offset)
#print 'j = %s, j_state = %s, i = %s, i_state = %s, j+1+offset = %s, number = %s'%(j,j_state,i,i_state,j+1+offset,number)
if j<j_state and i==i_state and j+1+offset<number: continue
if j==j_state and i==i_state and j+1+offset==number:
print '...Recovering last saved state...'
if go_loop_state==1: go_loop=True
else: go_loop=False
if folder=='Gallery':
cur.execute('''SELECT deviationid FROM Gallery_loop_state''')
deviationid= cur.fetchone()[0]
cur.execute('''SELECT title, author, category, published_time, content FROM gallery_deviantion WHERE deviationid = ?''', (deviationid,) )
title, author, category, published_time, content = cur.fetchone()
i_state, j_state, number = -1, -1, -1 #after state is recovered remove it from loop
if folder=='Favourites':
cur.execute('''SELECT deviationid FROM Favourites_loop_state''')
deviationid = cur.fetchone()[0]
cur.execute('''SELECT title, author, category, published_time, content FROM favourites_deviantion WHERE deviationid = ?''', (deviationid,) )
title, author, category, published_time, content = cur.fetchone()
i_state, j_state, number = -1, -1, -1 #after state is recovered remove it from loop
print j+1+offset,'/', usefolder[i][2], '\t', title,';', author,';', category,';', '\t', published_time,';', json.loads(content)['filesize'],'bytes' #tab character ('\t') makes the output align nicely.
continue #finally goes to next j
###End of 3rd and final step of state check and continue gathering process on DeviantArt API
#
if folder=='Gallery':
cur.execute('''SELECT id FROM gallery WHERE folderid = ?''', (folderid,) )
from_folder = cur.fetchone()[0] #Gallery table connections --> gallery.id=gallery_deviantion.from_folder
if folder=='Favourites':
cur.execute('''SELECT id FROM favourites WHERE folderid = ?''', (folderid,) )
from_folder = cur.fetchone()[0] #Favourites table connections --> favourites.id=favourites_deviantion.from_folder
deviationid=info["deviationid"]
webpage=info["url"]
title=info["title"]
author=info["author"]["username"]
category=info["category"]
published_time=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(float(info["published_time"])))
try:
content=info["content"] #contains the download url location, along: "height","width","transparency" and "filesize" information
except:
#if there is no content, probably is a journal or another thing without a picture art.
print 'Unable to retrieve art content. No content available. \nIgnoring... Title:',title,'- Deviation ID:',deviationid,'- Author:',author,'- Category:',category
continue
is_downloadable=info["is_downloadable"] #True:1 or False:0
#print json.dumps(deviantfolder.json(), indent=2)
url='https://www.deviantart.com/api/v1/oauth2/deviation/metadata?deviationids%s=%s&ext_submission=false&ext_camera=false&ext_stats=false&ext_collection=false&mature_content=true'%('%5B%5D',deviationid)
try:
metadata_page=deviantart_session.get(url)
except TokenExpiredError :
#If url connection fails, tries to reconnect
conn.commit()#to save any pending database request and free access to database to save new token
print 'Request on DeviantArt API fail...\nCreating New DeviantArt session using The Client Credentials access...\n'
createDAsession()
metadata_page=deviantart_session.get(url)
if metadata_page.status_code==requests.codes.ok:
metadata=metadata_page.json()["metadata"]
else:
#if the page status is not OK, waits 1s and it tries one more time before give-up
time.sleep(1)
metadata_page=deviantart_session.get(url)
if metadata_page.status_code==requests.codes.ok:
metadata=metadata_page.json()["metadata"]
else:
metadata=metadata_page.json()
url='https://www.deviantart.com/api/v1/oauth2/comments/deviation/%s?maxdepth=5&limit=50&mature_content=true'%(deviationid)
try:
comments_info=deviantart_session.get(url)
except TokenExpiredError :
#If url connection fails, tries to reconnect
conn.commit()#to save any pending database request and free access to database to save new token
print 'Request on DeviantArt API fail...\nCreating New DeviantArt session using The Client Credentials access...\n'
createDAsession()
comments_info=deviantart_session.get(url)
if comments_info.status_code==requests.codes.ok:
comments=comments_info.json()["thread"]
for x in range(len(comments)):
#extract results from comments["thread"] but delete unnecessary key/values "commentid","parentid","hidden"
comments[x].pop("commentid",'')
comments[x].pop("parentid",'')
comments[x].pop("hidden",'')
else:
#if the page status is not OK, it gives-up
comments=comments_info.json()
#Saving to database the deviation data
if folder=='Gallery':
cur.execute('''INSERT OR IGNORE INTO gallery_deviantion
(from_folder, deviationid, webpage, title, author, category, published_time, content, is_downloadable, comments, metadata)
VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )''',
(from_folder, deviationid, webpage, title, author, category, published_time, json.dumps(content), is_downloadable, json.dumps(comments), json.dumps(metadata) ) )
print j+1+offset,'/', usefolder[i][2], '\t', title,';', author,';', category,';', '\t', published_time,';', content['filesize'],'bytes' #tab character ('\t') makes the output align nicely.
#deviationlist list of tuples with 8 entries (No., title, author, category, published_time, filesize, deviationid, webpage)
deviationlist.append((j+1+offset, title, author, category, published_time, content['filesize'], deviationid, webpage))
if folder=='Favourites':
cur.execute('''INSERT OR IGNORE INTO favourites_deviantion
(from_folder, deviationid, webpage, title, author, category, published_time, content, is_downloadable, comments, metadata)
VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? )''',
(from_folder, deviationid, webpage, title, author, category, published_time, json.dumps(content), is_downloadable, json.dumps(comments), json.dumps(metadata) ) )
print j+1+offset,'/', usefolder[i][2], '\t', title,';', author,';', category,';', '\t', published_time,';', content['filesize'],'bytes' #tab character ('\t') makes the output align nicely.
deviationlist.append( (j+1+offset, title, author, category, published_time, content['filesize'], deviationid, webpage) )
#Only commit and save state on database every 10th record
if ( (j+1+offset) % 10)== 0:
###save state of work in case connection fails so we don't need to restart from zero when running DAcollector
print 'Saving Gathering state on Deviation No. %s, Folder No. %s ...'%(j+1+offset,i+1)
go_loop=deviantfolder.json()["has_more"]
if folder=='Gallery':
#SAVE loop state
cur.executescript('''DELETE FROM Gallery_loop_state''')
cur.execute('''INSERT OR REPLACE INTO Gallery_loop_state (deviationid, i_state, j_state, offset, go_loop, number)
VALUES ( ?, ?, ?, ?, ?, ? )''', (deviationid, i, j, offset, go_loop, j+1+offset ) )
if folder=='Favourites':
#SAVE loop state
cur.execute('''DELETE FROM Favourites_loop_state''')
cur.execute('''INSERT OR REPLACE INTO Favourites_loop_state (deviationid, i_state, j_state, offset, go_loop, number)
VALUES ( ?, ?, ?, ?, ?, ? )''', (deviationid, i, j, offset, go_loop, j+1+offset ) )
###save state of work end
conn.commit()
if folder=='Gallery':
gallerydeviation[i]=deviationlist
if folder=='Favourites':
favouritesdeviation[i]=deviationlist
if deviantfolder.json()["has_more"]:#if it has more pages: True, otherwise: False
offset=deviantfolder.json()["next_offset"] #New offset of index list of deviations to get more 24 pages of data
else:
go_loop=deviantfolder.json()["has_more"] #if no more pages, end while loop: go_loop=False
conn.commit()#Commit after the last file from folder is input
if folder=='Gallery':
deviation_info=gallerydeviation
if folder=='Favourites':
deviation_info=favouritesdeviation
conn.commit()
return deviation_info
def built_userDAtxt(folder):
    """Create '<username>_<folder>.txt' with a summary of the user's Gallery/Favourites taken from the SQL database.

    The file is written as UTF-8 and lists every folder with the deviations
    stored for it (title, author, category, published time, filesize,
    deviation ID and webpage).

    The argument folder must be either the string 'Gallery' or 'Favourites';
    any other value raises ValueError.
    Relies on the module-level cur, username and usersite.
    """
    global username
    import io  # io.open gives an encoding-aware file object on Python 2 as well

    # Table names come from this fixed mapping only, never from user input.
    tables = {'Gallery': ('gallery', 'gallery_deviantion'),
              'Favourites': ('favourites', 'favourites_deviantion')}
    if folder not in tables:
        raise ValueError('folder must be "Gallery" or "Favourites", got %r' % (folder,))
    folder_table, deviation_table = tables[folder]

    cur.execute('''SELECT id, name, folderid, files FROM %s ORDER BY id''' % folder_table)
    folders = cur.fetchall()  # list of tuples (id, foldername, folderid, files)
    cur.execute('''SELECT COUNT(files), SUM(files) FROM %s''' % folder_table)
    size, b4files = cur.fetchone()  # how many folders and Qty of files before non-duplication
    cur.execute('''SELECT COUNT(id) FROM %s''' % deviation_table)
    files = cur.fetchone()[0]  # Qty of files

    # One list per folder, in the same order as "folders". The database filters
    # by from_folder (gallery.id = gallery_deviantion.from_folder) instead of
    # reading the whole table again for every folder.
    deviation = []
    for folder_row in folders:
        cur.execute('''SELECT title, author, category, published_time, content, deviationid, webpage, is_downloadable
            FROM %s WHERE from_folder = ? ORDER BY id''' % deviation_table, (folder_row[0],))
        deviation.append(cur.fetchall())

    filename = username + '_' + folder + '.txt'
    # Unicode templates + UTF-8 file: titles and names with non-ASCII characters
    # no longer fail in str().
    with io.open(filename, 'w', encoding='utf-8') as fhand:
        # The folder and user name are now really interpolated; before, the
        # text "'+folder+' of '+str(username)+'" was written out literally.
        fhand.write(u"This file contains the Deviation Art %s of %s. The layout might be different from originally displayed on user's site.\n" % (folder, username))
        fhand.write(u'There are no duplications of art here. As DeviantArt allows users to show the same art on Featury folder and subfolders. To avoid download same deviation many times, only one art is displayed.\n\n')
        fhand.write(u'Username: %s\t website: %s\n\n--- %s %s --- No. of folders: %s --- Total unique files: %s --- Qty of files before: %s ---\n\n'
                    % (username, usersite, username, folder, size, files, b4files))
        for i, folder_row in enumerate(folders):
            fhand.write(u'\n%s. Folder name: %s\t\tFolder ID: %s\t\tFile size: %s\t\tFile size before: %s\n'
                        % (i + 1, folder_row[1], folder_row[2], len(deviation[i]), folder_row[3]))
            fhand.write(u'\tNo.\tTitle; \t\tAuthor; \t\t Category; \t\t Published; \t\t Filesize;\n')
            for j, row in enumerate(deviation[i]):
                # row = (title, author, category, published_time, content, deviationid, webpage, is_downloadable)
                fhand.write(u'\t%s -\t%s; %s; %s; %s; %s bytes;\n'
                            % (j + 1, row[0], row[1], row[2], row[3], json.loads(row[4])["filesize"]))
                fhand.write(u'\t\tDeviation ID: %s\n\t\tWebpage: %s\n' % (row[5], row[6]))
        fhand.write(u'\nDeviation art without pictures or similar content (example: journals) are not downloaded.\nIf some art is not listed on Featured folder after download, that art should be located inside a subfolder\n')
start_time = time.time()
conn = sqlite3.connect('deviantartdb.sqlite')
cur = conn.cursor()
cur.executescript('''
CREATE TABLE IF NOT EXISTS deviantart_session ('id' INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
'token' TEXT,
'expires_at' varchar(64) );
CREATE TABLE IF NOT EXISTS user ('id' INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
'username' TEXT,
'usersite' TEXT);
CREATE TABLE IF NOT EXISTS gallery ('id' INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
'user_id' INTEGER,
'folderid' TEXT UNIQUE,
'name' TEXT,
'files' INTEGER);
CREATE TABLE IF NOT EXISTS gallery_deviantion ('id' INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
'from_folder' INTEGER,
'deviationid' TEXT UNIQUE,
'webpage' TEXT,
'title' TEXT,
'author' TEXT,
'category' TEXT,
'published_time' TEXT,
'content' TEXT,
'is_downloadable' INTEGER,
'comments' TEXT,
'metadata' TEXT);
CREATE TABLE IF NOT EXISTS favourites ('id' INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
'user_id' INTEGER,
'folderid' TEXT UNIQUE,
'name' TEXT,
'files' INTEGER);
CREATE TABLE IF NOT EXISTS favourites_deviantion ('id' INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
'from_folder' INTEGER,
'deviationid' TEXT UNIQUE,
'webpage' TEXT,
'title' TEXT,
'author' TEXT,
'category' TEXT,
'published_time' TEXT,
'content' TEXT,
'is_downloadable' INTEGER,
'comments' TEXT,
'metadata' TEXT);
CREATE TABLE IF NOT EXISTS Gallery_loop_state ('deviationid' TEXT PRIMARY KEY,
'i_state' INTEGER,
'j_state' INTEGER,
'offset' INTEGER,
'go_loop' INTEGER,
'number' INTEGER);
CREATE TABLE IF NOT EXISTS Favourites_loop_state ('deviationid' TEXT PRIMARY KEY,
'i_state' INTEGER,
'j_state' INTEGER,
'offset' INTEGER,
'go_loop' INTEGER,
'number' INTEGER)
''')
conn.commit()
#starts by finding the DeviantArt username, usersite and saving it to database
username, usersite=find_user()#this functions returns a tuple
if username!=None:
go_collect=1
conn = sqlite3.connect('deviantartdb.sqlite')
cur = conn.cursor()
cur.execute('''SELECT username FROM user''')
try:
name=cur.fetchone()[0] #if table user is empty, fetchone() will fail
except:
name=None
if name==None or username!=name:
if username!=name and name!=None:
print 'Deleting previous DA user on database:', name, '...'
print 'New user data saved on database\n'
else:
print 'User data saved on database\n'
cur.executescript('''DELETE FROM user;
DELETE FROM gallery;
DELETE FROM gallery_deviantion;
DELETE FROM favourites;
DELETE FROM favourites_deviantion;
DELETE FROM Gallery_loop_state''')
cur.execute('''INSERT INTO user (username, usersite) VALUES ( ?, ? )''', (username, usersite) )
conn.commit()
else:
print 'User data already on database\n'
else:
go_collect=0
#Start Oauth session on DeviantArt
if go_collect==1:
client_id = hidden.client_id()
client_secret = hidden.client_secret()
#The Client Credentials access = Requests-OAuthlib Backend Application Flow
print 'Creating DeviantArt session using The Client Credentials access...\n'
deviantart_client = BackendApplicationClient(client_id=client_id)
createDAsession()
test=test_token()
if test["status"]=="success":
go_collect==1
print '\n...EXTRACTION OF INFORMATION MAY TAKE SEVERAL MINUTES...'
else:
go_collect==0
print 'ERROR...Unable to create DeviantArt session'
#Extract User's (Gallery and Favourites) folder structure information on first loop, then on second loop Extract art deviations for those folders
for folder in ['Gallery','Favourites']:
if folder=='Gallery' and go_collect==1:
gallery=get_folder(folder)#gallery dict is used on get_deviation(folder)
gallerydeviation=get_deviation(folder)
if folder=='Favourites' and go_collect==1:
favourites=get_folder(folder)#favourites dict is used on get_deviation(folder)
favouritesdeviation=get_deviation(folder)
#gallerydeviation and favouritesdeviation were useful to know the structure of the files without the need to access db. Used for debugging.
#Create a [username]_Galllery.txt and [username]_Favourites.txt with details: user name / user DAsite / Gallery/Favourite folder details & list of files
if go_collect==1:
for folder in ['Gallery','Favourites']:
built_userDAtxt(folder)
print '\n\nDeviation art without pictures or similar content (example: journals) are not downloaded.'
print 'To avoid download the same art many times, there are no duplicate deviation art on SQL Database. So if some art is not listed on Featured folder after download, that art is located inside a subfolder.\n'
print '\nTo grab the user %s Gallery / Favourites deviation art use DAgallery.py / DAfavourites.py'%username
end_time = time.time()
run_time=str(timedelta(seconds=(end_time-start_time)))
print '\nDAcollector process run time (HH:MM:SS):', run_time
cur.close()
raw_input('\nProcess Finished. Press Enter to exit')
You can’t perform that action at this time.