From 4632e3c51c0ec7cbea4ce4548c4d6013ef0e8dec Mon Sep 17 00:00:00 2001 From: Jason Madden Date: Thu, 18 May 2017 17:24:28 -0500 Subject: [PATCH] Begin porting static.py; drop dependency on mechanize and only use zope.testbrowser and webtest --- src/zope/app/apidoc/codemodule/directives.rst | 10 +- src/zope/app/apidoc/static.py | 219 +++++++++--------- src/zope/app/apidoc/tests.py | 20 ++ 3 files changed, 139 insertions(+), 110 deletions(-) diff --git a/src/zope/app/apidoc/codemodule/directives.rst b/src/zope/app/apidoc/codemodule/directives.rst index 00d424d8..d48ba131 100644 --- a/src/zope/app/apidoc/codemodule/directives.rst +++ b/src/zope/app/apidoc/codemodule/directives.rst @@ -1,12 +1,12 @@ ======================================== -Code Module specific `apidoc` Directives +Code Module specific `apidoc` Directives ======================================== The `apidoc:rootModule` Directive --------------------------------- The `rootModule` directive allows you to register a third party Python package -with apidoc's code browser. +with apidoc's code browser. Before we can register a new root module, we need to load the metaconfiguration: @@ -48,9 +48,12 @@ registry tries to import the path. The hook was provided for security reasons, since uncontrolled importing of modules in a running application is considered a security hole. -By default the flag is set to false: +By default the flag is set to false (of course, this depends on the +order in which tests are run and what ZCML has been configured or if +this was manually changed, so we can't really rely on the default here): >>> from zope.app.apidoc import classregistry + >>> classregistry.__import_unknown_modules__ = False >>> classregistry.__import_unknown_modules__ False @@ -75,4 +78,3 @@ We can also set it back to false of course: >>> classregistry.__import_unknown_modules__ False - diff --git a/src/zope/app/apidoc/static.py b/src/zope/app/apidoc/static.py index 3f7fe044..ff2b2b73 100644 --- a/src/zope/app/apidoc/static.py +++ b/src/zope/app/apidoc/static.py @@ -13,7 +13,7 @@ ############################################################################## """Retrieve Static APIDOC -$Id$ + """ __docformat__ = "reStructuredText" @@ -23,15 +23,20 @@ import sys import time import optparse -import urllib2 -import urlparse +from six.moves.urllib import error as urllib2 +from six.moves.urllib import parse as urlparse + import warnings -import HTMLParser +try: + from HTMLParser import HTMLParseError +except ImportError: + class HTMLParseError(Exception): + pass -import zope.testbrowser.testing -import mechanize +import zope.testbrowser.browser +import zope.testbrowser.wsgi -from zope.app.testing import functional +#from zope.app.testing import functional from zope.app.apidoc import classregistry @@ -91,13 +96,13 @@ def completeURL(url): class Link(object): """A link in the page.""" - def __init__(self, mechLink, rootURL, referenceURL='None'): + def __init__(self, url, rootURL, referenceURL='None'): self.rootURL = rootURL self.referenceURL = referenceURL - self.originalURL = mechLink.url - self.callableURL = mechLink.absolute_url - self.url = completeURL(cleanURL(mechLink.url)) - self.absoluteURL = completeURL(cleanURL(mechLink.absolute_url)) + self.originalURL = url #mechLink.url + self.callableURL = url #mechLink.absolute_url + self.url = completeURL(cleanURL(url)) + self.absoluteURL = completeURL(cleanURL(self.callableURL)) def isLocalURL(self): """Determine whether the passed in URL is local and accessible.""" @@ -115,14 +120,14 @@ def isLocalURL(self): def isApidocLink(self): # Make sure that only apidoc links are loaded - if self.absoluteURL.startswith(self.rootURL+'++apidoc++/'): + if self.absoluteURL.startswith(self.rootURL + '++apidoc++/'): return True - if self.absoluteURL.startswith(self.rootURL+'@@/'): + if self.absoluteURL.startswith(self.rootURL + '@@/'): return True return False -class OnlineBrowser(mechanize.Browser, object): +class OnlineBrowser(zope.testbrowser.wsgi.Browser, object): def __init__(self, factory=None, history=None, request_class=None): if factory == None: @@ -134,41 +139,39 @@ def setUserAndPassword(self, user, pw): hash = base64.encodestring(user+':'+pw).strip() self.addheaders.append(('Authorization', 'Basic '+hash)) - @property - def contents(self): - """Get the content of the returned data""" - response = self.response() - old_location = response.tell() - response.seek(0) - contents = response.read() - response.seek(old_location) - return contents + # @property + # def contents(self): + # """Get the content of the returned data""" + # response = self.response() + # old_location = response.tell() + # response.seek(0) + # contents = response.read() + # response.seek(old_location) + # return contents -class PublisherBrowser(zope.testbrowser.testing.PublisherMechanizeBrowser, +class PublisherBrowser(zope.testbrowser.wsgi.Browser, object): def __init__(self, *args, **kw): - functional.defineLayer( - 'Functional', - zcml=os.path.abspath(os.path.join(os.path.dirname(__file__), - 'ftesting.zcml'))) - Functional.setUp() + from zope.app.apidoc.testing import APIDocLayer + APIDocLayer.setUp() + APIDocLayer.testSetUp() super(PublisherBrowser, self).__init__(*args, **kw) def setUserAndPassword(self, user, pw): """Specify the username and password to use for the retrieval.""" - self.addheaders.append(('Authorization', 'Basic %s:%s' %(user, pw))) + self.addHeader('Authorization', 'Basic %s:%s' %(user, pw)) - @property - def contents(self): - """Get the content of the returned data""" - response = self.response() - old_location = response.tell() - response.seek(0) - contents = response.read() - response.seek(old_location) - return contents + # @property + # def contents(self): + # """Get the content of the returned data""" + # response = self.response() + # old_location = response.tell() + # response.seek(0) + # contents = response.read() + # response.seek(old_location) + # return contents class StaticAPIDocGenerator(object): @@ -178,7 +181,7 @@ def __init__(self, options): self.options = options self.linkQueue = [] for url in self.options.additional_urls + [self.options.startpage]: - link = Link(mechanize.Link(self.options.url, url, '', '', ()), + link = Link('http://localhost/' + url,#mechanize.Link(self.options.url, url, '', '', ()), self.options.url) self.linkQueue.append(link) self.rootDir = os.path.join(os.path.dirname(__file__), @@ -263,6 +266,7 @@ def processLink(self, link): # Retrieve the content try: + print(link.callableURL) self.browser.open(link.callableURL) except urllib2.HTTPError as error: # Something went wrong with retrieving the page. @@ -284,6 +288,7 @@ def processLink(self, link): # to catch all exceptions, so that we can investigate them. if self.options.debug: import pdb; pdb.set_trace() + raise return # Get the response content @@ -303,11 +308,12 @@ def processLink(self, link): filepath = os.path.join(dir, filename) # Now retrieve all links - if self.browser.viewing_html(): + if self.browser.isHtml: try: - links = self.browser.links() - except HTMLParser.HTMLParseError as error: + links = self.browser._response.html.find_all('a') + #links = self.browser.links() + except HTMLParseError as error: self.htmlErrors += 1 self.sendMessage('Failed to parse HTML: ' + url, 1) self.sendMessage('+-> %s: line %i, column %s' % ( @@ -333,71 +339,72 @@ def processLink(self, link): # Write the data into the file try: - file = open(filepath, 'w') - file.write(contents) - file.close() + with open(filepath, 'wb') as f: + if not isinstance(contents, bytes): + contents = contents.encode('utf-8') + f.write(contents) except IOError: # The file already exists, so it is a duplicate and a bad one, # since the URL misses `index.hml`. ReST can produce strange URLs # that produce this problem, and we have little control over it. pass -class ApiDocDefaultFactory(mechanize._html.DefaultFactory): - """Based on sgmllib.""" - def __init__(self, i_want_broken_xhtml_support=False): - mechanize._html.Factory.__init__( - self, - forms_factory=mechanize._html.FormsFactory(), - links_factory=ApiDocLinksFactory(urltags=urltags), - title_factory=mechanize._html.TitleFactory(), - response_type_finder=mechanize._html.ResponseTypeFinder( - allow_xhtml=i_want_broken_xhtml_support), - ) - - -class ApiDocLinksFactory(mechanize._html.LinksFactory): - """Copy of mechanize link factory. - - Unfortunately, the original implementation explicitely ignores base hrefs. - """ - - def links(self): - """Return an iterator that provides links of the document.""" - response = self._response - encoding = self._encoding - base_url = self._base_url - p = self.link_parser_class(response, encoding=encoding) - - for token in p.tags(*(self.urltags.keys()+["base"])): - # NOTE: WE WANT THIS HERE NOT TO IGNORE IT! - #if token.data == "base": - # base_url = dict(token.attrs).get("href") - # continue - if token.type == "endtag": - continue - attrs = dict(token.attrs) - tag = token.data - name = attrs.get("name") - text = None - # XXX use attr_encoding for ref'd doc if that doc does not provide - # one by other means - #attr_encoding = attrs.get("charset") - url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL? - if not url: - # Probably an link or . - # For our purposes a link is something with a URL, so ignore - # this. - continue - - url = mechanize._rfc3986.clean_url(url, encoding) - if tag == "a": - if token.type != "startendtag": - # hmm, this'd break if end tag is missing - text = p.get_compressed_text(("endtag", tag)) - # but this doesn't work for eg. Andy - #text = p.get_compressed_text() - - yield mechanize._html.Link(base_url, url, text, tag, token.attrs) +# class ApiDocDefaultFactory(mechanize._html.DefaultFactory): +# """Based on sgmllib.""" +# def __init__(self, i_want_broken_xhtml_support=False): +# mechanize._html.Factory.__init__( +# self, +# forms_factory=mechanize._html.FormsFactory(), +# links_factory=ApiDocLinksFactory(urltags=urltags), +# title_factory=mechanize._html.TitleFactory(), +# response_type_finder=mechanize._html.ResponseTypeFinder( +# allow_xhtml=i_want_broken_xhtml_support), +# ) + + +# class ApiDocLinksFactory(mechanize._html.LinksFactory): +# """Copy of mechanize link factory. + +# Unfortunately, the original implementation explicitely ignores base hrefs. +# """ + +# def links(self): +# """Return an iterator that provides links of the document.""" +# response = self._response +# encoding = self._encoding +# base_url = self._base_url +# p = self.link_parser_class(response, encoding=encoding) + +# for token in p.tags(*(self.urltags.keys()+["base"])): +# # NOTE: WE WANT THIS HERE NOT TO IGNORE IT! +# #if token.data == "base": +# # base_url = dict(token.attrs).get("href") +# # continue +# if token.type == "endtag": +# continue +# attrs = dict(token.attrs) +# tag = token.data +# name = attrs.get("name") +# text = None +# # XXX use attr_encoding for ref'd doc if that doc does not provide +# # one by other means +# #attr_encoding = attrs.get("charset") +# url = attrs.get(self.urltags[tag]) # XXX is "" a valid URL? +# if not url: +# # Probably an link or . +# # For our purposes a link is something with a URL, so ignore +# # this. +# continue + +# url = mechanize._rfc3986.clean_url(url, encoding) +# if tag == "a": +# if token.type != "startendtag": +# # hmm, this'd break if end tag is missing +# text = p.get_compressed_text(("endtag", tag)) +# # but this doesn't work for eg. Andy +# #text = p.get_compressed_text() + +# yield mechanize._html.Link(base_url, url, text, tag, token.attrs) ############################################################################### @@ -511,7 +518,7 @@ def links(self): '--verbosity', '5', '--publisher', '--url', 'http://localhost:8080/', - '--startpage', '++apidoc++/static.html', + '--startpage', '/++apidoc++/static.html', '--username', 'mgr', '--password', 'mgrpw', '--progress', @@ -561,8 +568,8 @@ def get_options(args=None, defaults=None): ############################################################################### -def main(): - options = get_options() +def main(args=None): + options = get_options(args) maker = StaticAPIDocGenerator(options) maker.start() sys.exit(0) diff --git a/src/zope/app/apidoc/tests.py b/src/zope/app/apidoc/tests.py index 07043b56..76bb0f8f 100644 --- a/src/zope/app/apidoc/tests.py +++ b/src/zope/app/apidoc/tests.py @@ -196,6 +196,26 @@ def test_unpack_methods(self): self.assertEqual("((a, b))", getFunctionSignature(locals()['f'])) + +from zope.app.apidoc import static + +class TestStatic(unittest.TestCase): + + def test_run(self): + import tempfile + import shutil + tmpdir = tempfile.mkdtemp(suffix='apidoc.TestStatic') + self.addCleanup(shutil.rmtree, tmpdir) + + try: + static.main(['static', tmpdir]) + self.fail("Should raise SystemExit") + except SystemExit as e: + self.assertEqual(e.args[0], 0) + finally: + APIDocLayer.testTearDown() + APIDocLayer.tearDown() + # Generally useful classes and functions @implementer(IContainmentRoot)