From 4632e3c51c0ec7cbea4ce4548c4d6013ef0e8dec Mon Sep 17 00:00:00 2001
From: Jason Madden <jamadden@gmail.com>
Date: Thu, 18 May 2017 17:24:28 -0500
Subject: [PATCH] Begin porting static.py; drop dependency on mechanize and
 only use zope.testbrowser and webtest

---
 src/zope/app/apidoc/codemodule/directives.rst |  10 +-
 src/zope/app/apidoc/static.py                 | 219 +++++++++---------
 src/zope/app/apidoc/tests.py                  |  20 ++
 3 files changed, 139 insertions(+), 110 deletions(-)

diff --git a/src/zope/app/apidoc/codemodule/directives.rst b/src/zope/app/apidoc/codemodule/directives.rst
index 00d424d8..d48ba131 100644
--- a/src/zope/app/apidoc/codemodule/directives.rst
+++ b/src/zope/app/apidoc/codemodule/directives.rst
@@ -1,12 +1,12 @@
 ========================================
-Code Module specific `apidoc` Directives 
+Code Module specific `apidoc` Directives
 ========================================
 
 The `apidoc:rootModule` Directive
 ---------------------------------
 
 The `rootModule` directive allows you to register a third party Python package
-with apidoc's code browser. 
+with apidoc's code browser.
 
 Before we can register a new root module, we need to load the
 metaconfiguration:
@@ -48,9 +48,12 @@ registry tries to import the path. The hook was provided for security reasons,
 since uncontrolled importing of modules in a running application is considered
 a security hole.
 
-By default the flag is set to false:
+By default the flag is set to false. However, the current value depends on
+the order in which tests run, on which ZCML has been loaded, and on any manual
+changes, so we reset it explicitly instead of relying on the default:
 
   >>> from zope.app.apidoc import classregistry
+  >>> classregistry.__import_unknown_modules__ = False
   >>> classregistry.__import_unknown_modules__
   False
 
@@ -75,4 +78,3 @@ We can also set it back to false of course:
 
   >>> classregistry.__import_unknown_modules__
   False
-
diff --git a/src/zope/app/apidoc/static.py b/src/zope/app/apidoc/static.py
index 3f7fe044..ff2b2b73 100644
--- a/src/zope/app/apidoc/static.py
+++ b/src/zope/app/apidoc/static.py
@@ -13,7 +13,7 @@
 ##############################################################################
 """Retrieve Static APIDOC
 
-$Id$
+
 """
 __docformat__ = "reStructuredText"
 
@@ -23,15 +23,20 @@
 import sys
 import time
 import optparse
-import urllib2
-import urlparse
+from six.moves.urllib import error as urllib2
+from six.moves.urllib import parse as urlparse
+
 import warnings
-import HTMLParser
+try:
+    from HTMLParser import HTMLParseError
+except ImportError:
+    class HTMLParseError(Exception):
+        pass
 
-import zope.testbrowser.testing
-import mechanize
+import zope.testbrowser.browser
+import zope.testbrowser.wsgi
 
-from zope.app.testing import functional
+#from zope.app.testing import functional
 
 from zope.app.apidoc import classregistry
 
@@ -91,13 +96,13 @@ def completeURL(url):
 class Link(object):
     """A link in the page."""
 
-    def __init__(self, mechLink, rootURL, referenceURL='None'):
+    def __init__(self, url, rootURL, referenceURL='None'):
         self.rootURL = rootURL
         self.referenceURL = referenceURL
-        self.originalURL = mechLink.url
-        self.callableURL = mechLink.absolute_url
-        self.url = completeURL(cleanURL(mechLink.url))
-        self.absoluteURL = completeURL(cleanURL(mechLink.absolute_url))
+        self.originalURL = url #mechLink.url
+        self.callableURL = url #mechLink.absolute_url
+        self.url = completeURL(cleanURL(url))
+        self.absoluteURL = completeURL(cleanURL(self.callableURL))
 
     def isLocalURL(self):
         """Determine whether the passed in URL is local and accessible."""
@@ -115,14 +120,14 @@ def isLocalURL(self):
 
     def isApidocLink(self):
         # Make sure that only apidoc links are loaded
-        if self.absoluteURL.startswith(self.rootURL+'++apidoc++/'):
+        if self.absoluteURL.startswith(self.rootURL + '++apidoc++/'):
             return True
-        if self.absoluteURL.startswith(self.rootURL+'@@/'):
+        if self.absoluteURL.startswith(self.rootURL + '@@/'):
             return True
         return False
 
 
-class OnlineBrowser(mechanize.Browser, object):
+class OnlineBrowser(zope.testbrowser.wsgi.Browser, object):
 
     def __init__(self, factory=None, history=None, request_class=None):
         if factory == None:
@@ -134,41 +139,39 @@ def setUserAndPassword(self, user, pw):
         hash = base64.encodestring(user+':'+pw).strip()
         self.addheaders.append(('Authorization', 'Basic '+hash))
 
-    @property
-    def contents(self):
-        """Get the content of the returned data"""
-        response = self.response()
-        old_location = response.tell()
-        response.seek(0)
-        contents = response.read()
-        response.seek(old_location)
-        return contents
+    # @property
+    # def contents(self):
+    #     """Get the content of the returned data"""
+    #     response = self.response()
+    #     old_location = response.tell()
+    #     response.seek(0)
+    #     contents = response.read()
+    #     response.seek(old_location)
+    #     return contents
 
 
-class PublisherBrowser(zope.testbrowser.testing.PublisherMechanizeBrowser,
+class PublisherBrowser(zope.testbrowser.wsgi.Browser,
                        object):
 
     def __init__(self, *args, **kw):
-        functional.defineLayer(
-            'Functional',
-            zcml=os.path.abspath(os.path.join(os.path.dirname(__file__),
-                                              'ftesting.zcml')))
-        Functional.setUp()
+        from zope.app.apidoc.testing import APIDocLayer
+        APIDocLayer.setUp()
+        APIDocLayer.testSetUp()
         super(PublisherBrowser, self).__init__(*args, **kw)
 
     def setUserAndPassword(self, user, pw):
         """Specify the username and password to use for the retrieval."""
-        self.addheaders.append(('Authorization', 'Basic %s:%s' %(user, pw)))
+        self.addHeader('Authorization', 'Basic %s:%s' %(user, pw))
 
-    @property
-    def contents(self):
-        """Get the content of the returned data"""
-        response = self.response()
-        old_location = response.tell()
-        response.seek(0)
-        contents = response.read()
-        response.seek(old_location)
-        return contents
+    # @property
+    # def contents(self):
+    #     """Get the content of the returned data"""
+    #     response = self.response()
+    #     old_location = response.tell()
+    #     response.seek(0)
+    #     contents = response.read()
+    #     response.seek(old_location)
+    #     return contents
 
 
 class StaticAPIDocGenerator(object):
@@ -178,7 +181,7 @@ def __init__(self, options):
         self.options = options
         self.linkQueue = []
         for url in self.options.additional_urls + [self.options.startpage]:
-            link = Link(mechanize.Link(self.options.url, url, '', '', ()),
+            link = Link('http://localhost/' + url,#mechanize.Link(self.options.url, url, '', '', ()),
                         self.options.url)
             self.linkQueue.append(link)
         self.rootDir = os.path.join(os.path.dirname(__file__),
@@ -263,6 +266,7 @@ def processLink(self, link):
 
         # Retrieve the content
         try:
+            print(link.callableURL)
             self.browser.open(link.callableURL)
         except urllib2.HTTPError as error:
             # Something went wrong with retrieving the page.
@@ -284,6 +288,7 @@ def processLink(self, link):
             # to catch all exceptions, so that we can investigate them.
             if self.options.debug:
                 import pdb; pdb.set_trace()
+            raise
             return
 
         # Get the response content
@@ -303,11 +308,12 @@ def processLink(self, link):
         filepath = os.path.join(dir, filename)
 
         # Now retrieve all links
-        if self.browser.viewing_html():
+        if self.browser.isHtml:
 
             try:
-                links = self.browser.links()
-            except HTMLParser.HTMLParseError as error:
+                links = self.browser._response.html.find_all('a')
+                #links = self.browser.links()
+            except HTMLParseError as error:
                 self.htmlErrors += 1
                 self.sendMessage('Failed to parse HTML: ' + url, 1)
                 self.sendMessage('+-> %s: line %i, column %s' % (
@@ -333,71 +339,72 @@ def processLink(self, link):
 
         # Write the data into the file
         try:
-            file = open(filepath, 'w')
-            file.write(contents)
-            file.close()
+            with open(filepath, 'wb') as f:
+                if not isinstance(contents, bytes):
+                    contents = contents.encode('utf-8')
+                f.write(contents)
         except IOError:
             # The file already exists, so it is a duplicate and a bad one,
             # since the URL misses `index.hml`. ReST can produce strange URLs
             # that produce this problem, and we have little control over it.
             pass
 
-class ApiDocDefaultFactory(mechanize._html.DefaultFactory):
-    """Based on sgmllib."""
-    def __init__(self, i_want_broken_xhtml_support=False):
-        mechanize._html.Factory.__init__(
-            self,
-            forms_factory=mechanize._html.FormsFactory(),
-            links_factory=ApiDocLinksFactory(urltags=urltags),
-            title_factory=mechanize._html.TitleFactory(),
-            response_type_finder=mechanize._html.ResponseTypeFinder(
-                allow_xhtml=i_want_broken_xhtml_support),
-            )
-
-
-class ApiDocLinksFactory(mechanize._html.LinksFactory):
-    """Copy of mechanize link factory.
-
-    Unfortunately, the original implementation explicitely ignores base hrefs.
-    """
-
-    def links(self):
-        """Return an iterator that provides links of the document."""
-        response = self._response
-        encoding = self._encoding
-        base_url = self._base_url
-        p = self.link_parser_class(response, encoding=encoding)
-
-        for token in p.tags(*(self.urltags.keys()+["base"])):
-            # NOTE: WE WANT THIS HERE NOT TO IGNORE IT!
-            #if token.data == "base":
-            #    base_url = dict(token.attrs).get("href")
-            #    continue
-            if token.type == "endtag":
-                continue
-            attrs = dict(token.attrs)
-            tag = token.data
-            name = attrs.get("name")
-            text = None
-            # XXX use attr_encoding for ref'd doc if that doc does not provide
-            #  one by other means
-            #attr_encoding = attrs.get("charset")
-            url = attrs.get(self.urltags[tag])  # XXX is "" a valid URL?
-            if not url:
-                # Probably an <A NAME="blah"> link or <AREA NOHREF...>.
-                # For our purposes a link is something with a URL, so ignore
-                # this.
-                continue
-
-            url = mechanize._rfc3986.clean_url(url, encoding)
-            if tag == "a":
-                if token.type != "startendtag":
-                    # hmm, this'd break if end tag is missing
-                    text = p.get_compressed_text(("endtag", tag))
-                # but this doesn't work for eg. <a href="blah"><b>Andy</b></a>
-                #text = p.get_compressed_text()
-
-            yield mechanize._html.Link(base_url, url, text, tag, token.attrs)
+# class ApiDocDefaultFactory(mechanize._html.DefaultFactory):
+#     """Based on sgmllib."""
+#     def __init__(self, i_want_broken_xhtml_support=False):
+#         mechanize._html.Factory.__init__(
+#             self,
+#             forms_factory=mechanize._html.FormsFactory(),
+#             links_factory=ApiDocLinksFactory(urltags=urltags),
+#             title_factory=mechanize._html.TitleFactory(),
+#             response_type_finder=mechanize._html.ResponseTypeFinder(
+#                 allow_xhtml=i_want_broken_xhtml_support),
+#             )
+
+
+# class ApiDocLinksFactory(mechanize._html.LinksFactory):
+#     """Copy of mechanize link factory.
+
+#     Unfortunately, the original implementation explicitly ignores base hrefs.
+#     """
+
+#     def links(self):
+#         """Return an iterator that provides links of the document."""
+#         response = self._response
+#         encoding = self._encoding
+#         base_url = self._base_url
+#         p = self.link_parser_class(response, encoding=encoding)
+
+#         for token in p.tags(*(self.urltags.keys()+["base"])):
+#             # NOTE: WE WANT THIS HERE NOT TO IGNORE IT!
+#             #if token.data == "base":
+#             #    base_url = dict(token.attrs).get("href")
+#             #    continue
+#             if token.type == "endtag":
+#                 continue
+#             attrs = dict(token.attrs)
+#             tag = token.data
+#             name = attrs.get("name")
+#             text = None
+#             # XXX use attr_encoding for ref'd doc if that doc does not provide
+#             #  one by other means
+#             #attr_encoding = attrs.get("charset")
+#             url = attrs.get(self.urltags[tag])  # XXX is "" a valid URL?
+#             if not url:
+#                 # Probably an <A NAME="blah"> link or <AREA NOHREF...>.
+#                 # For our purposes a link is something with a URL, so ignore
+#                 # this.
+#                 continue
+
+#             url = mechanize._rfc3986.clean_url(url, encoding)
+#             if tag == "a":
+#                 if token.type != "startendtag":
+#                     # hmm, this'd break if end tag is missing
+#                     text = p.get_compressed_text(("endtag", tag))
+#                 # but this doesn't work for eg. <a href="blah"><b>Andy</b></a>
+#                 #text = p.get_compressed_text()
+
+#             yield mechanize._html.Link(base_url, url, text, tag, token.attrs)
 
 
 ###############################################################################
@@ -511,7 +518,7 @@ def links(self):
     '--verbosity', '5',
     '--publisher',
     '--url', 'http://localhost:8080/',
-    '--startpage', '++apidoc++/static.html',
+    '--startpage', '/++apidoc++/static.html',
     '--username', 'mgr',
     '--password', 'mgrpw',
     '--progress',
@@ -561,8 +568,8 @@ def get_options(args=None, defaults=None):
 ###############################################################################
 
 
-def main():
-    options = get_options()
+def main(args=None):
+    options = get_options(args)
     maker = StaticAPIDocGenerator(options)
     maker.start()
     sys.exit(0)
diff --git a/src/zope/app/apidoc/tests.py b/src/zope/app/apidoc/tests.py
index 07043b56..76bb0f8f 100644
--- a/src/zope/app/apidoc/tests.py
+++ b/src/zope/app/apidoc/tests.py
@@ -196,6 +196,26 @@ def test_unpack_methods(self):
 
         self.assertEqual("((a, b))", getFunctionSignature(locals()['f']))
 
+
+from zope.app.apidoc import static
+
+class TestStatic(unittest.TestCase):
+
+    def test_run(self):
+        import tempfile
+        import shutil
+        tmpdir = tempfile.mkdtemp(suffix='apidoc.TestStatic')
+        self.addCleanup(shutil.rmtree, tmpdir)
+
+        try:
+            static.main(['static', tmpdir])
+            self.fail("Should raise SystemExit")
+        except SystemExit as e:
+            self.assertEqual(e.args[0], 0)
+        finally:
+            APIDocLayer.testTearDown()
+            APIDocLayer.tearDown()
+
 # Generally useful classes and functions
 
 @implementer(IContainmentRoot)