Merge pull request #1368 from firm1/fix-image-download
Fix download bugs during validation
SpaceFox committed Aug 18, 2014
2 parents 9fb98fa + a3d3020 commit 5113857
Showing 3 changed files with 88 additions and 54 deletions.
Binary file added fixtures/noir_black.png
32 changes: 22 additions & 10 deletions zds/tutorial/factories.py
@@ -20,7 +20,19 @@
from zds.utils.models import SubCategory
from zds.utils.tutorials import export_tutorial


contenu = (
u'Ceci est un contenu de tutoriel utile et à tester un peu partout'
u'Ce contenu ira aussi bien dans les introductions, que dans les conclusions et les extraits '
u'le gros intéret étant qu\'il renferme des images pour tester l\'execution coté pandoc '
u'Exemple d\'image ![Ma pepite souris](http://blog.science-infuse.fr/public/souris.jpg)'
u'\nExemple d\'image ![Image inexistante](http://blog.science-infuse.fr/public/inv_souris.jpg)'
u'\nExemple de gif ![](http://corigif.free.fr/oiseau/img/oiseau_004.gif)'
u'\nExemple de gif inexistant ![](http://corigif.free.fr/oiseau/img/ironman.gif)'
u'Une image de type wikipedia qui fait tomber des tests ![](https://s.qwant.com/thumbr/?u=http%3A%2F%2Fwww.blogoergosum.com%2Fwp-content%2Fuploads%2F2010%2F02%2Fwikipedia-logo.jpg&h=338&w=600)'
u'Image dont le serveur n\'existe pas ![](http://unknown.image.zds)'
u'\n Attention les tests ne doivent pas crasher '
u'qu\'un sujet abandonné !')

class BigTutorialFactory(factory.DjangoModelFactory):
FACTORY_FOR = Tutorial

@@ -47,10 +59,10 @@ def _prepare(cls, create, **kwargs):
f.write(json_writer.dumps(man, indent=4, ensure_ascii=False).encode('utf-8'))
f.close()
f = open(os.path.join(path, tuto.introduction), "w")
f.write(u'Test')
f.write(contenu.encode('utf-8'))
f.close()
f = open(os.path.join(path, tuto.conclusion), "w")
f.write(u'Test')
f.write(contenu.encode('utf-8'))
f.close()
repo.index.add(['manifest.json', tuto.introduction, tuto.conclusion])
cm = repo.index.commit("Init Tuto")
@@ -90,10 +102,10 @@ def _prepare(cls, create, **kwargs):
ensure_ascii=False).encode('utf-8'))
file.close()
file = open(os.path.join(path, tuto.introduction), "w")
file.write(u'Test')
file.write(contenu.encode('utf-8'))
file.close()
file = open(os.path.join(path, tuto.conclusion), "w")
file.write(u'Test')
file.write(contenu.encode('utf-8'))
file.close()

repo.index.add(['manifest.json', tuto.introduction, tuto.conclusion])
@@ -124,11 +136,11 @@ def _prepare(cls, create, **kwargs):
part.save()

f = open(os.path.join(tutorial.get_path(), part.introduction), "w")
f.write(u'Test')
f.write(contenu.encode('utf-8'))
f.close()
repo.index.add([part.introduction])
f = open(os.path.join(tutorial.get_path(), part.conclusion), "w")
f.write(u'Test')
f.write(contenu.encode('utf-8'))
f.close()
repo.index.add([part.conclusion])

@@ -202,14 +214,14 @@ def _prepare(cls, create, **kwargs):
part.tutorial.get_path(),
chapter.introduction),
"w")
f.write(u'Test')
f.write(contenu.encode('utf-8'))
f.close()
f = open(
os.path.join(
part.tutorial.get_path(),
chapter.conclusion),
"w")
f.write(u'Test')
f.write(contenu.encode('utf-8'))
f.close()
part.tutorial.save()
repo = Repo(part.tutorial.get_path())
@@ -289,4 +301,4 @@ class SubCategoryFactory(factory.DjangoModelFactory):


class VaidationFactory(factory.DjangoModelFactory):
FACTORY_FOR = Validation
FACTORY_FOR = Validation
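
Note on the fixture above: the new contenu string deliberately mixes a reachable JPEG, a missing JPEG, a reachable GIF, a missing GIF, a Qwant thumbnail URL and a host that does not resolve, so the export path is exercised against every download failure mode. A minimal sketch of how those URLs are picked out of the markdown, using the same regex as get_url_images() in zds/tutorial/views.py (the sample string and the prints are illustrative, not part of the commit):

# -*- coding: utf-8 -*-
import re

regex = ur"(!\[.*?\]\()(.+?)(\))"

sample = (u"Exemple d'image ![Ma pepite souris](http://blog.science-infuse.fr/public/souris.jpg)"
          u"\nExemple de gif ![](http://corigif.free.fr/oiseau/img/oiseau_004.gif)")

for img in re.findall(regex, sample):
    # each match is a 3-tuple: the opening "![alt](", the URL itself, and the closing ")"
    print img[1]
# http://blog.science-infuse.fr/public/souris.jpg
# http://corigif.free.fr/oiseau/img/oiseau_004.gif
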
110 changes: 66 additions & 44 deletions zds/tutorial/views.py
@@ -4,7 +4,7 @@
from datetime import datetime
from operator import attrgetter
from urllib import urlretrieve
from urlparse import urlparse
from urlparse import urlparse, parse_qs
try:
import ujson as json_reader
except:
@@ -2780,83 +2780,105 @@ def get_url_images(md_text, pt):
"""find images urls in markdown text and download this."""

regex = ur"(!\[.*?\]\()(.+?)(\))"
unknow_path = os.path.join(settings.SITE_ROOT, "fixtures", "noir_black.png")

# if text is empty don't download

if md_text is not None:
imgs = re.findall(regex, md_text)
for img in imgs:

# decompose images

parse_object = urlparse(img[1])
real_url=img[1]
# decompose images
parse_object = urlparse(real_url)
if parse_object.query!='':
resp = parse_qs(urlparse(img[1]).query, keep_blank_values=True)
real_url = resp["u"][0]
parse_object = urlparse(real_url)

# if link is http type

if parse_object.scheme in ("http", "https", "ftp") or \
if parse_object.scheme in ["http", "https", "ftp"] or \
parse_object.netloc[:3]=="www" or \
parse_object.path[:3]=="www":
(filepath, filename) = os.path.split(parse_object.path)
if not os.path.isdir(os.path.join(pt, "images")):
os.makedirs(os.path.join(pt, "images"))

# download image

urlretrieve(img[1], os.path.abspath(os.path.join(pt, "images",
filename)))
ext = filename.split(".")[-1]

# if image is gif, convert to png

if ext == "gif":
im = ImagePIL.open(os.path.join(pt, img[1]))
im.save(os.path.join(pt, filename.split(".")[0] + ".png"))
down_path=os.path.abspath(os.path.join(pt, "images", filename))
try:
urlretrieve(real_url, down_path)
try:
ext = filename.split(".")[-1]
im = ImagePIL.open(down_path)
# if image is gif, convert to png
if ext == "gif":
im.save(os.path.join(pt, "images", filename.split(".")[0] + ".png"))
except IOError:
ext = filename.split(".")[-1]
im = ImagePIL.open(unknow_path)
if ext == "gif":
im.save(os.path.join(pt, "images", filename.split(".")[0] + ".png"))
else:
im.save(os.path.join(pt, "images", filename))
except IOError:
pass
else:

# relative link

srcfile = settings.SITE_ROOT + img[1]
srcfile = settings.SITE_ROOT + real_url
if os.path.isfile(srcfile):
dstroot = pt + img[1]
dstroot = pt + real_url
dstdir = os.path.dirname(dstroot)
if not os.path.exists(dstdir):
os.makedirs(dstdir)
shutil.copy(srcfile, dstroot)
ext = dstroot.split(".")[-1]

# if image is gif, convert to png

if ext == "gif":
try:
ext = dstroot.split(".")[-1]
im = ImagePIL.open(dstroot)
im.save(os.path.join(dstroot.split(".")[0] + ".png"))
# if image is gif, convert to png
if ext == "gif":
im.save(os.path.join(dstroot.split(".")[0] + ".png"))
except IOError:
ext = dstroot.split(".")[-1]
im = ImagePIL.open(unknow_path)
if ext == "gif":
im.save(os.path.join(dstroot.split(".")[0] + ".png"))
else:
im.save(os.path.join(dstroot))


def sub_urlimg(g):
start = g.group("start")
url = g.group("url")
parse_object = urlparse(url)
if parse_object.query!='':
resp = parse_qs(urlparse(url).query, keep_blank_values=True)
parse_object = urlparse(resp["u"][0])
(filepath, filename) = os.path.split(parse_object.path)
ext = filename.split(".")[-1]
if ext == "gif":
if parse_object.scheme in ("http", "https") or \
parse_object.netloc[:3]=="www" or \
parse_object.path[:3]=="www":
url = os.path.join("images", filename.split(".")[0] + ".png")
if filename!='':
mark= g.group("mark")
ext = filename.split(".")[-1]
if ext == "gif":
if parse_object.scheme in ("http", "https") or \
parse_object.netloc[:3]=="www" or \
parse_object.path[:3]=="www":
url = os.path.join("images", filename.split(".")[0] + ".png")
else:
url = (url.split(".")[0])[1:] + ".png"
else:
url = (url.split(".")[0])[1:] + ".png"
if parse_object.scheme in ("http", "https") or \
parse_object.netloc[:3]=="www" or \
parse_object.path[:3]=="www":
url = os.path.join("images", filename)
else:
url = url[1:]
end = g.group("end")
return start + mark+ url + end
else:
if parse_object.scheme in ("http", "https") or \
parse_object.netloc[:3]=="www" or \
parse_object.path[:3]=="www":
url = os.path.join("images", filename)
else:
url = url[1:]
end = g.group("end")
return start + url + end

return start


def markdown_to_out(md_text):
return re.sub(ur"(?P<start>!\[.*?\]\()(?P<url>.+?)(?P<end>\))", sub_urlimg,
return re.sub(ur"(?P<start>)(?P<mark>!\[.*?\]\()(?P<url>.+?)(?P<end>\))", sub_urlimg,
md_text)
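
The query-string branch added above is what lets the Qwant thumbnail in the fixture work: when the image URL is a proxy of the form ?u=<encoded target>&h=...&w=..., the real target is pulled out of the u parameter before downloading and before rewriting the path. A minimal sketch of that unwrapping, assuming the proxy passes the target in a u parameter exactly as in the fixture URL (the helper name resolve_real_url is illustrative, not part of the commit):

# -*- coding: utf-8 -*-
from urlparse import urlparse, parse_qs

def resolve_real_url(url):
    parse_object = urlparse(url)
    if parse_object.query != '':
        params = parse_qs(parse_object.query, keep_blank_values=True)
        if "u" in params:
            # the thumbnail proxy carries the real image URL in "u"
            return params["u"][0]
    return url

print resolve_real_url("https://s.qwant.com/thumbr/"
                       "?u=http%3A%2F%2Fwww.blogoergosum.com%2Fwp-content%2Fuploads%2F2010%2F02%2Fwikipedia-logo.jpg&h=338&w=600")
# -> http://www.blogoergosum.com/wp-content/uploads/2010/02/wikipedia-logo.jpg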


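The other half of the fix is making the download itself fault-tolerant: a host that cannot be reached is skipped, a response that PIL cannot read as an image is replaced by the fixtures/noir_black.png placeholder added in this commit, and GIFs are still converted to PNG for the pandoc export. A stand-alone sketch of that behaviour (download_image and its arguments are illustrative, not the commit's exact code):

# -*- coding: utf-8 -*-
import os
from urllib import urlretrieve
from PIL import Image as ImagePIL

def download_image(real_url, dest_dir, filename, placeholder_path):
    if not os.path.isdir(dest_dir):
        os.makedirs(dest_dir)
    down_path = os.path.abspath(os.path.join(dest_dir, filename))
    ext = filename.split(".")[-1]
    png_path = os.path.join(dest_dir, filename.split(".")[0] + ".png")
    try:
        urlretrieve(real_url, down_path)
        try:
            im = ImagePIL.open(down_path)  # IOError if the body is not a valid image (404 page, etc.)
            if ext == "gif":
                im.save(png_path)  # keep a PNG copy so the pandoc export can embed it
        except IOError:
            im = ImagePIL.open(placeholder_path)  # unreadable download: fall back to the black placeholder
            im.save(png_path if ext == "gif" else down_path)
    except IOError:
        pass  # unreachable host or refused download: leave this image out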
