unpywall · bganglia · Apr 20, 2020 · Apr 18, 2020 · Apr 19, 2020 · Apr 20, 2020
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -0,0 +1,27 @@
+from unpywall.__main__ import main
+from requests.exceptions import HTTPError
+import pytest
+
+
+class TestUnpywallCli:
+    """Exit-code checks for the unpywall command line interface."""
+    def test_main(self):
+        # the help flag must terminate with exit status 0
+        with pytest.raises(SystemExit) as help_exit:
+            main(test_args=['-h'])
+        assert help_exit.value.code == 0
+
+        # an unknown argument must terminate with exit status 1
+        with pytest.raises(SystemExit) as unknown_exit:
+            main(test_args=['this is a bad argument'])
+        assert unknown_exit.value.code == 1
+
+    def test_view(self):
+        # 'view' without a DOI must terminate with exit status 2
+        with pytest.raises(SystemExit) as missing_doi_exit:
+            main(test_args=['view'])
+        assert missing_doi_exit.value.code == 2
+
+        # a malformed DOI must surface as an HTTPError
+        with pytest.raises(HTTPError):
+            main(test_args=['view', 'this is a bad doi'])
diff --git a/unpywall/__init__.py b/unpywall/__init__.py
@@ -1,21 +1,19 @@
-import urllib.request
+import requests
 import pandas as pd
-import json
-import time
 import sys
+import subprocess
+import tempfile
+import webbrowser
+import os
+import platform
+from io import BytesIO
 
 
 class Unpywall:
     """
     Base class that contains useful functions for retrieving information
     from the Unpaywall REST API (https://api.unpaywall.org). This client uses
     version 2 of the API.
-
-    Methods
-    -------
-    get_df(dois, progress, errors)
-        Retrieves information from the Unpaywall API service and returns a
-        pandas DataFrame.
     """
 
     api_limit: int = 100000
@@ -73,7 +71,6 @@ def _progress(progress: float) -> None:
                                    int(progress * 100))
 
         print(text, end='\r', flush=False, file=sys.stdout)
-        time.sleep(0.1)
 
         if progress == 1:
             print('\n', file=sys.stdout)
@@ -82,6 +79,7 @@ def _progress(progress: float) -> None:
     def get_df(dois: list,
                progress: bool = False,
                errors: str = 'raise',
+               force: bool = False,
                ignore_cache: bool = True) -> pd.DataFrame:
         """
         Parses information from the Unpaywall API service and returns it as
@@ -96,6 +94,10 @@ def get_df(dois: list,
         errors : str
             Either 'raise' or 'ignore'. If the parameter errors is set to
             'ignore' than errors will not raise an exception.
+        force : bool
+            Whether to force the cache to retrieve a new entry.
+        ignore_cache : bool
+            Whether to use or ignore the cache.
 
         Returns
         -------
@@ -107,8 +109,6 @@ def get_df(dois: list,
         ------
         ValueError
             If the parameter errors contains a faulty value.
-        AttributeError
-            If the Unpaywall API did not respond with json.
         """
 
         dois = Unpywall._validate_dois(dois)
@@ -124,31 +124,28 @@ def get_df(dois: list,
             if progress:
                 Unpywall._progress(n/len(dois))
 
-            try:
-                r = Unpywall.get_json(doi,
-                                      errors=errors,
-                                      ignore_cache=ignore_cache)
+            r = Unpywall.get_json(doi,
+                                  errors=errors,
+                                  force=force,
+                                  ignore_cache=ignore_cache)
 
-                # check if json is not empty due to an faulty DOI
-                if not bool(r):
-                    continue
+            # check if json is not empty or None due to a faulty DOI
+            if not bool(r):
+                continue
 
-                df2 = pd.json_normalize(data=r, max_level=1, errors=errors)
+            df2 = pd.json_normalize(data=r, max_level=1, errors=errors)
 
-                df = df.append(df2)
+            df = df.append(df2)
 
-            except (AttributeError, json.decoder.JSONDecodeError):
-
-                if errors == 'raise':
-                    raise AttributeError('Unpaywall API did not return json')
-                else:
-                    continue
+        if df.empty:
+            return None
 
         return df
 
     @staticmethod
     def get_json(doi: str,
                  errors: str = 'raise',
+                 force: bool = False,
                  ignore_cache: bool = False):
         """
         This function returns all information in Unpaywall about the given DOI.
@@ -157,25 +154,40 @@ def get_json(doi: str,
         ----------
         doi : str
             The DOI of the requested paper.
+        errors : str
+            Either 'raise' or 'ignore'. If the parameter errors is set to
+            'ignore' then errors will not raise an exception.
+        force : bool
+            Whether to force the cache to retrieve a new entry.
+        ignore_cache : bool
+            Whether to use or ignore the cache.
 
         Returns
         -------
         JSON object
             A JSON data structure containing all information
             returned by Unpaywall about the given DOI.
+
+        Raises
+        ------
+        AttributeError
+            If the Unpaywall API did not respond with json.
         """
         from .cache import cache
 
-        r = cache.get(doi, errors, ignore_cache)
-        if r:
+        r = cache.get(doi,
+                      errors=errors,
+                      force=force,
+                      ignore_cache=ignore_cache)
+        try:
             return r.json()
-        else:
+        except AttributeError:
             return None
 
     @staticmethod
-    def get_pdf_link(doi: str, errors: str = 'raise'):
+    def get_pdf_link(doi: str):
         """
-        This function returns a link to the an OA pdf (if available).
+        This function returns a link to an OA pdf (if available).
 
         Parameters
         ----------
@@ -187,14 +199,14 @@ def get_pdf_link(doi: str, errors: str = 'raise'):
         str
             The URL of an OA PDF (if available).
         """
-        json_data = Unpywall.get_json(doi, errors=errors)
+        json_data = Unpywall.get_json(doi)
         try:
             return json_data['best_oa_location']['url_for_pdf']
         except (KeyError, TypeError):
             return None
 
     @staticmethod
-    def get_doc_link(doi: str, errors: str = 'raise'):
+    def get_doc_link(doi: str):
         """
         This function returns a link to the best OA location
         (not necessarily a PDF).
@@ -209,14 +221,14 @@ def get_doc_link(doi: str, errors: str = 'raise'):
         str
             The URL of the best OA location (not necessarily a PDF).
         """
-        json_data = Unpywall.get_json(doi, errors)
+        json_data = Unpywall.get_json(doi)
         try:
             return json_data['best_oa_location']['url']
         except (KeyError, TypeError):
             return None
 
     @staticmethod
-    def get_all_links(doi: str, errors: str = 'raise') -> list:
+    def get_all_links(doi: str) -> list:
         """
         This function returns a list of URLs for all open-access copies
         listed in Unpaywall.
@@ -232,14 +244,14 @@ def get_all_links(doi: str, errors: str = 'raise') -> list:
             A list of URLs leading to open-access copies.
         """
         data = []
-        for value in [Unpywall.get_doc_link(doi, errors),
-                      Unpywall.get_pdf_link(doi, errors)]:
+        for value in [Unpywall.get_doc_link(doi),
+                      Unpywall.get_pdf_link(doi)]:
             if value and value not in data:
                 data.append(value)
         return data
 
     @staticmethod
-    def download_pdf_handle(doi: str, errors: str = 'raise'):
+    def download_pdf_handle(doi: str):
         """
         This function returns a file-like object containing the requested PDF.
 
@@ -253,5 +265,91 @@ def download_pdf_handle(doi: str, errors: str = 'raise'):
         object
             The handle of the PDF file.
         """
-        pdf_link = Unpywall.get_pdf_link(doi, errors)
-        return urllib.request.urlopen(pdf_link)
+        pdf_link = Unpywall.get_pdf_link(doi)
+        # r.content keeps the raw bytes; r.text would corrupt binary PDFs
+        return BytesIO(requests.get(pdf_link).content)
+
+    @staticmethod
+    def view_pdf(doi: str,
+                 mode: str = 'viewer',
+                 progress: bool = False) -> None:
+        """
+        This function opens a local copy of a PDF from a given DOI.
+
+        Parameters
+        ----------
+        doi : str
+            The DOI of the requested paper.
+        mode : str
+            'viewer' downloads the PDF to a temporary file and opens it with
+            the system's default application, any other value uses a browser.
+        progress : bool
+            Whether the download progress should be printed out or not.
+        """
+
+        url = Unpywall.get_pdf_link(doi)
+
+        # stream=True defers the body so it can be written chunk by chunk;
+        # the with-block releases the connection in every mode.
+        with requests.get(url, stream=True) as r:
+            if mode != 'viewer':
+                webbrowser.open_new(url)
+                return
+
+            file_size = int(r.headers.get('content-length', 0))
+            with tempfile.NamedTemporaryFile(delete=False,
+                                             suffix='.pdf') as tmp:
+                chunk_size = 0
+                for chunk in r.iter_content(1024):
+                    tmp.write(chunk)
+                    chunk_size += len(chunk)
+                    # content-length may be absent: never divide by zero
+                    if progress and file_size:
+                        Unpywall._progress(chunk_size/file_size)
+
+        # tmp is closed (and flushed) before the viewer is started
+        system = platform.system()
+        if system == 'Darwin':
+            subprocess.run(['open', tmp.name], check=True)
+        elif system == 'Windows':
+            os.startfile(tmp.name)
+        else:
+            subprocess.run(['xdg-open', tmp.name], check=True)
+
+    @staticmethod
+    def download_pdf_file(doi: str,
+                          filename: str,
+                          filepath: str = '.',
+                          progress: bool = False) -> None:
+        """
+        This function downloads a PDF from a given DOI.
+
+        Parameters
+        ----------
+        doi : str
+            The DOI of the requested paper.
+        filename : str
+            The filename for the PDF.
+        filepath : str
+            The path to store the downloaded PDF.
+        progress : bool
+            Whether the download progress should be printed out or not.
+        """
+
+        url = Unpywall.get_pdf_link(doi)
+        path = os.path.join(filepath, filename)
+
+        # stream=True defers the body so it can be written chunk by chunk;
+        # the with-block releases the connection when the download is done.
+        with requests.get(url, stream=True) as r:
+            file_size = int(r.headers.get('content-length', 0))
+            # exist_ok avoids a race between checking and creating the folder
+            os.makedirs(filepath, exist_ok=True)
+            with open(path, 'wb') as file:
+                chunk_size = 0
+                for chunk in r.iter_content(1024):
+                    file.write(chunk)
+                    chunk_size += len(chunk)
+                    # content-length may be absent: never divide by zero
+                    if progress and file_size:
+                        Unpywall._progress(chunk_size/file_size)