diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..e4370f7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,105 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.vscode/ +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ab6885f --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Peter Volf + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is 
+furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..7958dcb --- /dev/null +++ b/Pipfile @@ -0,0 +1,18 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] +black = "*" +pylint = "*" + +[packages] +graphscraper = ">=0.5" +requests = ">=2.22" + +[requires] +python_version = "3.7" + +[pipenv] +allow_prereleases = true diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..0c69a21 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,236 @@ +{ + "_meta": { + "hash": { + "sha256": "e7cbd91a4042ac2310eac2af694ec13617d54f69c26994489b65790b217bff7a" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.7" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "certifi": { + "hashes": [ + "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3", + "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f" + ], + "version": "==2019.11.28" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, + "graphscraper": { + "hashes": [ + 
"sha256:1fcc9581753ba0eab79a560703c601489b987d57732f5ae5b704ad73b95b5e8a", + "sha256:a12d2683c9a6129889f8936f7681d523dcf8968ccd0239f9a0a97c2933cc8b20" + ], + "index": "pypi", + "version": "==0.5.0" + }, + "idna": { + "hashes": [ + "sha256:7588d1c14ae4c77d74036e8c22ff447b26d0fde8f007354fd48a7814db15b7cb", + "sha256:a068a21ceac8a4d63dbfd964670474107f541babbd2250d61922f029858365fa" + ], + "version": "==2.9" + }, + "requests": { + "hashes": [ + "sha256:43999036bfa82904b6af1d99e4882b560e5e2c68e5c4b0aa03b655f3d7d73fee", + "sha256:b3f43d496c6daba4493e7c431722aeb7dbc6288f52a6e04e7b6023b0247817e6" + ], + "index": "pypi", + "version": "==2.23.0" + }, + "sqlalchemy": { + "hashes": [ + "sha256:64a7b71846db6423807e96820993fa12a03b89127d278290ca25c0b11ed7b4fb" + ], + "version": "==1.3.13" + }, + "urllib3": { + "hashes": [ + "sha256:2f3db8b19923a873b3e5256dc9c2dedfa883e33d87c690d9c7913e1f40673cdc", + "sha256:87716c2d2a7121198ebcb7ce7cccf6ce5e9ba539041cfbaeecfb641dc0bf6acc" + ], + "version": "==1.25.8" + } + }, + "develop": { + "appdirs": { + "hashes": [ + "sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92", + "sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e" + ], + "version": "==1.4.3" + }, + "astroid": { + "hashes": [ + "sha256:71ea07f44df9568a75d0f354c49143a4575d90645e9fead6dfb52c26a85ed13a", + "sha256:840947ebfa8b58f318d42301cf8c0a20fd794a33b61cc4638e28e9e61ba32f42" + ], + "version": "==2.3.3" + }, + "attrs": { + "hashes": [ + "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", + "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" + ], + "version": "==19.3.0" + }, + "black": { + "hashes": [ + "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b", + "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539" + ], + "index": "pypi", + "version": "==19.10b0" + }, + "click": { + "hashes": [ + 
"sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", + "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7" + ], + "version": "==7.0" + }, + "isort": { + "hashes": [ + "sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1", + "sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd" + ], + "version": "==4.3.21" + }, + "lazy-object-proxy": { + "hashes": [ + "sha256:0c4b206227a8097f05c4dbdd323c50edf81f15db3b8dc064d08c62d37e1a504d", + "sha256:194d092e6f246b906e8f70884e620e459fc54db3259e60cf69a4d66c3fda3449", + "sha256:1be7e4c9f96948003609aa6c974ae59830a6baecc5376c25c92d7d697e684c08", + "sha256:4677f594e474c91da97f489fea5b7daa17b5517190899cf213697e48d3902f5a", + "sha256:48dab84ebd4831077b150572aec802f303117c8cc5c871e182447281ebf3ac50", + "sha256:5541cada25cd173702dbd99f8e22434105456314462326f06dba3e180f203dfd", + "sha256:59f79fef100b09564bc2df42ea2d8d21a64fdcda64979c0fa3db7bdaabaf6239", + "sha256:8d859b89baf8ef7f8bc6b00aa20316483d67f0b1cbf422f5b4dc56701c8f2ffb", + "sha256:9254f4358b9b541e3441b007a0ea0764b9d056afdeafc1a5569eee1cc6c1b9ea", + "sha256:9651375199045a358eb6741df3e02a651e0330be090b3bc79f6d0de31a80ec3e", + "sha256:97bb5884f6f1cdce0099f86b907aa41c970c3c672ac8b9c8352789e103cf3156", + "sha256:9b15f3f4c0f35727d3a0fba4b770b3c4ebbb1fa907dbcc046a1d2799f3edd142", + "sha256:a2238e9d1bb71a56cd710611a1614d1194dc10a175c1e08d75e1a7bcc250d442", + "sha256:a6ae12d08c0bf9909ce12385803a543bfe99b95fe01e752536a60af2b7797c62", + "sha256:ca0a928a3ddbc5725be2dd1cf895ec0a254798915fb3a36af0964a0a4149e3db", + "sha256:cb2c7c57005a6804ab66f106ceb8482da55f5314b7fcb06551db1edae4ad1531", + "sha256:d74bb8693bf9cf75ac3b47a54d716bbb1a92648d5f781fc799347cfc95952383", + "sha256:d945239a5639b3ff35b70a88c5f2f491913eb94871780ebfabb2568bd58afc5a", + "sha256:eba7011090323c1dadf18b3b689845fd96a61ba0a1dfbd7f24b921398affc357", + "sha256:efa1909120ce98bbb3777e8b6f92237f5d5c8ea6758efea36a473e1d38f7d3e4", + 
"sha256:f3900e8a5de27447acbf900b4750b0ddfd7ec1ea7fbaf11dfa911141bc522af0" + ], + "version": "==1.4.3" + }, + "mccabe": { + "hashes": [ + "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", + "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" + ], + "version": "==0.6.1" + }, + "pathspec": { + "hashes": [ + "sha256:163b0632d4e31cef212976cf57b43d9fd6b0bac6e67c26015d611a647d5e7424", + "sha256:562aa70af2e0d434367d9790ad37aed893de47f1693e4201fd1d3dca15d19b96" + ], + "version": "==0.7.0" + }, + "pylint": { + "hashes": [ + "sha256:3db5468ad013380e987410a8d6956226963aed94ecb5f9d3a28acca6d9ac36cd", + "sha256:886e6afc935ea2590b462664b161ca9a5e40168ea99e5300935f6591ad467df4" + ], + "index": "pypi", + "version": "==2.4.4" + }, + "regex": { + "hashes": [ + "sha256:061f5b049a4a75ab662d843c343b58d17dbbbf943890b36a74c796c0145256b0", + "sha256:12a18821e38669cfd54d01e2351bcbe55632009c9b5736a159a4711d39abf266", + "sha256:1551d6bf97e48d8eb06ab513868041e58a0473296cc636180df105dacc7b546e", + "sha256:4216e5f9b659014d1a9f36d920fd1207f1ed1364231e4192295ff85ad469c971", + "sha256:4bfc09ed38ca2c6da17bd82febc2c260d142776e56ab7092036ee86b66ed3be0", + "sha256:4d6f0646c8c8ed566391e7cb49230f4e953c39121d38eaae2c573666ba0235be", + "sha256:54df3a00c5f8ece5ff969e0ee23fb01b927e9c265ea43b73da501677170b4746", + "sha256:55f344f930bbcaae3146bc2cb8a761ea993c81d9777ec9fa530330cd62762653", + "sha256:5e6826ad52f3f6f7000163bcaa1e19bd21c22478d00490875df5fa0ac5e95637", + "sha256:5efd84785b764114a86c308e43103163e52e6189d24a5ecbbdd23b79dd715b89", + "sha256:685450ce1e63e7375f867093a7e0f15817e778ffa7b4bbdfae59cd73dedf7095", + "sha256:6b9a165a96cad84a6403c8375eb09c03a91b3ce13749fcff5619a7de9323f712", + "sha256:6cbb96b49932a47bbe6a7c16b60e92ad5571cafbcda34fa178eecf6df1e90884", + "sha256:776908974bf26133abdb4a7b83943537aa84a207e0d36b6be9b0680e0d370163", + "sha256:7af2199c44511d6b962817817aa14eb673b132c940c2b00809c5fb7906381015", + 
"sha256:7b2bb82b815015826d3ffbfa6dc8919375cf8e0653db023fe7d34d799727d2ef", + "sha256:a022be296f9ff54423a31bf9f7761c979e8654a81fffa83509585d674f600faa", + "sha256:a9fa68b54e88ac027ae6ab8e1e181807f13713943e728e20bcb4c34c5cc4827a", + "sha256:ba08ecc10eb23dad6b18dc0da7e60dae508fc43381d4d7a9855345db22e0162b", + "sha256:bbd2fc931fed31e1f4fe45b7acc076983a4ad6b3ee83ae962eecfe553c842791", + "sha256:c55cbe57a35eeef524ad323ee0e04c4a0ed724d1736a4d15adeca00852cd8bf9" + ], + "version": "==2020.2.18" + }, + "six": { + "hashes": [ + "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a", + "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c" + ], + "version": "==1.14.0" + }, + "toml": { + "hashes": [ + "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c", + "sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e" + ], + "version": "==0.10.0" + }, + "typed-ast": { + "hashes": [ + "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355", + "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919", + "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa", + "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652", + "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75", + "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01", + "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d", + "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1", + "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907", + "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c", + "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3", + "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b", + "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614", + 
"sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb", + "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b", + "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41", + "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6", + "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34", + "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe", + "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4", + "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7" + ], + "markers": "implementation_name == 'cpython' and python_version < '3.8'", + "version": "==1.4.1" + }, + "wrapt": { + "hashes": [ + "sha256:565a021fd19419476b9362b05eeaa094178de64f8361e44468f9e9d7843901e1" + ], + "version": "==1.11.2" + } + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..9210039 --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +# USPTO-patent-citation-graph + +Graph that downloads patent citation data from USPTO's [PatentsView](https://www.patentsview.org) API on-demand and stores it locally in an SQL database (and in memory) for fast access later. + +The project is based on the [graphscraper](https://pypi.org/project/graphscraper/) project, please see that project for the details of the graph API. + +## Installation + +Install the latest version of the project from the Python Package Index using `pip install uspto-patent-citation-graph`. + +## Getting started + +Creating a graph instance that will use a default, on-disk SQLite database: + +```Python +from uspto_patent_citation_graph import USPTOPatentCitationGraph + +graph = USPTOPatentCitationGraph(None) +``` + +Loading a node that is not in the local database yet: + +```Python +# `can_validate_and_load=True` tells the graph's node list that it is allowed to +# load data from the PatentsView API. 
Its default value is `False`, and the +# argument can be omitted if the given patent is already in the local database. +patent_number = "4733665" # Stent patent +stent_patent = graph.nodes.get_node_by_name(patent_number, can_validate_and_load=True) +``` + +Accessing a node's neighbors (cited and cited-by patents): + +```Python +print(f"Neighbors of {stent_patent.name}:") +for neighbor in stent_patent.neighbors: + print(f" - {neighbor.name}: {neighbor.external_id}") +``` + +## Community guidelines + +Any form of constructive contribution is welcome: + +- Questions, feedback, bug reports: please open an issue in the issue tracker of the project or contact the repository owner by email, whichever you feel appropriate. +- Contribution to the software: please open an issue in the issue tracker of the project that describes the changes you would like to make to the software and open a pull request with the changes. The description of the pull request must reference the corresponding issue. + +The following types of contribution are especially appreciated: + +## License - MIT + +The library is open-sourced under the conditions of the [MIT license](https://choosealicense.com/licenses/mit/). diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..01449c9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +graphscraper>=0.5 +requests>=2.22 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..9ca8b25 --- /dev/null +++ b/setup.py @@ -0,0 +1,68 @@ +from codecs import open +from os import path +import re +from setuptools import setup, find_packages + +# Constants + +NAME = "uspto-patent-citation-graph" +ROOT = NAME.replace("-", "_") + +# Get the long description from the README file +with open(path.join(path.abspath(path.dirname(__file__)), "README.md"), encoding="utf-8") as f: + readme = f.read() + +# Get the version from the root __init__.py file.
+with open(path.join(path.abspath(path.dirname(__file__)), ROOT, "__init__.py"), encoding="utf-8") as f:
+    content = f.read()
+    _author = re.search("__author__ = \"(.*?)\"", content).group(1)
+    _email = re.search("__email__ = \"(.*?)\"", content).group(1)
+    _license = re.search("__license__ = \"(.*?)\"", content).group(1)
+    _url = re.search("__url__ = \"(.*?)\"", content).group(1)
+    _version = re.search("__version__ = \"(.*?)\"", content).group(1)
+
+# Get the requirements from requirements.txt ("#" comments and lines that are not "name<op>version" are skipped).
+req_filename = "requirements.txt"
+exp = re.compile("(?P<req>[\\w.-]+)\\s*(?P<op>[<>=!~]+)\\s*(?P<ver>[\\w.]+)")
+requirements = []
+with open(path.join(path.dirname(path.abspath(__file__)), req_filename)) as req_file:
+    for line in req_file:
+        line = line.split("#", maxsplit=1)[0].strip()
+        match = exp.match(line) if line else None
+        if match is not None:
+            requirements.append(("".join((match["req"], match["op"], match["ver"]))))
+requirements.sort(key=lambda s: s.casefold())
+
+setup(
+    name=NAME,
+    version=_version,
+    description="Graph that downloads patent citation data from USPTO's PatentsView API and stores it in an SQL database.",
+    long_description=readme,
+    long_description_content_type="text/markdown",
+    url=_url,
+    author=_author,
+    author_email=_email,
+    license=_license,
+    classifiers=[
+        "Development Status :: 5 - Production/Stable",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Education",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: MIT License",
+        "Natural Language :: English",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3 :: Only",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Topic :: Database",
+        "Topic :: Education",
+        "Topic :: Internet :: WWW/HTTP :: Dynamic Content",
+        "Topic :: Scientific/Engineering",
+        "Typing :: Typed"
+    ],
+    keywords="graph patent citation USPTO database graphscraper webscraper",
+    packages=find_packages(exclude=["test"]),
+    python_requires=">=3.6",
+    install_requires=requirements
+)
diff --git a/uspto_patent_citation_graph/__init__.py b/uspto_patent_citation_graph/__init__.py
new file mode 100644
index 0000000..86ff3c1
--- /dev/null
+++ b/uspto_patent_citation_graph/__init__.py
@@ -0,0 +1,176 @@
+from typing import Optional
+
+from itertools import chain, repeat
+import logging
+
+from graphscraper.base import Graph, Node, NodeList
+from graphscraper.db import GraphDatabaseInterface, create_graph_database_interface
+import requests
+
+__author__ = "Peter Volf"
+__copyright__ = "Copyright 2020, Peter Volf"
+__email__ = "do.volfp@gmail.com"
+__license__ = "MIT"
+__url__ = "https://github.com/volfpeter/uspto-patent-citation-graph"
+__version__ = "0.2002.0"
+
+
+class USPTOPatentCitationGraph(Graph):
+    """
+    Graph of USPTO patents that loads citation data from the PatentsView API on demand.
+    """
+
+    def __init__(self, database: Optional[GraphDatabaseInterface], *, log_neighbor_loading: bool = False) -> None:
+        """
+        Initialization.
+
+        Arguments:
+            database: The database interface to use. If `None`, then a default one will be created.
+            log_neighbor_loading: Whether to log when the graph loads citations from the USPTO API.
+        """
+        if database is None:
+            database = USPTOPatentCitationGraph.create_default_database()
+
+        super().__init__(database)
+
+        self._logger: Optional[logging.Logger] = None
+        if log_neighbor_loading:
+            self._logger = logging.getLogger(self.__class__.__name__)
+            self._logger.setLevel(logging.DEBUG)
+            # The logger is looked up by name, so it is shared by every graph instance.
+            # Attach the handler only once, otherwise each message would be printed repeatedly.
+            if not self._logger.handlers:
+                handler: logging.Handler = logging.StreamHandler()
+                handler.setLevel(logging.DEBUG)
+                handler.setFormatter(logging.Formatter(
+                    "%(levelname)s | %(asctime)s | %(name)s\n -- %(message)s"
+                ))
+                self._logger.addHandler(handler)
+
+    @staticmethod
+    def create_default_database(reset: bool = False) -> GraphDatabaseInterface:
+        """
+        Creates and returns a default SQLAlchemy database interface to use.
+
+        The data is stored in the `USPTOPatentCitationGraph.db` SQLite file.
+
+        Arguments:
+            reset (bool): Whether to reset the database if it happens to exist already.
+        """
+        import sqlalchemy
+        from sqlalchemy.ext.declarative import declarative_base
+        from sqlalchemy.orm import sessionmaker
+        from sqlalchemy.pool import StaticPool
+
+        Base = declarative_base()
+        engine = sqlalchemy.create_engine("sqlite:///USPTOPatentCitationGraph.db", poolclass=StaticPool)
+        Session = sessionmaker(bind=engine)
+
+        dbi: GraphDatabaseInterface = create_graph_database_interface(
+            sqlalchemy, Session(), Base, sqlalchemy.orm.relationship
+        )
+
+        if reset:
+            Base.metadata.drop_all(engine)
+        Base.metadata.create_all(engine)
+
+        return dbi
+
+    def get_authentic_node_name(self, node_name: str) -> Optional[str]:
+        """
+        Returns the given patent number without surrounding whitespace, or `None` if nothing remains.
+
+        Arguments:
+            node_name: The patent number to normalize.
+        """
+        return node_name.strip() or None
+
+    def log_neighbor_loading(self, patent_number: str) -> None:
+        """
+        Logs that the neighbors of the given patent are being loaded from the USPTO API.
+
+        Arguments:
+            patent_number: The patent number whose neighbors are being loaded.
+        """
+        if self._logger is not None:
+            self._logger.debug(f"Loading neighbors of {patent_number}")
+
+    def _create_node_list(self) -> NodeList:
+        """
+        Creates the node list of the graph.
+        """
+        return USPTOPatentCitationNodeList(self)
+
+
+class USPTOPatentCitationNode(Node):
+    """
+    Patent node that loads its cited and cited-by patents from the PatentsView API.
+    """
+
+    def _load_neighbors_from_external_source(self) -> None:
+        """
+        Loads the cited and cited-by patents of this patent from the PatentsView API
+        and connects each of them to this node with an edge.
+
+        Raises:
+            ValueError: If the API responds with a status code other than 200.
+        """
+        graph = self._graph
+        graph.log_neighbor_loading(self.name)
+
+        query = {"patent_number": self.name}
+        fields = ["cited_patent_number", "cited_patent_title", "citedby_patent_number", "citedby_patent_title"]
+        # Without a timeout an unresponsive server would block the caller indefinitely.
+        response = requests.post(
+            "https://www.patentsview.org/api/patents/query",
+            json={"q": query, "f": fields},
+            timeout=30
+        )
+        if response.status_code != 200:
+            raise ValueError(f"Request failed with status code {response.status_code}")
+
+        # "patents" can be missing, null or empty (presumably when no patent matches the number),
+        # in which case there is nothing to load.
+        patents = response.json().get("patents") or []
+        if not patents:
+            return
+
+        patent_data = patents[0]
+        # `or []` also covers keys that are present with a null value.
+        cited_patents = patent_data.get("cited_patents") or []
+        citedby_patents = patent_data.get("citedby_patents") or []
+
+        nodes = graph.nodes
+
+        # Every entry is paired with the prefix of the response keys that hold its number and title.
+        for patent, prefix in chain(zip(cited_patents, repeat("cited")), zip(citedby_patents, repeat("citedby"))):
+            patent_number = patent.get(f"{prefix}_patent_number")
+            patent_title = patent.get(f"{prefix}_patent_title")
+            if patent_number is None or patent_title is None:
+                continue
+
+            neighbor = nodes.get_node_by_name(
+                patent_number.strip(),
+                can_validate_and_load=True,
+                external_id=patent_title.strip()
+            )
+
+            if neighbor is not None:
+                graph.add_edge(self, neighbor)
+
+
+class USPTOPatentCitationNodeList(NodeList):
+    """
+    Node list that creates `USPTOPatentCitationNode` instances.
+    """
+
+    def _create_node(self, index: int, name: str, external_id: Optional[str] = None) -> Node:
+        """
+        Creates the node with the given properties.
+
+        Arguments:
+            index: The index of the node.
+            name: The name of the node (the patent number).
+            external_id: The external ID of the node (the patent title for loaded neighbors).
+        """
+        return USPTOPatentCitationNode(graph=self._graph, index=index, name=name, external_id=external_id)