From 6b29fe50d4a468fc6a491b0f33ffbb9a9d5aecca Mon Sep 17 00:00:00 2001 From: volfpeter Date: Thu, 20 Feb 2020 17:58:29 +0100 Subject: [PATCH] initial commit --- .gitignore | 105 +++++++++++ LICENSE | 21 +++ Pipfile | 18 ++ Pipfile.lock | 236 ++++++++++++++++++++++++ README.md | 50 +++++ requirements.txt | 2 + setup.py | 68 +++++++ uspto_patent_citation_graph/__init__.py | 125 +++++++++++++ 8 files changed, 625 insertions(+) create mode 100755 .gitignore create mode 100644 LICENSE create mode 100644 Pipfile create mode 100644 Pipfile.lock create mode 100644 README.md create mode 100644 requirements.txt create mode 100644 setup.py create mode 100644 uspto_patent_citation_graph/__init__.py diff --git a/.gitignore b/.gitignore new file mode 100755 index 0000000..e4370f7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,105 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.vscode/ +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..ab6885f --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Peter Volf + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..7958dcb --- /dev/null +++ b/Pipfile @@ -0,0 +1,18 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] +black = "*" +pylint = "*" + +[packages] +graphscraper = ">=0.5" +requests = ">=2.22" + +[requires] +python_version = "3.7" + +[pipenv] +allow_prereleases = true diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..0c69a21 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,236 @@ +{ + "_meta": { + "hash": { + "sha256": "e7cbd91a4042ac2310eac2af694ec13617d54f69c26994489b65790b217bff7a" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.7" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "certifi": { + "hashes": [ + "sha256:017c25db2a153ce562900032d5bc68e9f191e44e9a0f762f373977de9df1fbb3", + "sha256:25b64c7da4cd7479594d035c08c2d809eb4aab3a26e5a990ea98cc450c320f1f" + ], + "version": "==2019.11.28" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" + }, + "graphscraper": { + "hashes": [ + "sha256:1fcc9581753ba0eab79a560703c601489b987d57732f5ae5b704ad73b95b5e8a", + "sha256:a12d2683c9a6129889f8936f7681d523dcf8968ccd0239f9a0a97c2933cc8b20" + ], + "index": "pypi", + "version": "==0.5.0" + }, + "idna": { + "hashes": [ + "sha256:7588d1c14ae4c77d74036e8c22ff447b26d0fde8f007354fd48a7814db15b7cb", + "sha256:a068a21ceac8a4d63dbfd964670474107f541babbd2250d61922f029858365fa" + ], + "version": "==2.9" + }, + "requests": { + "hashes": [ + 
"sha256:43999036bfa82904b6af1d99e4882b560e5e2c68e5c4b0aa03b655f3d7d73fee", + "sha256:b3f43d496c6daba4493e7c431722aeb7dbc6288f52a6e04e7b6023b0247817e6" + ], + "index": "pypi", + "version": "==2.23.0" + }, + "sqlalchemy": { + "hashes": [ + "sha256:64a7b71846db6423807e96820993fa12a03b89127d278290ca25c0b11ed7b4fb" + ], + "version": "==1.3.13" + }, + "urllib3": { + "hashes": [ + "sha256:2f3db8b19923a873b3e5256dc9c2dedfa883e33d87c690d9c7913e1f40673cdc", + "sha256:87716c2d2a7121198ebcb7ce7cccf6ce5e9ba539041cfbaeecfb641dc0bf6acc" + ], + "version": "==1.25.8" + } + }, + "develop": { + "appdirs": { + "hashes": [ + "sha256:9e5896d1372858f8dd3344faf4e5014d21849c756c8d5701f78f8a103b372d92", + "sha256:d8b24664561d0d34ddfaec54636d502d7cea6e29c3eaf68f3df6180863e2166e" + ], + "version": "==1.4.3" + }, + "astroid": { + "hashes": [ + "sha256:71ea07f44df9568a75d0f354c49143a4575d90645e9fead6dfb52c26a85ed13a", + "sha256:840947ebfa8b58f318d42301cf8c0a20fd794a33b61cc4638e28e9e61ba32f42" + ], + "version": "==2.3.3" + }, + "attrs": { + "hashes": [ + "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", + "sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" + ], + "version": "==19.3.0" + }, + "black": { + "hashes": [ + "sha256:1b30e59be925fafc1ee4565e5e08abef6b03fe455102883820fe5ee2e4734e0b", + "sha256:c2edb73a08e9e0e6f65a0e6af18b059b8b1cdd5bef997d7a0b181df93dc81539" + ], + "index": "pypi", + "version": "==19.10b0" + }, + "click": { + "hashes": [ + "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", + "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7" + ], + "version": "==7.0" + }, + "isort": { + "hashes": [ + "sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1", + "sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd" + ], + "version": "==4.3.21" + }, + "lazy-object-proxy": { + "hashes": [ + "sha256:0c4b206227a8097f05c4dbdd323c50edf81f15db3b8dc064d08c62d37e1a504d", 
+ "sha256:194d092e6f246b906e8f70884e620e459fc54db3259e60cf69a4d66c3fda3449", + "sha256:1be7e4c9f96948003609aa6c974ae59830a6baecc5376c25c92d7d697e684c08", + "sha256:4677f594e474c91da97f489fea5b7daa17b5517190899cf213697e48d3902f5a", + "sha256:48dab84ebd4831077b150572aec802f303117c8cc5c871e182447281ebf3ac50", + "sha256:5541cada25cd173702dbd99f8e22434105456314462326f06dba3e180f203dfd", + "sha256:59f79fef100b09564bc2df42ea2d8d21a64fdcda64979c0fa3db7bdaabaf6239", + "sha256:8d859b89baf8ef7f8bc6b00aa20316483d67f0b1cbf422f5b4dc56701c8f2ffb", + "sha256:9254f4358b9b541e3441b007a0ea0764b9d056afdeafc1a5569eee1cc6c1b9ea", + "sha256:9651375199045a358eb6741df3e02a651e0330be090b3bc79f6d0de31a80ec3e", + "sha256:97bb5884f6f1cdce0099f86b907aa41c970c3c672ac8b9c8352789e103cf3156", + "sha256:9b15f3f4c0f35727d3a0fba4b770b3c4ebbb1fa907dbcc046a1d2799f3edd142", + "sha256:a2238e9d1bb71a56cd710611a1614d1194dc10a175c1e08d75e1a7bcc250d442", + "sha256:a6ae12d08c0bf9909ce12385803a543bfe99b95fe01e752536a60af2b7797c62", + "sha256:ca0a928a3ddbc5725be2dd1cf895ec0a254798915fb3a36af0964a0a4149e3db", + "sha256:cb2c7c57005a6804ab66f106ceb8482da55f5314b7fcb06551db1edae4ad1531", + "sha256:d74bb8693bf9cf75ac3b47a54d716bbb1a92648d5f781fc799347cfc95952383", + "sha256:d945239a5639b3ff35b70a88c5f2f491913eb94871780ebfabb2568bd58afc5a", + "sha256:eba7011090323c1dadf18b3b689845fd96a61ba0a1dfbd7f24b921398affc357", + "sha256:efa1909120ce98bbb3777e8b6f92237f5d5c8ea6758efea36a473e1d38f7d3e4", + "sha256:f3900e8a5de27447acbf900b4750b0ddfd7ec1ea7fbaf11dfa911141bc522af0" + ], + "version": "==1.4.3" + }, + "mccabe": { + "hashes": [ + "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", + "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" + ], + "version": "==0.6.1" + }, + "pathspec": { + "hashes": [ + "sha256:163b0632d4e31cef212976cf57b43d9fd6b0bac6e67c26015d611a647d5e7424", + "sha256:562aa70af2e0d434367d9790ad37aed893de47f1693e4201fd1d3dca15d19b96" + ], + "version": "==0.7.0" + 
}, + "pylint": { + "hashes": [ + "sha256:3db5468ad013380e987410a8d6956226963aed94ecb5f9d3a28acca6d9ac36cd", + "sha256:886e6afc935ea2590b462664b161ca9a5e40168ea99e5300935f6591ad467df4" + ], + "index": "pypi", + "version": "==2.4.4" + }, + "regex": { + "hashes": [ + "sha256:061f5b049a4a75ab662d843c343b58d17dbbbf943890b36a74c796c0145256b0", + "sha256:12a18821e38669cfd54d01e2351bcbe55632009c9b5736a159a4711d39abf266", + "sha256:1551d6bf97e48d8eb06ab513868041e58a0473296cc636180df105dacc7b546e", + "sha256:4216e5f9b659014d1a9f36d920fd1207f1ed1364231e4192295ff85ad469c971", + "sha256:4bfc09ed38ca2c6da17bd82febc2c260d142776e56ab7092036ee86b66ed3be0", + "sha256:4d6f0646c8c8ed566391e7cb49230f4e953c39121d38eaae2c573666ba0235be", + "sha256:54df3a00c5f8ece5ff969e0ee23fb01b927e9c265ea43b73da501677170b4746", + "sha256:55f344f930bbcaae3146bc2cb8a761ea993c81d9777ec9fa530330cd62762653", + "sha256:5e6826ad52f3f6f7000163bcaa1e19bd21c22478d00490875df5fa0ac5e95637", + "sha256:5efd84785b764114a86c308e43103163e52e6189d24a5ecbbdd23b79dd715b89", + "sha256:685450ce1e63e7375f867093a7e0f15817e778ffa7b4bbdfae59cd73dedf7095", + "sha256:6b9a165a96cad84a6403c8375eb09c03a91b3ce13749fcff5619a7de9323f712", + "sha256:6cbb96b49932a47bbe6a7c16b60e92ad5571cafbcda34fa178eecf6df1e90884", + "sha256:776908974bf26133abdb4a7b83943537aa84a207e0d36b6be9b0680e0d370163", + "sha256:7af2199c44511d6b962817817aa14eb673b132c940c2b00809c5fb7906381015", + "sha256:7b2bb82b815015826d3ffbfa6dc8919375cf8e0653db023fe7d34d799727d2ef", + "sha256:a022be296f9ff54423a31bf9f7761c979e8654a81fffa83509585d674f600faa", + "sha256:a9fa68b54e88ac027ae6ab8e1e181807f13713943e728e20bcb4c34c5cc4827a", + "sha256:ba08ecc10eb23dad6b18dc0da7e60dae508fc43381d4d7a9855345db22e0162b", + "sha256:bbd2fc931fed31e1f4fe45b7acc076983a4ad6b3ee83ae962eecfe553c842791", + "sha256:c55cbe57a35eeef524ad323ee0e04c4a0ed724d1736a4d15adeca00852cd8bf9" + ], + "version": "==2020.2.18" + }, + "six": { + "hashes": [ + 
"sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a", + "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c" + ], + "version": "==1.14.0" + }, + "toml": { + "hashes": [ + "sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c", + "sha256:235682dd292d5899d361a811df37e04a8828a5b1da3115886b73cf81ebc9100e" + ], + "version": "==0.10.0" + }, + "typed-ast": { + "hashes": [ + "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355", + "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919", + "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa", + "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652", + "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75", + "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01", + "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d", + "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1", + "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907", + "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c", + "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3", + "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b", + "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614", + "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb", + "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b", + "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41", + "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6", + "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34", + "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe", + "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4", + 
"sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7" + ], + "markers": "implementation_name == 'cpython' and python_version < '3.8'", + "version": "==1.4.1" + }, + "wrapt": { + "hashes": [ + "sha256:565a021fd19419476b9362b05eeaa094178de64f8361e44468f9e9d7843901e1" + ], + "version": "==1.11.2" + } + } +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..9210039 --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +# USPTO-patent-citation-graph + +Graph that downloads patent citation data from USPTO's [PatentsView](https://www.patentsview.org) API on-demand and stores it locally in an SQL database (and in memory) for fast access later. + +The project is based on the [graphscraper](https://pypi.org/project/graphscraper/) project, please see that project for the details of the graph API. + +## Installation + +Install the latest version of the project from the Python Package Index using `pip install uspto-patent-citation-graph`. + +## Getting started + +Creating a graph instance that will use a default, on-disk SQLite database: + +```Python +from uspto_patent_citation_graph import USPTOPatentCitationGraph + +graph = USPTOPatentCitationGraph(None) +``` + +Loading a node that is not in the local database yet: + +```Python +# `can_validate_and_load=True` tells the graph's node list that it is allowed to +# load data from the PatentsView API. Its default value is `False`, and the +# argument can be omitted if the given patent is already in the local database. 
+patent_number = "4733665" # Stent patent +stent_patent = graph.nodes.get_node_by_name(patent_number, can_validate_and_load=True) +``` + +Accessing a node's neighbors (cited and cited-by patents): + +```Python +print(f"Neighbors of {stent_patent.name}:") +for neighbor in stent_patent.neighbors: + print(f" - {neighbor.name}: {neighbor.external_id}") +``` + +## Community guidelines + +Any form of constructive contribution is welcome: + +- Questions, feedback, bug reports: please open an issue in the issue tracker of the project or contact the repository owner by email, whichever you feel appropriate. +- Contribution to the software: please open an issue in the issue tracker of the project that describes the changes you would like to make to the software and open a pull request with the changes. The description of the pull request must reference the corresponding issue. + +The following types of contribution are especially appreciated: + +## License - MIT + +The library is open-sourced under the conditions of the [MIT license](https://choosealicense.com/licenses/mit/). diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..01449c9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +graphscraper>=0.5 +requests>=2.22 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..9ca8b25 --- /dev/null +++ b/setup.py @@ -0,0 +1,68 @@ +from codecs import open +from os import path +import re +from setuptools import setup, find_packages + +# Constants + +NAME = "uspto-patent-citation-graph" +ROOT = NAME.replace("-", "_") + +# Get the long description from the README file +with open(path.join(path.abspath(path.dirname(__file__)), "README.md"), encoding="utf-8") as f: + readme = f.read() + +# Get the version from the root __init__.py file.
+with open(path.join(path.abspath(path.dirname(__file__)), ROOT, "__init__.py"), encoding="utf-8") as f: + content = f.read() + _author = re.search("__author__ = \"(.*?)\"", content).group(1) + _email = re.search("__email__ = \"(.*?)\"", content).group(1) + _license = re.search("__license__ = \"(.*?)\"", content).group(1) + _url = re.search("__url__ = \"(.*?)\"", content).group(1) + _version = re.search("__version__ = \"(.*?)\"", content).group(1) + +# Get the requirements from requirements.txt. +req_filename = "requirements.txt" +exp = re.compile("(?P<req>\\w+)\\s*(?P<op>[<>=!~]+)\\s*(?P<ver>[\\w.]+)") +requirements = [] +with open(path.join(path.dirname(path.abspath(__file__)), req_filename)) as req_file: + for line in req_file: + line = line.split("#", maxsplit=1)[0].strip() + match = exp.match(line) if line else None + if match is not None: + requirements.append(("".join((match["req"], match["op"], match["ver"])))) +requirements.sort(key=lambda s: s.casefold()) + +setup( + name=NAME, + version=_version, + description="graph patent citation USPTO database graphscraper webscraper", + long_description=readme, + long_description_content_type="text/markdown", + url=_url, + author=_author, + author_email=_email, + license=_license, + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Topic :: Database", + "Topic :: Education", + "Topic :: Internet :: WWW/HTTP :: Dynamic Content", + "Topic :: Scientific/Engineering", + "Typing :: Typed" + ], + keywords="bootstrap html markup generator utility", + packages=find_packages(exclude=["test"]), + python_requires=">=3.6",
+    install_requires=requirements
+)
diff --git a/uspto_patent_citation_graph/__init__.py b/uspto_patent_citation_graph/__init__.py
new file mode 100644
index 0000000..86ff3c1
--- /dev/null
+++ b/uspto_patent_citation_graph/__init__.py
@@ -0,0 +1,178 @@
+from typing import Optional
+
+from itertools import chain, repeat
+import logging
+
+from graphscraper.base import Graph, Node, NodeList
+from graphscraper.db import GraphDatabaseInterface, create_graph_database_interface
+import requests
+
+__author__ = "Peter Volf"
+__copyright__ = "Copyright 2020, Peter Volf"
+__email__ = "do.volfp@gmail.com"
+__license__ = "MIT"
+__url__ = "https://github.com/volfpeter/uspto-patent-citation-graph"
+__version__ = "0.2002.0"
+
+# Seconds to wait for the PatentsView API before giving up on a request.
+_REQUEST_TIMEOUT = 30
+
+
+class USPTOPatentCitationGraph(Graph):
+    """
+    Patent citation graph whose nodes load their neighbors from the PatentsView API.
+    """
+
+    def __init__(self, database: Optional[GraphDatabaseInterface], *, log_neighbor_loading: bool = False) -> None:
+        """
+        Initialization.
+
+        Arguments:
+            database: The database interface to use. If `None`, then a default one will be created.
+            log_neighbor_loading: Whether to log when the graph loads citations from the USPTO API.
+        """
+        if database is None:
+            database = USPTOPatentCitationGraph.create_default_database()
+
+        super().__init__(database)
+
+        self._logger: Optional[logging.Logger] = None
+        if log_neighbor_loading:
+            self._logger = logging.getLogger(self.__class__.__name__)
+            self._logger.setLevel(logging.DEBUG)
+            # The logger is looked up by class name, so instances of the same class share it.
+            # Attach the handler only once, otherwise every record is emitted once per instance.
+            if not self._logger.handlers:
+                handler: logging.Handler = logging.StreamHandler()
+                handler.setLevel(logging.DEBUG)
+                handler.setFormatter(logging.Formatter(
+                    "%(levelname)s | %(asctime)s | %(name)s\n -- %(message)s"
+                ))
+                self._logger.addHandler(handler)
+
+    @staticmethod
+    def create_default_database(reset: bool = False) -> GraphDatabaseInterface:
+        """
+        Creates and returns a default SQLAlchemy database interface to use.
+
+        The interface is backed by the `USPTOPatentCitationGraph.db` SQLite file.
+
+        Arguments:
+            reset (bool): Whether to reset the database if it happens to exist already.
+        """
+        import sqlalchemy
+        from sqlalchemy.ext.declarative import declarative_base
+        from sqlalchemy.orm import sessionmaker
+        from sqlalchemy.pool import StaticPool
+
+        Base = declarative_base()
+        engine = sqlalchemy.create_engine("sqlite:///USPTOPatentCitationGraph.db", poolclass=StaticPool)
+        Session = sessionmaker(bind=engine)
+
+        dbi: GraphDatabaseInterface = create_graph_database_interface(
+            sqlalchemy, Session(), Base, sqlalchemy.orm.relationship
+        )
+
+        if reset:
+            Base.metadata.drop_all(engine)
+        Base.metadata.create_all(engine)
+
+        return dbi
+
+    def get_authentic_node_name(self, node_name: str) -> Optional[str]:
+        """
+        Returns the given node name with the surrounding whitespace removed.
+
+        Arguments:
+            node_name: The patent number to normalize.
+        """
+        return node_name.strip()
+
+    def log_neighbor_loading(self, patent_number: str) -> None:
+        """
+        Logs that the neighbors of the given patent are being loaded from the USPTO API.
+
+        Arguments:
+            patent_number: The patent number whose neighbors are being loaded.
+        """
+        if self._logger is not None:
+            self._logger.debug(f"Loading neighbors of {patent_number}")
+
+    def _create_node_list(self) -> NodeList:
+        """
+        Creates the node list of the graph.
+        """
+        return USPTOPatentCitationNodeList(self)
+
+
+class USPTOPatentCitationNode(Node):
+    """
+    Patent node that loads its cited and cited-by patents from the PatentsView API.
+    """
+
+    def _load_neighbors_from_external_source(self) -> None:
+        """
+        Queries the PatentsView API for the patents this patent cites and is cited by,
+        and adds an edge between this node and each of them.
+
+        Raises:
+            ValueError: If the API responds with a status code other than 200.
+        """
+        graph = self._graph
+        graph.log_neighbor_loading(self.name)
+
+        query = {"patent_number": self.name}
+        fields = ["cited_patent_number", "cited_patent_title", "citedby_patent_number", "citedby_patent_title"]
+        # Without a timeout `requests` waits indefinitely for an unresponsive server.
+        response = requests.post(
+            "https://www.patentsview.org/api/patents/query",
+            json={"q": query, "f": fields},
+            timeout=_REQUEST_TIMEOUT
+        )
+        if response.status_code != 200:
+            raise ValueError("Request failed")
+
+        # Guard against a missing, null or empty "patents" value (i.e. no matching patent)
+        # instead of failing on the `[0]` lookup.
+        patents = response.json().get("patents") or []
+        if not patents:
+            return
+
+        patent = patents[0]
+        cited_patents = patent.get("cited_patents") or []
+        citedby_patents = patent.get("citedby_patents") or []
+
+        nodes = graph.nodes
+
+        # Each entry is paired with the key prefix the API uses for its fields.
+        for entry, prefix in chain(zip(cited_patents, repeat("cited")), zip(citedby_patents, repeat("citedby"))):
+            patent_number = entry.get(f"{prefix}_patent_number")
+            patent_title = entry.get(f"{prefix}_patent_title")
+            if patent_number is None or patent_title is None:
+                continue
+
+            neighbor = nodes.get_node_by_name(
+                patent_number.strip(),
+                can_validate_and_load=True,
+                external_id=patent_title.strip()
+            )
+
+            if neighbor is not None:
+                graph.add_edge(self, neighbor)
+
+
+class USPTOPatentCitationNodeList(NodeList):
+    """
+    Node list that creates `USPTOPatentCitationNode` instances.
+    """
+
+    def _create_node(self, index: int, name: str, external_id: Optional[str] = None) -> Node:
+        """
+        Creates a patent node with the given properties.
+
+        Arguments:
+            index: The index of the node.
+            name: The name of the node, i.e. the patent number.
+            external_id: The external ID of the node, i.e. the patent title.
+        """
+        return USPTOPatentCitationNode(graph=self._graph, index=index, name=name, external_id=external_id)