From b3070c3d429e0b1fd5a2b0ec57eedd5af85ae5ba Mon Sep 17 00:00:00 2001 From: Alex Florea Date: Tue, 19 Jul 2022 12:11:41 +0300 Subject: [PATCH 1/3] Fixed header interpretation related to HTTP version --- src/NlpClient.php | 260 ++++++++++++++++++++++------------------------ 1 file changed, 124 insertions(+), 136 deletions(-) diff --git a/src/NlpClient.php b/src/NlpClient.php index 533a9ea..3eb9775 100644 --- a/src/NlpClient.php +++ b/src/NlpClient.php @@ -6,284 +6,272 @@ * Simple interface to the Web64 NLP-Server (https://github.com/web64/nlpserver) for Natural Language Processing tasks */ -class NlpClient{ - +class NlpClient +{ + public $api_url; - public $api_hosts = []; - public $fail_count = 0; - public $debug = false; - private $max_retry_count = 3; - - function __construct( $hosts, $debug = false ) + public $api_hosts = []; + public $fail_count = 0; + public $debug = false; + private $max_retry_count = 3; + + function __construct($hosts, $debug = false) { $this->debug = (bool)$debug; - if ( is_array($hosts) ) - { - foreach( $hosts as $host ) - $this->addHost( $host ); - } - else - $this->addHost( $hosts ); - + if (is_array($hosts)) { + foreach ($hosts as $host) + $this->addHost($host); + } else + $this->addHost($hosts); + // pick random host as default - $this->api_url = $this->api_hosts[ array_rand( $this->api_hosts ) ]; - } - + $this->api_url = $this->api_hosts[array_rand($this->api_hosts)]; + } + /** * Spacy.io Entity Extraction */ - public function spacy_entities( $text, $lang = 'en' ) + public function spacy_entities($text, $lang = 'en') { - $data = $this->post_call('/spacy/entities', ['text' => $text, 'lang' => $lang ] ); + $data = $this->post_call('/spacy/entities', ['text' => $text, 'lang' => $lang]); - return ( !empty($data['entities']) ) ? $data['entities'] : null; + return (!empty($data['entities'])) ? $data['entities'] : null; } /** * AFINN Sentiment Analysis */ - public function afinn_sentiment( $text, $lang = 'en' ) + public function afinn_sentiment($text, $lang = 'en') { - $data = $this->post_call('/afinn', ['text' => $text, 'lang' => $lang ] ); + $data = $this->post_call('/afinn', ['text' => $text, 'lang' => $lang]); - return ( isset($data['afinn']) ) ? $data['afinn'] : null; + return (isset($data['afinn'])) ? $data['afinn'] : null; } /** * Summarize long text */ - public function summarize( $text, $word_count = null ) + public function summarize($text, $word_count = null) { - $data = $this->post_call('/gensim/summarize', ['text' => $text, 'word_count' => $word_count ] ); - - return ( !empty($data['summarize']) ) ? $data['summarize'] : null; + $data = $this->post_call('/gensim/summarize', ['text' => $text, 'word_count' => $word_count]); + + return (!empty($data['summarize'])) ? $data['summarize'] : null; } /** * Article Extraction from HTML */ - public function newspaper_html( $html ) + public function newspaper_html($html) { - $data = $this->post_call('/newspaper', ['html' => $html ] ); - - return ( !empty($data['newspaper']) ) ? $data['newspaper'] : null; + $data = $this->post_call('/newspaper', ['html' => $html]); + + return (!empty($data['newspaper'])) ? $data['newspaper'] : null; } /** * Article Extraction from URL */ - public function newspaper( $url ) + public function newspaper($url) { - $data = $this->get_call('/newspaper', ['url' => $url ] ); + $data = $this->get_call('/newspaper', ['url' => $url]); - return ( !empty($data['newspaper']) ) ? $data['newspaper'] : null; + return (!empty($data['newspaper'])) ? $data['newspaper'] : null; } - + /** * Readability Article Extraction from URL */ - public function readability( $url ) + public function readability($url) { - $data = $this->get_call('/readability', ['url' => $url ] ); + $data = $this->get_call('/readability', ['url' => $url]); - return ( !empty($data['readability']) ) ? $data['readability'] : null; + return (!empty($data['readability'])) ? $data['readability'] : null; } /** * Readability Article Extraction from HTML */ - public function readability_html( $html ) + public function readability_html($html) { - $data = $this->post_call('/readability', ['html' => $html ] ); + $data = $this->post_call('/readability', ['html' => $html]); - return ( !empty($data['readability']) ) ? $data['readability'] : null; + return (!empty($data['readability'])) ? $data['readability'] : null; } /** * Sentiment Analysis by Polyglot */ - public function sentiment( $text, $language = null ) + public function sentiment($text, $language = null) { - $data = $this->post_call('/polyglot/sentiment', ['text' => $text, 'lang' => $language ] ); + $data = $this->post_call('/polyglot/sentiment', ['text' => $text, 'lang' => $language]); - return ( isset($data['sentiment']) ) ? $data['sentiment'] : null; + return (isset($data['sentiment'])) ? $data['sentiment'] : null; } /** * Get neighbouring words */ - public function neighbours( $word, $lang = 'en') + public function neighbours($word, $lang = 'en') { - $data = $this->get_call('/polyglot/neighbours', ['word' => $word, 'lang' => $lang ] ); + $data = $this->get_call('/polyglot/neighbours', ['word' => $word, 'lang' => $lang]); - return ( !empty($data['neighbours']) ) ? $data['neighbours'] : null; + return (!empty($data['neighbours'])) ? $data['neighbours'] : null; } /** * Get entities and sentiment analysis of text */ - public function polyglot_entities( $text, $language = null ) + public function polyglot_entities($text, $language = null) { - $data = $this->post_call('/polyglot/entities', ['text' => $text, 'lang' => $language] ); - $this->msg( $data ); - return new \Web64\Nlp\Classes\PolyglotResponse( $data['polyglot'] ); + $data = $this->post_call('/polyglot/entities', ['text' => $text, 'lang' => $language]); + $this->msg($data); + return new \Web64\Nlp\Classes\PolyglotResponse($data['polyglot']); } /** * Get language code for text */ - public function language( $text ) + public function language($text) { - $data = $this->post_call('/langid', ['text' => $text] ); + $data = $this->post_call('/langid', ['text' => $text]); - if ( isset($data['langid']) && isset($data['langid']['language'])) - { + if (isset($data['langid']) && isset($data['langid']['language'])) { // return 'no' for Norwegian Bokmaal and Nynorsk - if ( $data['langid']['language'] == 'nn' || $data['langid']['language'] == 'nb' ) + if ($data['langid']['language'] == 'nn' || $data['langid']['language'] == 'nb') return 'no'; return $data['langid']['language']; } - + return null; - } + } - public function post_call($path, $params, $retry = 0 ) - { + public function post_call($path, $params, $retry = 0) + { $url = $this->api_url . $path; - $this->msg( "NLP API $path - $url "); + $this->msg("NLP API $path - $url "); $retry++; - - if ( $retry > $this->max_retry_count ) - { + + if ($retry > $this->max_retry_count) { return null; } - $opts = array('http' => - array( - 'method' => 'POST', - 'header' => 'Content-type: application/x-www-form-urlencoded', - 'content' => http_build_query( $params ), - ) + $opts = array( + 'http' => + array( + 'method' => 'POST', + 'header' => 'Content-type: application/x-www-form-urlencoded', + 'content' => http_build_query($params), + ) ); - + $context = stream_context_create($opts); $result = @file_get_contents($url, false, $context); - if ( empty($result) || ( isset($http_response_header) && $http_response_header[0] != 'HTTP/1.0 200 OK' ) ) // empty if server is down + if (empty($result) || (isset($http_response_header) && !str_contains($http_response_header[0], '200 OK'))) // empty if server is down { - $this->msg( "Host Failed: {$url}" ); + $this->msg("Host Failed: {$url}"); - if ( $retry >= $this->max_retry_count ) + if ($retry >= $this->max_retry_count) return null; $this->chooseHost(); - return $this->post_call($path, $params, $retry ); + return $this->post_call($path, $params, $retry); } - if ( empty($result) ) return null; + if (empty($result)) return null; return json_decode($result, 1); } - + public function get_call($path, $params, $retry = 0) { $url = $this->api_url . $path; - + $retry++; - - if ( !empty($params) ) - $url .= '?' . http_build_query( $params ); - $this->msg( "NLP API [GET] $path - $url "); - $result = @file_get_contents( $url, false ); + if (!empty($params)) + $url .= '?' . http_build_query($params); - if ( empty($http_response_header) || $http_response_header[0] == 'HTTP/1.0 404 NOT FOUND' ) + $this->msg("NLP API [GET] $path - $url "); + $result = @file_get_contents($url, false); + + if (empty($http_response_header) || str_contains($http_response_header[0], '404 NOT FOUND')) return null; - if ( empty($result) || ( isset($http_response_header) && $http_response_header[0] != 'HTTP/1.0 200 OK' ) ) // empty if server is down + if (empty($result) || (isset($http_response_header) && !str_contains($http_response_header[0], '200 OK'))) // empty if server is down { - $this->msg( "Host Failed: {$url}" ); + $this->msg("Host Failed: {$url}"); - if ( $retry >= $this->max_retry_count ) + if ($retry >= $this->max_retry_count) return null; $this->chooseHost(); - return $this->get_call($path, $params, $retry ); + return $this->get_call($path, $params, $retry); } - if ( empty($result) ) return null; + if (empty($result)) return null; return json_decode($result, 1); - } /** * Internals */ - public function addHost( $host ) - { - $host = rtrim( $host , '/'); - - if ( array_search($host, $this->api_hosts) === false) - $this->api_hosts[] = $host; - } - - // debug message - private function msg( $value ) - { - if ( $this->debug ) - { - if ( is_array($value) ) - { - if(!defined('STDOUT')) - { - print_r( $value ); - }else{ - fwrite(STDOUT, print_r( $value, true ) . PHP_EOL ); + public function addHost($host) + { + $host = rtrim($host, '/'); + + if (array_search($host, $this->api_hosts) === false) + $this->api_hosts[] = $host; + } + + // debug message + private function msg($value) + { + if ($this->debug) { + if (is_array($value)) { + if (!defined('STDOUT')) { + print_r($value); + } else { + fwrite(STDOUT, print_r($value, true) . PHP_EOL); } - } - else - { - if(!defined('STDOUT')){ + } else { + if (!defined('STDOUT')) { echo $value . PHP_EOL; - }else{ - fwrite(STDOUT, $value . PHP_EOL ); + } else { + fwrite(STDOUT, $value . PHP_EOL); } } - } - } + } + } // find working host private function chooseHost() { $random_a = $this->api_hosts; shuffle($random_a); // pick random host - - foreach( $random_a as $api_url ) - { - $this->msg( "chooseHost() - Testing: $api_url "); - - $content = @file_get_contents( $api_url ); - if ( empty( $content ) ) - { - $this->msg( $content ); + foreach ($random_a as $api_url) { + $this->msg("chooseHost() - Testing: $api_url "); + + $content = @file_get_contents($api_url); + if (empty($content)) { + + $this->msg($content); // Failed - $this->msg( "- Ignoring failed API URL: $api_url " ); + $this->msg("- Ignoring failed API URL: $api_url "); //print_r( $http_response_header ); - }else{ + } else { $this->api_url = $api_url; - $this->msg( "- Working API URL: $api_url" ); + $this->msg("- Working API URL: $api_url"); return true; - } - $this->msg( $content ); + $this->msg($content); } - + return false; } - } From 7e7b32d9d3fbf4378ce2108d5edc4bb69325f594 Mon Sep 17 00:00:00 2001 From: aflorea4 Date: Tue, 19 Jul 2022 12:21:35 +0300 Subject: [PATCH 2/3] Update composer.json --- composer.json | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/composer.json b/composer.json index 7b9b84f..9df4aa6 100644 --- a/composer.json +++ b/composer.json @@ -1,20 +1,16 @@ { - "name": "web64/php-nlp-client", - "description": "Library for accessing NLP apis", + "name": "aflorea4/php-nlp-client", + "description": "Library for accessing NLP apis (Fixed HTTP header interpretation)", "keywords": ["nlp", "natural language", "entity extraction", "article extraction", "language detection"], "license": "MIT", "authors": [ - { - "name": "Olav Hjertaker", - "email": "olav.hjertaker@gmail.com" - } ], "require": { "php": ">=5.6.0" }, "autoload": { "psr-4": { - "Web64\\Nlp\\": "src/" + "Aflorea4\\Nlp\\": "src/" } }, "autoload-dev": { From 921a0a14be81232d2163be28f47c5c84f61acd69 Mon Sep 17 00:00:00 2001 From: Alex Florea Date: Tue, 19 Jul 2022 13:10:59 +0300 Subject: [PATCH 3/3] fixed autoloader --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 9df4aa6..b16bcac 100644 --- a/composer.json +++ b/composer.json @@ -10,7 +10,7 @@ }, "autoload": { "psr-4": { - "Aflorea4\\Nlp\\": "src/" + "Web64\\Nlp\\": "src/" } }, "autoload-dev": {