Review note: the line structure of this patch was reconstructed; a 2-space indent is
assumed, so confirm the context lines against the target file before applying.
The added tests now also assert the expected sentence/entity count, which means the
post-image blob id on the index line below predates those additions.

diff --git a/test/wink-nlp-specs.js b/test/wink-nlp-specs.js
index e1af8c8..2bf5de0 100644
--- a/test/wink-nlp-specs.js
+++ b/test/wink-nlp-specs.js
@@ -134,7 +134,7 @@ describe( 'wink-nlp test-coverage and basic behavior', function () {
     expect( doc.out() ).to.equal( sentence );
   } );
 
-  it( 'should tokenize/detokenize the following sentence with non-breaking spaces', function () {
+  it( 'should tokenize/detokenize the text with non-breaking spaces', function () {
     // Reconstruction.
     expect( nlp.readDoc( nbspTokensArray.join('\u00a0') ).out() ).to.equal( nbspTokensArray.join('\u00a0') );
     expect( nlp.readDoc( nbspTokensArray.join(' \u00a0') ).out() ).to.equal( nbspTokensArray.join(' \u00a0') );
@@ -144,6 +144,33 @@ describe( 'wink-nlp test-coverage and basic behavior', function () {
     expect( nlp.readDoc( nbspTokensArray.join(' \u00a0\u00a0') ).out() ).to.equal( nbspTokensArray.join(' \u00a0\u00a0') );
   } );
 
+  it( 'should tokenize/detokenize a sentence with non-breaking spaces', function () {
+    // Expectation: the non-breaking space after "Mr." neither ends a sentence nor is altered on output.
+    var textWith2S = 'I met Mr.\u00a0Gandhi. Mr.\u00a0Gandhi is a nice person.';
+    var sentences = nlp.readDoc( textWith2S ).sentences();
+    var sentencesText = [ 'I met Mr.\u00a0Gandhi.', 'Mr.\u00a0Gandhi is a nice person.' ];
+    // Pin the count first; otherwise the loop below passes vacuously when nothing is detected.
+    expect( sentences.length() ).to.equal( sentencesText.length );
+    // Reconstruction.
+    sentences.each( ( s, k ) => {
+      expect( s.out() ).to.equal( sentencesText[ k ] );
+    } );
+  } );
+
+  it( 'should tokenize/detokenize the entities\' value as text with non-breaking spaces', function () {
+    var textWithEntities = 'I purchased 10 mangoes on March\u00a010th for US$\u00a099.00.';
+    var entities = nlp.readDoc( textWithEntities ).entities();
+    var entitiesText = [ '10', 'March\u00a010th', 'US$\u00a099.00' ];
+    // Pin the count first; otherwise the loop below passes vacuously when nothing is detected.
+    expect( entities.length() ).to.equal( entitiesText.length );
+    // Reconstruction.
+    entities.each( ( e, k ) => {
+      // Both the default output and the explicit value-as-text form are expected to keep the NBSP.
+      expect( e.out( ) ).to.equal( entitiesText[ k ] );
+      expect( e.out( its.value, as.text ) ).to.equal( entitiesText[ k ] );
+    } );
+  } );
+
   it( 'should not contain empty tokens', function () {
     var doc = nlp.readDoc( sentence );
     expect( findEmptyTokens( doc ) ).deep.equal( [] );