Skip to content

Commit

Permalink
fix(*): add missing typescript declarations for word embeddings
Browse files Browse the repository at this point in the history
references #136
references winkjs/wink-embeddings-sg-100d#1

Co-authored-by: Rachna <rachna@graype.in>
  • Loading branch information
sanjayaksaxena and rachnachakraborty committed May 6, 2024
1 parent aff4bb1 commit 2911864
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 56 deletions.
4 changes: 3 additions & 1 deletion src/api/col-get-item.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@
var colGetItemAt = function ( k, start, end, itemFn ) {
  // `k` is relative to the collection; adding `start` gives the absolute
  // index that is handed to `itemFn`.
  var ak = k + start;
  // Valid absolute indexes are `start` through `end`, both inclusive. Anything
  // outside that range is reported by throwing rather than by returning
  // `undefined`, so that a bad index cannot go unnoticed by the caller.
  // The message reports the relative index `k` exactly as the caller passed it.
  if ( ak < start || ak > end ) {
    throw Error( `wink-nlp: ${k} is an invalid or out of bounds index.` );
  }
  return itemFn( ak );
}; // colGetItemAt()

module.exports = colGetItemAt;
4 changes: 3 additions & 1 deletion src/api/sel-get-item.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@
* @private
*/
var selGetItemAt = function ( k, selection, itemFn ) {
  // `k` indexes into `selection`; the element found there is what gets passed
  // to `itemFn`. An index outside `0 .. selection.length - 1` is reported by
  // throwing rather than by returning `undefined`.
  // NOTE(review): the message repeats the `wink-nlp:` prefix. It is kept
  // verbatim because the specs assert this exact text; changing it requires
  // updating those expectations in the same change.
  if ( k < 0 || k >= selection.length ) {
    throw Error( `wink-nlp: wink-nlp: ${k} is an invalid or out of bounds index.` );
  }
  return itemFn( selection[ k ] );
}; // selGetItemAt()

module.exports = selGetItemAt;
24 changes: 12 additions & 12 deletions test/apiA-specs.js
Original file line number Diff line number Diff line change
Expand Up @@ -124,20 +124,20 @@ describe( 'APIs — A', function () {
// ItemAt boundary tests.
// Will need a revamp once SBD is in place (TODO):
describe( 'doc API out of range access test', function () {
  // itemAt() reports an invalid index by throwing, so every call is wrapped in
  // a thunk that chai invokes and whose error message it matches.
  it( '.sentences() should throw out of range index error', function () {
    expect( () => doc1.sentences().itemAt( -1 ) ).to.throw( /^wink-nlp: -1 is an invalid or out of bounds index./ );
    expect( () => doc1.sentences().itemAt( doc1.sentences().length() ) ).to.throw( /^wink-nlp: 3 is an invalid or out of bounds index./ );
  } );

  it( '.tokens().itemAt() should throw out of range index error', function () {
    expect( () => doc1.tokens().itemAt( -1 ) ).to.throw( /^wink-nlp: -1 is an invalid or out of bounds index./ );
    expect( () => doc1.tokens().itemAt( doc1.tokens().length() ) ).to.throw( /^wink-nlp: 37 is an invalid or out of bounds index./ );
  } );

  it( '.tokens().filter().itemAt() should throw out of range index error', function () {
    const ftk1 = doc1.tokens().filter( ( t ) => ( t.out( its.type ) === 'word' ) );
    // Filtered selections raise a message with a doubled `wink-nlp:` prefix;
    // the patterns below match that text as it is currently thrown.
    expect( () => ftk1.itemAt( -1 ) ).to.throw( /^wink-nlp: wink-nlp: -1 is an invalid or out of bounds index./ );
    expect( () => ftk1.itemAt( ftk1.length() ) ).to.throw( /^wink-nlp: wink-nlp: 14 is an invalid or out of bounds index./ );
  } );
} );

Expand Down Expand Up @@ -232,7 +232,7 @@ describe( 'APIs — A', function () {
const i22 = doc2.entities().itemAt( 5 );
expect( i22.out( its.detail ) ).to.deep.equal( ae2[ 5 ] );

expect( doc2.entities().itemAt( 12 ) ).to.deep.equal( undefined );
expect( () => doc2.entities().itemAt( 12 ) ).to.throw( /^wink-nlp: 12 is an invalid or out of bounds index./ );
} );

it( '.filter() should return correctly filter entities', function () {
Expand All @@ -247,7 +247,7 @@ describe( 'APIs — A', function () {
// Also check the parent document!
expect( fe1.itemAt( 1 ).parentDocument() ).to.deep.equal( doc1 );
// Out of range item test
expect( fe1.itemAt( 2 ) ).to.deep.equal( undefined );
expect( () => fe1.itemAt( 2 ) ).to.throw( /^wink-nlp: wink-nlp: 2 is an invalid or out of bounds index./ );
// itemAt() api.
fe1.each( ( e, k ) => {
expect( e.out() ).to.deep.equal( fe1.itemAt( k ).out() );
Expand All @@ -260,7 +260,7 @@ describe( 'APIs — A', function () {
expect( fe2.out( its.detail ) ).to.deep.equal( fae2 );
expect( fe2.itemAt( 1 ).out( its.detail ) ).to.deep.equal( fae2[ 1 ] );
expect( fe2.itemAt( 1 ).parentDocument() ).to.deep.equal( doc2 );
expect( fe2.itemAt( 3 ) ).to.deep.equal( undefined );
expect( () => fe2.itemAt( 3 ) ).to.throw( /^wink-nlp: wink-nlp: 3 is an invalid or out of bounds index./ );
fe2.each( ( e, k ) => {
expect( e.out() ).to.deep.equal( fe2.itemAt( k ).out() );
} );
Expand Down
105 changes: 63 additions & 42 deletions types/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ declare module 'wink-nlp' {
// turn off exporting by default since we don't want to expose internal details
export { };

// *** BEGIN Language Model Specific Declarations ***
// These should always be in sync with the language model's type declarations.
// these types are internal details of the implementing model
type StemAddon = unknown;
type LemmatizeAddon = unknown;
Expand Down Expand Up @@ -40,6 +42,7 @@ declare module 'wink-nlp' {
featureFn: FeatureFn;
addons: ModelAddons;
}
// *** END Language Model Specific Declarations ***

// its helpers

Expand Down Expand Up @@ -106,25 +109,25 @@ declare module 'wink-nlp' {

// Its
export interface ItsHelpers {
case(index: number, token: Token, cache: Cache): Case;
uniqueId(index: number, token: Token): number;
negationFlag(index: number, token: Token): boolean;
normal(index: number, token: Token, cache: Cache): string;
contractionFlag(index: number, token: Token): boolean;
pos(index: number, token: Token, cache: Cache): PartOfSpeech;
precedingSpaces(index: number, token: Token): string;
prefix(index: number, token: Token, cache: Cache): string;
shape(index: number, token: Token, cache: Cache): string;
stopWordFlag(index: number, token: Token, cache: Cache): boolean;
abbrevFlag(index: number, token: Token, cache: Cache): boolean;
suffix(index: number, token: Token, cache: Cache): string;
type(index: number, token: Token, cache: Cache): string;
value(index: number, token: Token, cache: Cache): string;
stem(index: number, token: Token, cache: Cache, addons: ModelAddons): string;
lemma(index: number, token: Token, cache: Cache, addons: ModelAddons): string;
case(index: number, rdd: RawDocumentData): Case;
uniqueId(index: number, rdd: RawDocumentData): number;
negationFlag(index: number, rdd: RawDocumentData): boolean;
normal(index: number, rdd: RawDocumentData): string;
contractionFlag(index: number, rdd: RawDocumentData): boolean;
pos(index: number, rdd: RawDocumentData): PartOfSpeech;
precedingSpaces(index: number, rdd: RawDocumentData): string;
prefix(index: number, rdd: RawDocumentData): string;
shape(index: number, rdd: RawDocumentData): string;
stopWordFlag(index: number, rdd: RawDocumentData): boolean;
abbrevFlag(index: number, rdd: RawDocumentData): boolean;
suffix(index: number, rdd: RawDocumentData): string;
type(index: number, rdd: RawDocumentData): string;
value(index: number, rdd: RawDocumentData): string;
stem(index: number, rdd: RawDocumentData, addons: ModelAddons): string;
lemma(index: number, rdd: RawDocumentData, addons: ModelAddons): string;
vector(): number[];
detail(): Detail;
markedUpText(index: number, token: Token, cache: Cache): string;
markedUpText(index: number, rdd: RawDocumentData): string;
span(spanItem: number[]): number[];
sentenceWiseImportance(rdd: RawDocumentData): SentenceImportance[];
sentiment(spanItem: number[]): number;
Expand All @@ -134,7 +137,7 @@ declare module 'wink-nlp' {
docBOWArray(tf: ModelTermFrequencies): Bow;
bow(tf: ModelTermFrequencies): Bow;
idf(tf: ModelTermFrequencies, idf: ModelInverseDocumentFrequencies): Array<[term: string, frequency: number]>;
tf(tf: ModelTermFrequencies, idf: ModelInverseDocumentFrequencies): Array<[term: string, frequency: number]>;
tf(tf: ModelTermFrequencies): Array<[term: string, frequency: number]>;
modelJSON(tf: ModelTermFrequencies, idf: ModelInverseDocumentFrequencies): string;
}

Expand All @@ -146,6 +149,7 @@ declare module 'wink-nlp' {
freqTable<T>(tokens: T[]): Array<[token: T, freq: number]>;
bigrams<T>(tokens: T[]): Array<[T, T]>;
unique<T>(tokens: T[]): T[];
vector(token: string[]): number[];
}

// functions for use with document
Expand All @@ -168,19 +172,19 @@ declare module 'wink-nlp' {
}

/**
 * A filtered selection of tokens, as returned by `filter()`.
 */
export interface SelectedTokens {
  /** Calls `cb` for each selected token; the callback may also accept the item's index. */
  each(cb: ((item: ItemToken) => void) | ((item: ItemToken, index: number) => void)): void;
  /** Returns a new selection holding the tokens for which `cb` returns true. */
  filter(cb: (item: ItemToken) => boolean): SelectedTokens;
  /**
   * Returns the token at position `k` of this selection.
   * @throws Error when `k` is negative or not less than `length()`; it never returns `undefined`.
   */
  itemAt(k: number): ItemToken;
  length(): number;
  out(): string[];
  out<T>(itsf: ItsFunction<T>): T[] | string[];
  out<T, U>(itsf: ItsFunction<T>, asf: AsFunction<T, U>): U | T[] | string[];
}

export interface Tokens {
each(f: (token: ItemToken) => void): void;
filter(f: (token: ItemToken) => boolean): SelectedTokens;
itemAt(k: number): ItemToken | undefined;
each(cb: ((item: ItemToken) => void) | ((item: ItemToken, index: number) => void)): void;
filter(cb: (item: ItemToken) => boolean): SelectedTokens;
itemAt(k: number): ItemToken;
length(): number;
out(): string[];
out<T>(itsf: ItsFunction<T>): T[] | string[];
Expand All @@ -189,7 +193,7 @@ declare module 'wink-nlp' {

export interface ItemEntity {
parentDocument(): Document;
markup(beginMarker: string, endMarker: string): void;
markup(beginMarker?: string, endMarker?: string): void;
out(): string;
out<T>(itsf: ItsFunction<T>): T | string;
parentSentence(): ItemSentence;
Expand All @@ -198,19 +202,19 @@ declare module 'wink-nlp' {
}

/**
 * A filtered selection of entities, as returned by `filter()`.
 */
export interface SelectedEntities {
  /** Calls `cb` for each selected entity; the callback may also accept the item's index. */
  each(cb: ((item: ItemEntity) => void) | ((item: ItemEntity, index: number) => void)): void;
  /** Returns a new selection holding the entities for which `cb` returns true. */
  filter(cb: (item: ItemEntity) => boolean): SelectedEntities;
  /**
   * Returns the entity at position `k` of this selection.
   * @throws Error when `k` is out of bounds; it never returns `undefined`.
   */
  itemAt(k: number): ItemEntity;
  length(): number;
  out(): string[];
  out<T>(itsf: ItsFunction<T>): T[] | string[];
  out<T, U>(itsf: ItsFunction<T>, asf: AsFunction<T, U>): U | T[] | string[];
}

export interface Entities {
each(f: (entity: ItemEntity) => void): void;
filter(f: (entity: ItemEntity) => boolean): SelectedEntities;
itemAt(k: number): ItemEntity | undefined;
each(cb: ((item: ItemEntity) => void) | ((item: ItemEntity, index: number) => void)): void;
filter(cb: (item: ItemEntity) => boolean): SelectedEntities;
itemAt(k: number): ItemEntity;
length(): number;
out(): string[];
out<T>(itsf: ItsFunction<T>): T[] | string[];
Expand All @@ -219,7 +223,7 @@ declare module 'wink-nlp' {

export interface ItemCustomEntity {
parentDocument(): Document;
markup(beginMarker: string, endMarker: string): void;
markup(beginMarker?: string, endMarker?: string): void;
out(): string;
out<T>(itsf: ItsFunction<T>): T | string;
parentSentence(): ItemSentence;
Expand All @@ -228,19 +232,19 @@ declare module 'wink-nlp' {
}

/**
 * A filtered selection of custom entities, as returned by `filter()`.
 */
export interface SelectedCustomEntities {
  /** Calls `cb` for each selected custom entity; the callback may also accept the item's index. */
  each(cb: ((item: ItemCustomEntity) => void) | ((item: ItemCustomEntity, index: number) => void)): void;
  /** Returns a new selection holding the custom entities for which `cb` returns true. */
  filter(cb: (item: ItemCustomEntity) => boolean): SelectedCustomEntities;
  /**
   * Returns the custom entity at position `k` of this selection.
   * Declared as always returning an item; presumably an out-of-bounds `k`
   * throws as it does for the other selections (confirm against the implementation).
   */
  itemAt(k: number): ItemCustomEntity;
  length(): number;
  out(): string[];
  out<T>(itsf: ItsFunction<T>): T[] | string[];
  out<T, U>(itsf: ItsFunction<T>, asf: AsFunction<T, U>): U | T[] | string[];
}

export interface CustomEntities {
each(f: (entity: ItemCustomEntity) => void): void;
filter(f: (entity: ItemCustomEntity) => boolean): SelectedCustomEntities;
itemAt(k: number): ItemCustomEntity | undefined;
each(cb: ((item: ItemCustomEntity) => void) | ((item: ItemCustomEntity, index: number) => void)): void;
filter(cb: (item: ItemCustomEntity) => boolean): SelectedCustomEntities;
itemAt(k: number): ItemCustomEntity;
length(): number;
out(): string[];
out<T>(itsf: ItsFunction<T>): T[] | string[];
Expand All @@ -249,7 +253,7 @@ declare module 'wink-nlp' {

export interface ItemSentence {
parentDocument(): Document;
markup(beginMarker: string, endMarker: string): void;
markup(beginMarker?: string, endMarker?: string): void;
out(): string;
out<T>(itsf: ItsFunction<T>): T | string;
entities(): Entities;
Expand All @@ -259,8 +263,8 @@ declare module 'wink-nlp' {
}

export interface Sentences {
each(f: (entity: ItemSentence) => void): void;
itemAt(k: number): ItemSentence | undefined;
each(cb: ((item: ItemSentence) => void) | ((item: ItemSentence, index: number) => void)): void;
itemAt(k: number): ItemSentence;
length(): number;
out(): string[];
out<T>(itsf: ItsFunction<T>): T[] | string[];
Expand All @@ -277,6 +281,8 @@ declare module 'wink-nlp' {
sentences(): Sentences;
tokens(): Tokens;
printTokens(): void;
pipeConfig(): string[];
contextualVectors(lemma: boolean, specifcWordVectors: string[], similarWordVectors: boolean, wordVectorsLimit: number): string;
}

export interface CerExample {
Expand All @@ -295,6 +301,18 @@ declare module 'wink-nlp' {
patterns: string[];
}

// Structure of a wink word embeddings object; keep it in sync with the
// embeddings repository. It is the type of the optional `wordEmbeddings`
// argument of the default-exported `WinkFn`.
// NOTE(review): the meaning of individual fields is defined by the embeddings
// package and is not restated here; confirm there before relying on them.
interface WordEmbedding {
  // Scalar metadata.
  size: number;
  dimensions: number;
  precision: number;
  wordIndex: number;
  l2NormIndex: number;
  // Vocabulary and vector data.
  words: Array<string>;
  unkVector: Array<number>;
  vectors: { [word: string]: number[] };
}

export interface WinkMethods {
readDoc(text: string): Document;
// returns number of learned entities
Expand All @@ -303,7 +321,7 @@ declare module 'wink-nlp' {
as: AsHelpers;
}

// Entry point of the package: takes a language model and returns the
// wink-nlp methods. `pipe` and `wordEmbeddings` are both optional, so calls
// passing one, two or three arguments all type-check against this declaration.
export default function WinkFn(theModel: Model, pipe?: string[], wordEmbeddings?: WordEmbedding): WinkMethods;
}

declare module 'wink-nlp/utilities/bm25-vectorizer' {
Expand Down Expand Up @@ -348,6 +366,9 @@ declare module 'wink-nlp/utilities/similarity' {
tversky<T>(setA: Set<T>, setB: Set<T>, alpha?: number, beta?: number): number;
oo<T>(setA: Set<T>, setB: Set<T>): number;
};
vector: {
cosine(vectorA: number[], vectorB: number[]): number;
};
}

const similarity: SimilarityHelper;
Expand Down

0 comments on commit 2911864

Please sign in to comment.