From 857f377ec37211756bbee885ac4b154701accc66 Mon Sep 17 00:00:00 2001 From: tarundhankhar Date: Tue, 5 Apr 2022 16:33:56 +0530 Subject: [PATCH] unit tests added for aadhaar parser --- src/_tests_/aadhaar_parser.mock.ts | 107 ++++++++++++++++++++++++++ src/_tests_/aadhaar_parser.test.ts | 66 ++++++++++++++++ src/document-parser/aadhaar-parser.ts | 59 +------------- src/interfaces/DocumentParser.ts | 4 - 4 files changed, 176 insertions(+), 60 deletions(-) create mode 100644 src/_tests_/aadhaar_parser.mock.ts create mode 100644 src/_tests_/aadhaar_parser.test.ts diff --git a/src/_tests_/aadhaar_parser.mock.ts b/src/_tests_/aadhaar_parser.mock.ts new file mode 100644 index 0000000..82d2096 --- /dev/null +++ b/src/_tests_/aadhaar_parser.mock.ts @@ -0,0 +1,107 @@ +const DOCUMENT_DETAILS_BASE = { + "date_of_birth": undefined, + "fathers_name": undefined, + "gender": undefined, + "identification_number": undefined, + "name": undefined, + "document_type": undefined, + "address": undefined, +}; + +export default { + RAW_TEXTS: { + NON_ENGLISH: [ + "भारतीय विशिष्ट पहचान प्राधिकरण", + "ARDNAAR", + "GUIGUEL SUATAN SUDHORITY OF INDIA", + "Address:", + "முகவரி:", + "3/1, M G R.", + "3/1, எம்.ஜி. NAGAR. MANALURPET,", + "ஆர் நகர், மணலூர்பேட்டை Manafurpet, Viluppuram,", + "மணலூர்பேட்டை,", + "விழுப்புரம்,", + "தமிழ் நாடு - 4", + "TamilNadu-605754", + ], + FIRST_OCCURING_PIN_CODE: [ + "UNIQUE IDENTIFICATION AUTHORITY OF INDIA", + "Address: Pawar Vadi Panchak,", + "Relavay Lain Jawal, Jail Road.", + "Nashik Road, Nashik Road,", + "Nashik, Maharashtra, 422101", + "Bengaluru-580001", + "आधार", + "पता पवार वाडी पंचक, रेलवे लाईन", + "जवळ, जेल रोड, नाशिक रोड, नाशिक", + "रोड, नाशिक, महाराष्ट्र, 422101", + ], + GUARDIAN_NAME_ADDRESS_HEADER: [ + "Unique Identification Authority of India", + "Address: S/O Subhash, B-260, SECTOR-3,", + "PHASE-3, DWARKA, South West Delhi,", + "Delhi, 110078", + ], + UNWANTED_PREFIX_SUFFIX: [ + "Unique Identification Authority of India", + "Address: -B-260, SECTOR-3,", + "PHASE-3, DWARKA, South West Delhi,", + "Delhi, 110078", + ], + UNDEFINED_ADDRESS: [ + "Unique Identification Authority of India", + "Address: B-260, SECTOR-3,", + "PHASE-3, DWARKA, South West Delhi,", + "Delhi, 110", + ], + ADDRESS_END_LINE: [ + "Unique Identification Authority of India", + "Address: B-260, SECTOR-3,", + "PHASE-3, DWARKA, South West Delhi,", + "Delhi-110078. ", + ], + ADDRESS_START_LINE: [ + "Address: B-260, SECTOR-3,", + "PHASE-3, DWARKA, South West Delhi,", + "Delhi, 110078", + ] + }, + PARSED_DETAILS: { + NON_ENGLISH: { + ...DOCUMENT_DETAILS_BASE, + "document_type": "AADHAAR_CARD", + "address": "3/1, M G NAGAR. MANALURPET, Manafurpet, Viluppuram, TamilNadu-605754", + }, + FIRST_OCCURING_PIN_CODE: { + ...DOCUMENT_DETAILS_BASE, + "document_type": "AADHAAR_CARD", + "address": "Pawar Vadi Panchak Relavay Lain Jawal, Jail Road. Nashik Road, Nashik Road, Nashik, Maharashtra, 422101", + "fathers_name": "Pawar Vadi Panchak" + }, + GUARDIAN_NAME_ADDRESS_HEADER: { + ...DOCUMENT_DETAILS_BASE, + "document_type": "AADHAAR_CARD", + "address": "S/O Subhash B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi, 110078", + "fathers_name": "Subhash" + }, + UNWANTED_PREFIX_SUFFIX: { + ...DOCUMENT_DETAILS_BASE, + "document_type": "AADHAAR_CARD", + "address": "B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi, 110078", + }, + UNDEFINED_ADDRESS: { + ...DOCUMENT_DETAILS_BASE, + "document_type": "AADHAAR_CARD", + }, + ADDRESS_END_LINE: { + ...DOCUMENT_DETAILS_BASE, + "document_type": "AADHAAR_CARD", + "address": "B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi-110078", + }, + ADDRESS_START_LINE: { + ...DOCUMENT_DETAILS_BASE, + "document_type": "AADHAAR_CARD", + "address": "B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi, 110078", + } + } +}; diff --git a/src/_tests_/aadhaar_parser.test.ts b/src/_tests_/aadhaar_parser.test.ts new file mode 100644 index 0000000..d2f4359 --- /dev/null +++ b/src/_tests_/aadhaar_parser.test.ts @@ -0,0 +1,66 @@ +import AadhaarParser from "../document-parser/aadhaar-parser"; +import AADHAAR_PARSER_MOCKS from "./aadhaar_parser.mock"; + +test('Address parser should remove all the non-english text segments', () => { + const parsedDetails = AadhaarParser.parseDocumentDetails({ + raw_text: AADHAAR_PARSER_MOCKS.RAW_TEXTS.NON_ENGLISH + }); + expect(parsedDetails).toMatchObject({ + is_document_valid: true, + document_details: AADHAAR_PARSER_MOCKS.PARSED_DETAILS.NON_ENGLISH + }) +}); +test('Address parser should mark address end at the first occurance of pin code', () => { + const parsedDetails = AadhaarParser.parseDocumentDetails({ + raw_text: AADHAAR_PARSER_MOCKS.RAW_TEXTS.FIRST_OCCURING_PIN_CODE + }); + expect(parsedDetails).toMatchObject({ + is_document_valid: true, + document_details: AADHAAR_PARSER_MOCKS.PARSED_DETAILS.FIRST_OCCURING_PIN_CODE + }) +}); +test('Address parser should keep the address header starting with S/O, C/O, D/O etc.', () => { + const parsedDetails = AadhaarParser.parseDocumentDetails({ + raw_text: AADHAAR_PARSER_MOCKS.RAW_TEXTS.GUARDIAN_NAME_ADDRESS_HEADER + }); + expect(parsedDetails).toMatchObject({ + is_document_valid: true, + document_details: AADHAAR_PARSER_MOCKS.PARSED_DETAILS.GUARDIAN_NAME_ADDRESS_HEADER + }) +}); +test('Address parser should remove the unwanted prefix or suffix noise from the address', () => { + const parsedDetails = AadhaarParser.parseDocumentDetails({ + raw_text: AADHAAR_PARSER_MOCKS.RAW_TEXTS.UNWANTED_PREFIX_SUFFIX + }); + expect(parsedDetails).toMatchObject({ + is_document_valid: true, + document_details: AADHAAR_PARSER_MOCKS.PARSED_DETAILS.UNWANTED_PREFIX_SUFFIX + }) +}); +test('Address parser should return undefined address if the end line or start line of address is not identified', () => { + const parsedDetails = AadhaarParser.parseDocumentDetails({ + raw_text: AADHAAR_PARSER_MOCKS.RAW_TEXTS.UNDEFINED_ADDRESS + }); + expect(parsedDetails).toMatchObject({ + is_document_valid: true, + document_details: AADHAAR_PARSER_MOCKS.PARSED_DETAILS.UNDEFINED_ADDRESS + }) +}); +test('Address parser should identify the end line of address by pin code, even if it is followed by some unwanted characters', () => { + const parsedDetails = AadhaarParser.parseDocumentDetails({ + raw_text: AADHAAR_PARSER_MOCKS.RAW_TEXTS.ADDRESS_END_LINE + }); + expect(parsedDetails).toMatchObject({ + is_document_valid: true, + document_details: AADHAAR_PARSER_MOCKS.PARSED_DETAILS.ADDRESS_END_LINE + }) +}); +test('Address parser should identify the start line of address, even if it is first line of the raw text', () => { + const parsedDetails = AadhaarParser.parseDocumentDetails({ + raw_text: AADHAAR_PARSER_MOCKS.RAW_TEXTS.ADDRESS_START_LINE + }); + expect(parsedDetails).toMatchObject({ + is_document_valid: true, + document_details: AADHAAR_PARSER_MOCKS.PARSED_DETAILS.ADDRESS_START_LINE + }) +}); diff --git a/src/document-parser/aadhaar-parser.ts b/src/document-parser/aadhaar-parser.ts index dfff840..9bcf884 100644 --- a/src/document-parser/aadhaar-parser.ts +++ b/src/document-parser/aadhaar-parser.ts @@ -1,12 +1,10 @@ import _ from "lodash"; import * as moment from "moment"; import Constants from "../constants"; -import { GooglePlaceConstants } from "./constants"; import { ParseDocumentDetailsRequest, ParseDocumentDetailsResponse } from "../interfaces/DocumentParser"; -import requestPromise from "request-promise"; // TODO update regex rules const AADHAAR_REGEX = { @@ -398,52 +396,6 @@ const validateAadhaarText = ( ); }; -const getGooglePlacesDetails = async pinCode => { - const googlePlacesResponse = await requestPromise({ - method: "GET", - url: - GooglePlaceConstants.BASE_URL + - pinCode + - GooglePlaceConstants.REGION + - GooglePlaceConstants.KEY_CONNECTOR + - GooglePlaceConstants.API_KEY - }); - const addressComponents = _.get( - JSON.parse(googlePlacesResponse), - "results[0].address_components", - [] - ); - const localityComponent = _.find(addressComponents, component => { - const types = _.get(component, "types", []); - return _.includes(types, "locality"); - }); - const cityComponent = _.find(addressComponents, component => { - const types = _.get(component, "types", []); - return _.includes(types, "administrative_area_level_2"); - }); - const stateComponent = _.find(addressComponents, component => { - const types = _.get(component, "types", []); - return _.includes(types, "administrative_area_level_1"); - }); - return { - locality: _.get(localityComponent, "long_name"), - city: _.get(cityComponent, "long_name"), - state: _.get(stateComponent, "long_name") - }; -}; - -const populateAadhaarAddressDetails = async address => { - const pinCode = _.slice(address, -6).join(""); - const { locality, city, state } = await getGooglePlacesDetails(pinCode); - return { - address, - locality, - city, - state, - pinCode - }; -}; - // ******************************************************* // // Logic for internal functions ends here // // ******************************************************* // @@ -451,9 +403,9 @@ const populateAadhaarAddressDetails = async address => { // ******************************************************* // // Logic for API handlers starts here // // ******************************************************* // -AadhaarParser.parseDocumentDetails = async ( +AadhaarParser.parseDocumentDetails = ( params: ParseDocumentDetailsRequest -): Promise => { +): ParseDocumentDetailsResponse => { const { raw_text: rawTextLines } = params; const textLines = filterRelevantAadhaarText(rawTextLines); const aadhaarHeadingLineNumbers = parseAadhaarHeadingLineNumbers(textLines); @@ -463,14 +415,9 @@ AadhaarParser.parseDocumentDetails = async ( } const parsedDetails = parseAadhaarText(textLines, aadhaarHeadingLineNumbers); - const address = parsedDetails.address; - const addressDetails = !_.isEmpty(address) - ? await populateAadhaarAddressDetails(address) - : {}; - const populateParsedDetails = { ...parsedDetails, ...addressDetails }; return { is_document_valid: true, - document_details: populateParsedDetails + document_details: parsedDetails }; }; // ******************************************************* // diff --git a/src/interfaces/DocumentParser.ts b/src/interfaces/DocumentParser.ts index fc33cf7..e0136c0 100644 --- a/src/interfaces/DocumentParser.ts +++ b/src/interfaces/DocumentParser.ts @@ -6,10 +6,6 @@ interface DocumentDetails { date_of_birth?: string; gender?: string; address?: string; - city?: string; - state?: string; - locality?: string; - pinCode?: string; } export interface ParseDocumentDetailsRequest {