Skip to content

Commit

Permalink
unit tests added for aadhaar parser
Browse files Browse the repository at this point in the history
  • Loading branch information
tarundhankhar committed Apr 5, 2022
1 parent e7e8796 commit 857f377
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 60 deletions.
107 changes: 107 additions & 0 deletions src/_tests_/aadhaar_parser.mock.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/**
 * Baseline for every expected `document_details` fixture in this module.
 *
 * Each field is explicitly present with the value `undefined`; the individual
 * fixtures spread this object and then override only the fields they expect
 * to be populated.
 */
const DOCUMENT_DETAILS_BASE = {
  date_of_birth: undefined,
  fathers_name: undefined,
  gender: undefined,
  identification_number: undefined,
  name: undefined,
  document_type: undefined,
  address: undefined
};

/**
 * Fixtures for the Aadhaar parser unit tests.
 *
 * RAW_TEXTS holds the arrays of text lines that the tests pass as `raw_text`
 * to `AadhaarParser.parseDocumentDetails`; PARSED_DETAILS holds, under the
 * same key, the `document_details` each test expects back. Every expected
 * object starts from DOCUMENT_DETAILS_BASE (all fields `undefined`) and
 * overrides only the fields that should be populated.
 */
export default {
// Input side: one array of text lines per scenario.
RAW_TEXTS: {
// Devanagari and Tamil lines interleaved with English address text. The second
// and third entries are presumably OCR-garbled heading lines (TODO confirm).
NON_ENGLISH: [
"भारतीय विशिष्ट पहचान प्राधिकरण",
"ARDNAAR",
"GUIGUEL SUATAN SUDHORITY OF INDIA",
"Address:",
"முகவரி:",
"3/1, M G R.",
"3/1, எம்.ஜி. NAGAR. MANALURPET,",
"ஆர் நகர், மணலூர்பேட்டை Manafurpet, Viluppuram,",
"மணலூர்பேட்டை,",
"விழுப்புரம்,",
"தமிழ் நாடு - 4",
"TamilNadu-605754",
],
// Two consecutive lines end in a six-digit number (422101, then 580001); the
// expected address stops at the first one.
FIRST_OCCURING_PIN_CODE: [
"UNIQUE IDENTIFICATION AUTHORITY OF INDIA",
"Address: Pawar Vadi Panchak,",
"Relavay Lain Jawal, Jail Road.",
"Nashik Road, Nashik Road,",
"Nashik, Maharashtra, 422101",
"Bengaluru-580001",
"आधार",
"पता पवार वाडी पंचक, रेलवे लाईन",
"जवळ, जेल रोड, नाशिक रोड, नाशिक",
"रोड, नाशिक, महाराष्ट्र, 422101",
],
// The address opens with an "S/O <name>" guardian header.
GUARDIAN_NAME_ADDRESS_HEADER: [
"Unique Identification Authority of India",
"Address: S/O Subhash, B-260, SECTOR-3,",
"PHASE-3, DWARKA, South West Delhi,",
"Delhi, 110078",
],
// The address text starts with a stray "-" directly after "Address: ".
UNWANTED_PREFIX_SUFFIX: [
"Unique Identification Authority of India",
"Address: -B-260, SECTOR-3,",
"PHASE-3, DWARKA, South West Delhi,",
"Delhi, 110078",
],
// The last line ends in "110", so no six-digit number appears anywhere.
UNDEFINED_ADDRESS: [
"Unique Identification Authority of India",
"Address: B-260, SECTOR-3,",
"PHASE-3, DWARKA, South West Delhi,",
"Delhi, 110",
],
// The six-digit number is followed by ". " (period plus trailing space).
ADDRESS_END_LINE: [
"Unique Identification Authority of India",
"Address: B-260, SECTOR-3,",
"PHASE-3, DWARKA, South West Delhi,",
"Delhi-110078. ",
],
// No heading line: the "Address:" line is the very first entry.
ADDRESS_START_LINE: [
"Address: B-260, SECTOR-3,",
"PHASE-3, DWARKA, South West Delhi,",
"Delhi, 110078",
]
},
// Expected side: compared against `document_details` with `toMatchObject`.
PARSED_DETAILS: {
// Only Latin-script address text remains.
NON_ENGLISH: {
...DOCUMENT_DETAILS_BASE,
"document_type": "AADHAAR_CARD",
"address": "3/1, M G NAGAR. MANALURPET, Manafurpet, Viluppuram, TamilNadu-605754",
},
// Address ends at 422101; the following "Bengaluru-580001" line is excluded.
// NOTE(review): `fathers_name` equals the first address segment here, which
// looks like it pins current parser output rather than a real guardian
// name; confirm this is intended.
FIRST_OCCURING_PIN_CODE: {
...DOCUMENT_DETAILS_BASE,
"document_type": "AADHAAR_CARD",
"address": "Pawar Vadi Panchak Relavay Lain Jawal, Jail Road. Nashik Road, Nashik Road, Nashik, Maharashtra, 422101",
"fathers_name": "Pawar Vadi Panchak"
},
// The "S/O Subhash" header stays in the address and also yields `fathers_name`.
GUARDIAN_NAME_ADDRESS_HEADER: {
...DOCUMENT_DETAILS_BASE,
"document_type": "AADHAAR_CARD",
"address": "S/O Subhash B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi, 110078",
"fathers_name": "Subhash"
},
// The leading "-" from the raw text is gone.
UNWANTED_PREFIX_SUFFIX: {
...DOCUMENT_DETAILS_BASE,
"document_type": "AADHAAR_CARD",
"address": "B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi, 110078",
},
// No `address` override, so it stays `undefined` from DOCUMENT_DETAILS_BASE.
UNDEFINED_ADDRESS: {
...DOCUMENT_DETAILS_BASE,
"document_type": "AADHAAR_CARD",
},
// The trailing ". " after the six-digit number is dropped.
ADDRESS_END_LINE: {
...DOCUMENT_DETAILS_BASE,
"document_type": "AADHAAR_CARD",
"address": "B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi-110078",
},
// Same expected address as when a heading line precedes "Address:".
ADDRESS_START_LINE: {
...DOCUMENT_DETAILS_BASE,
"document_type": "AADHAAR_CARD",
"address": "B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi, 110078",
}
}
};
66 changes: 66 additions & 0 deletions src/_tests_/aadhaar_parser.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import AadhaarParser from "../document-parser/aadhaar-parser";
import AADHAAR_PARSER_MOCKS from "./aadhaar_parser.mock";

// Scenario: raw lines mix Devanagari/Tamil text with English address text;
// the expected details contain only the English address.
test('Address parser should remove all the non-english text segments', () => {
  const { RAW_TEXTS, PARSED_DETAILS } = AADHAAR_PARSER_MOCKS;
  const actual = AadhaarParser.parseDocumentDetails({ raw_text: RAW_TEXTS.NON_ENGLISH });
  const expected = {
    is_document_valid: true,
    document_details: PARSED_DETAILS.NON_ENGLISH
  };
  expect(actual).toMatchObject(expected);
});
// Scenario: two lines in a row end with a six-digit number; the expected
// address stops at the first of them.
test('Address parser should mark address end at the first occurance of pin code', () => {
  const { RAW_TEXTS, PARSED_DETAILS } = AADHAAR_PARSER_MOCKS;
  const actual = AadhaarParser.parseDocumentDetails({
    raw_text: RAW_TEXTS.FIRST_OCCURING_PIN_CODE
  });
  const expected = {
    is_document_valid: true,
    document_details: PARSED_DETAILS.FIRST_OCCURING_PIN_CODE
  };
  expect(actual).toMatchObject(expected);
});
// Scenario: the address begins with an "S/O <name>" header; the expected
// details keep it in the address and also report the name as fathers_name.
test('Address parser should keep the address header starting with S/O, C/O, D/O etc.', () => {
  const { RAW_TEXTS, PARSED_DETAILS } = AADHAAR_PARSER_MOCKS;
  const actual = AadhaarParser.parseDocumentDetails({
    raw_text: RAW_TEXTS.GUARDIAN_NAME_ADDRESS_HEADER
  });
  const expected = {
    is_document_valid: true,
    document_details: PARSED_DETAILS.GUARDIAN_NAME_ADDRESS_HEADER
  };
  expect(actual).toMatchObject(expected);
});
// Scenario: the raw address text starts with a stray "-"; the expected
// address does not.
test('Address parser should remove the unwanted prefix or suffix noise from the address', () => {
  const { RAW_TEXTS, PARSED_DETAILS } = AADHAAR_PARSER_MOCKS;
  const actual = AadhaarParser.parseDocumentDetails({
    raw_text: RAW_TEXTS.UNWANTED_PREFIX_SUFFIX
  });
  const expected = {
    is_document_valid: true,
    document_details: PARSED_DETAILS.UNWANTED_PREFIX_SUFFIX
  };
  expect(actual).toMatchObject(expected);
});
// Scenario: the raw text contains no six-digit number, so the expected
// details leave `address` undefined while the document is still valid.
test('Address parser should return undefined address if the end line or start line of address is not identified', () => {
  const { RAW_TEXTS, PARSED_DETAILS } = AADHAAR_PARSER_MOCKS;
  const actual = AadhaarParser.parseDocumentDetails({
    raw_text: RAW_TEXTS.UNDEFINED_ADDRESS
  });
  const expected = {
    is_document_valid: true,
    document_details: PARSED_DETAILS.UNDEFINED_ADDRESS
  };
  expect(actual).toMatchObject(expected);
});
// Scenario: the six-digit number on the last line is followed by ". ";
// the expected address ends right after the number.
test('Address parser should identify the end line of address by pin code, even if it is followed by some unwanted characters', () => {
  const { RAW_TEXTS, PARSED_DETAILS } = AADHAAR_PARSER_MOCKS;
  const actual = AadhaarParser.parseDocumentDetails({
    raw_text: RAW_TEXTS.ADDRESS_END_LINE
  });
  const expected = {
    is_document_valid: true,
    document_details: PARSED_DETAILS.ADDRESS_END_LINE
  };
  expect(actual).toMatchObject(expected);
});
// Scenario: there is no heading line, so "Address:" is the first raw line;
// the expected address is still fully populated.
test('Address parser should identify the start line of address, even if it is first line of the raw text', () => {
  const { RAW_TEXTS, PARSED_DETAILS } = AADHAAR_PARSER_MOCKS;
  const actual = AadhaarParser.parseDocumentDetails({
    raw_text: RAW_TEXTS.ADDRESS_START_LINE
  });
  const expected = {
    is_document_valid: true,
    document_details: PARSED_DETAILS.ADDRESS_START_LINE
  };
  expect(actual).toMatchObject(expected);
});
59 changes: 3 additions & 56 deletions src/document-parser/aadhaar-parser.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import _ from "lodash";
import * as moment from "moment";
import Constants from "../constants";
import { GooglePlaceConstants } from "./constants";
import {
ParseDocumentDetailsRequest,
ParseDocumentDetailsResponse
} from "../interfaces/DocumentParser";
import requestPromise from "request-promise";

// TODO update regex rules
const AADHAAR_REGEX = {
Expand Down Expand Up @@ -398,62 +396,16 @@ const validateAadhaarText = (
);
};

/**
 * Looks up locality, city and state for a pin code via the endpoint described
 * by `GooglePlaceConstants`.
 *
 * The request URL is BASE_URL + pinCode + REGION + KEY_CONNECTOR + API_KEY.
 * The pin code is percent-encoded before it is placed in the URL, because the
 * caller derives it from parsed document text and it is not guaranteed to be
 * six digits.
 *
 * No error handling happens here: a failure from the request or from
 * `JSON.parse` propagates to the caller as a rejected promise.
 *
 * @param pinCode Pin code to look up.
 * @returns `{ locality, city, state }`. Each is the `long_name` of the first
 *   entry in `results[0].address_components` whose `types` contains
 *   `locality`, `administrative_area_level_2` and
 *   `administrative_area_level_1` respectively, or `undefined` when no such
 *   entry exists.
 */
const getGooglePlacesDetails = async (pinCode: string) => {
  const googlePlacesResponse = await requestPromise({
    method: "GET",
    url:
      GooglePlaceConstants.BASE_URL +
      encodeURIComponent(pinCode) +
      GooglePlaceConstants.REGION +
      GooglePlaceConstants.KEY_CONNECTOR +
      GooglePlaceConstants.API_KEY
  });
  const addressComponents = _.get(
    JSON.parse(googlePlacesResponse),
    "results[0].address_components",
    []
  );

  // Returns the long_name of the first component tagged with `type`, if any.
  const longNameOfType = (type: string) => {
    const match = _.find(addressComponents, component =>
      _.includes(_.get(component, "types", []), type)
    );
    return _.get(match, "long_name");
  };

  return {
    locality: longNameOfType("locality"),
    city: longNameOfType("administrative_area_level_2"),
    state: longNameOfType("administrative_area_level_1")
  };
};

/**
 * Builds the address details for a parsed address: the address itself, the
 * pin code taken from its last six characters, and the locality, city and
 * state that `getGooglePlacesDetails` returns for that pin code.
 *
 * @param address Parsed address text; its last six characters are used as
 *   the pin code without further validation.
 * @returns `{ address, locality, city, state, pinCode }`.
 */
const populateAadhaarAddressDetails = async address => {
  // The trailing six characters of the address are treated as the pin code.
  const trailingChars = _.slice(address, -6);
  const pinCode = trailingChars.join("");

  const placeDetails = await getGooglePlacesDetails(pinCode);

  return {
    address,
    locality: placeDetails.locality,
    city: placeDetails.city,
    state: placeDetails.state,
    pinCode
  };
};

// ******************************************************* //
// Logic for internal functions ends here //
// ******************************************************* //

// ******************************************************* //
// Logic for API handlers starts here //
// ******************************************************* //
AadhaarParser.parseDocumentDetails = async (
AadhaarParser.parseDocumentDetails = (
params: ParseDocumentDetailsRequest
): Promise<ParseDocumentDetailsResponse> => {
): ParseDocumentDetailsResponse => {
const { raw_text: rawTextLines } = params;
const textLines = filterRelevantAadhaarText(rawTextLines);
const aadhaarHeadingLineNumbers = parseAadhaarHeadingLineNumbers(textLines);
Expand All @@ -463,14 +415,9 @@ AadhaarParser.parseDocumentDetails = async (
}

const parsedDetails = parseAadhaarText(textLines, aadhaarHeadingLineNumbers);
const address = parsedDetails.address;
const addressDetails = !_.isEmpty(address)
? await populateAadhaarAddressDetails(address)
: {};
const populateParsedDetails = { ...parsedDetails, ...addressDetails };
return {
is_document_valid: true,
document_details: populateParsedDetails
document_details: parsedDetails
};
};
// ******************************************************* //
Expand Down
4 changes: 0 additions & 4 deletions src/interfaces/DocumentParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ interface DocumentDetails {
date_of_birth?: string;
gender?: string;
address?: string;
city?: string;
state?: string;
locality?: string;
pinCode?: string;
}

export interface ParseDocumentDetailsRequest {
Expand Down

0 comments on commit 857f377

Please sign in to comment.