-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from tarundhankhar/tarun/aadhaar
Tarun/aadhaar
- Loading branch information
Showing
4 changed files
with
228 additions
and
31 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/**
 * Baseline shape of a parsed `document_details` object.
 *
 * Every field starts out as `undefined`; the expected-result fixtures in this
 * module spread this object and override only the fields they expect the
 * parser to populate. The object is frozen so that no fixture (or test) can
 * accidentally mutate the shared baseline — spreading a frozen object still
 * yields an ordinary, mutable copy.
 */
const DOCUMENT_DETAILS_BASE = Object.freeze({
  "date_of_birth": undefined,
  "fathers_name": undefined,
  "gender": undefined,
  "identification_number": undefined,
  "name": undefined,
  "document_type": undefined,
  "address": undefined,
});
|
||
/**
 * Fixtures for the Aadhaar address-parsing tests.
 *
 * `RAW_TEXTS[KEY]` is the list of raw text lines handed to the parser as
 * `raw_text` (presumably OCR output — confirm against the caller), and
 * `PARSED_DETAILS[KEY]` is the `document_details` object expected back for
 * that same KEY. Both maps must keep identical keys.
 */
export default {
  RAW_TEXTS: {
    // Mixed-script input: Devanagari/Tamil lines interleaved with English ones.
    NON_ENGLISH: [
      "भारतीय विशिष्ट पहचान प्राधिकरण",
      "ARDNAAR",
      "GUIGUEL SUATAN SUDHORITY OF INDIA",
      "Address:",
      "முகவரி:",
      "3/1, M G R.",
      "3/1, எம்.ஜி. NAGAR. MANALURPET,",
      "ஆர் நகர், மணலூர்பேட்டை Manafurpet, Viluppuram,",
      "மணலூர்பேட்டை,",
      "விழுப்புரம்,",
      "தமிழ் நாடு - 4",
      "TamilNadu-605754",
    ],
    // Two lines end in a six-digit pin code; the expected address stops at the first one.
    FIRST_OCCURING_PIN_CODE: [
      "UNIQUE IDENTIFICATION AUTHORITY OF INDIA",
      "Address: Pawar Vadi Panchak,",
      "Relavay Lain Jawal, Jail Road.",
      "Nashik Road, Nashik Road,",
      "Nashik, Maharashtra, 422101",
      "Bengaluru-580001",
      "आधार",
      "पता पवार वाडी पंचक, रेलवे लाईन",
      "जवळ, जेल रोड, नाशिक रोड, नाशिक",
      "रोड, नाशिक, महाराष्ट्र, 422101",
    ],
    // Address begins with a guardian header ("S/O <name>").
    GUARDIAN_NAME_ADDRESS_HEADER: [
      "Unique Identification Authority of India",
      "Address: S/O Subhash, B-260, SECTOR-3,",
      "PHASE-3, DWARKA, South West Delhi,",
      "Delhi, 110078",
    ],
    // Address text is preceded by a stray "-" that the expected output omits.
    UNWANTED_PREFIX_SUFFIX: [
      "Unique Identification Authority of India",
      "Address: -B-260, SECTOR-3,",
      "PHASE-3, DWARKA, South West Delhi,",
      "Delhi, 110",
    ].map((line, index) => (index === 3 ? "Delhi, 110078" : line)),
    // Last line carries only a truncated pin ("110"), so no address is expected.
    UNDEFINED_ADDRESS: [
      "Unique Identification Authority of India",
      "Address: B-260, SECTOR-3,",
      "PHASE-3, DWARKA, South West Delhi,",
      "Delhi, 110",
    ],
    // Pin code is followed by trailing noise (". "); the trailing space is intentional.
    ADDRESS_END_LINE: [
      "Unique Identification Authority of India",
      "Address: B-260, SECTOR-3,",
      "PHASE-3, DWARKA, South West Delhi,",
      "Delhi-110078. ",
    ],
    // The "Address:" line is the very first line of the raw text.
    ADDRESS_START_LINE: [
      "Address: B-260, SECTOR-3,",
      "PHASE-3, DWARKA, South West Delhi,",
      "Delhi, 110078",
    ],
  },
  PARSED_DETAILS: {
    NON_ENGLISH: {
      ...DOCUMENT_DETAILS_BASE,
      "document_type": "AADHAAR_CARD",
      "address": "3/1, M G NAGAR. MANALURPET, Manafurpet, Viluppuram, TamilNadu-605754",
    },
    FIRST_OCCURING_PIN_CODE: {
      ...DOCUMENT_DETAILS_BASE,
      "document_type": "AADHAAR_CARD",
      "address": "Pawar Vadi Panchak Relavay Lain Jawal, Jail Road. Nashik Road, Nashik Road, Nashik, Maharashtra, 422101",
      // NOTE(review): this equals the first address segment rather than a
      // person's name — presumably it pins current parser behavior; confirm
      // that this is intended.
      "fathers_name": "Pawar Vadi Panchak",
    },
    GUARDIAN_NAME_ADDRESS_HEADER: {
      ...DOCUMENT_DETAILS_BASE,
      "document_type": "AADHAAR_CARD",
      "address": "S/O Subhash B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi, 110078",
      "fathers_name": "Subhash",
    },
    UNWANTED_PREFIX_SUFFIX: {
      ...DOCUMENT_DETAILS_BASE,
      "document_type": "AADHAAR_CARD",
      "address": "B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi, 110078",
    },
    // No "address" override: the baseline `undefined` is the expected value.
    UNDEFINED_ADDRESS: {
      ...DOCUMENT_DETAILS_BASE,
      "document_type": "AADHAAR_CARD",
    },
    ADDRESS_END_LINE: {
      ...DOCUMENT_DETAILS_BASE,
      "document_type": "AADHAAR_CARD",
      "address": "B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi-110078",
    },
    ADDRESS_START_LINE: {
      ...DOCUMENT_DETAILS_BASE,
      "document_type": "AADHAAR_CARD",
      "address": "B-260 SECTOR-3 PHASE-3, DWARKA, South West Delhi, Delhi, 110078",
    },
  },
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import AadhaarParser from "../document-parser/aadhaar-parser"; | ||
import AADHAAR_PARSER_MOCKS from "./aadhaar_parser.mock"; | ||
|
||
/**
 * Address-parsing scenarios as [test title, fixture key] pairs.
 *
 * Each fixture key selects both the input lines (`RAW_TEXTS[key]`) and the
 * expected `document_details` (`PARSED_DETAILS[key]`) from the mock module.
 * Titles are kept exactly as they were so existing name filters keep matching.
 */
const ADDRESS_PARSER_CASES = [
  ['Address parser should remove all the non-english text segments', 'NON_ENGLISH'],
  ['Address parser should mark address end at the first occurance of pin code', 'FIRST_OCCURING_PIN_CODE'],
  ['Address parser should keep the address header starting with S/O, C/O, D/O etc.', 'GUARDIAN_NAME_ADDRESS_HEADER'],
  ['Address parser should remove the unwanted prefix or suffix noise from the address', 'UNWANTED_PREFIX_SUFFIX'],
  ['Address parser should return undefined address if the end line or start line of address is not identified', 'UNDEFINED_ADDRESS'],
  ['Address parser should identify the end line of address by pin code, even if it is followed by some unwanted characters', 'ADDRESS_END_LINE'],
  ['Address parser should identify the start line of address, even if it is first line of the raw text', 'ADDRESS_START_LINE'],
];

// Register one test per scenario, in table order. Every scenario runs the same
// check: parse the raw text and expect a valid document whose details match
// the corresponding expected fixture.
for (const [title, fixtureKey] of ADDRESS_PARSER_CASES) {
  test(title, () => {
    const parsedDetails = AadhaarParser.parseDocumentDetails({
      raw_text: AADHAAR_PARSER_MOCKS.RAW_TEXTS[fixtureKey],
    });
    expect(parsedDetails).toMatchObject({
      is_document_valid: true,
      document_details: AADHAAR_PARSER_MOCKS.PARSED_DETAILS[fixtureKey],
    });
  });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters