In [1]:
import numpy as np
import pandas as pd
import string
import unicodedata
import re

# Intended to show full cell text when frames are displayed.
# This option is active as written; comment the line out to get pandas' default column width back.
pd.set_option('display.max_colwidth', 0)

# Load the raw FAQ export, then discard the 'Active' column and any row with a missing value.
raw = pd.read_excel("../data/raw/inquire-boulder-faq-text/inquire-boulder-active-faqs-2019-01-02.xls.xlsx")
faq = (
    raw
    .drop(columns='Active')
    .dropna()
)

In [2]:
def find_heading(row):
    """Return the introductory text that precedes the first question of an FAQ entry.

    The first question is taken to be the last '. '-delimited sentence before
    the first '?'. Everything in front of that sentence is the heading.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the entry text under the key 'FAQ'.

    Returns
    -------
    str
        The whole entry when it contains no '?'; otherwise the text before the
        first question (possibly an empty string).
    """
    entry = row['FAQ']
    before_first_q, qmark, _ = entry.partition('?')
    if not qmark:
        # No question at all: the entire entry is treated as heading.
        return entry
    question = before_first_q.rpartition('. ')[2]
    # Slice by position instead of splitting on the question text: splitting
    # cuts at the FIRST occurrence of that text (wrong if it also appears
    # earlier) and raises ValueError when the question text is empty.
    return before_first_q[:len(before_first_q) - len(question)]

def strip_heading(row):
    """Return the FAQ text of ``row`` with its heading removed.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'FAQ' (full entry text) and 'heading' (text to remove).

    Returns
    -------
    str
        The entry text without the first occurrence of the heading.
    """
    # count=1: remove only the first occurrence. Without it, a heading that
    # recurs later in the entry (e.g. inside an answer) would be deleted there too.
    return row['FAQ'].replace(row['heading'], '', 1)

def find_questions(entry):
    """Collect candidate question strings from an FAQ entry.

    The entry is cut at every '?'. From each piece, the text after its last
    '. ' (or the whole piece when it has no '. ') is kept as a candidate.
    Empty and whitespace-only candidates are dropped; the rest are stripped.

    Parameters
    ----------
    entry : str
        FAQ text.

    Returns
    -------
    list of str
        Stripped candidates, in order of appearance, without the '?'.
    """
    # rpartition(...)[2] is the text after the last '. ', or the whole piece
    # when the separator is absent.
    candidates = (piece.rpartition('. ')[2] for piece in entry.split('?'))
    return [text.strip() for text in candidates if text and not text.isspace()]

def find_answers(entry):
    """Collect answer strings from an FAQ entry.

    The entry is cut at every '?'. In each piece, everything up to and
    including the last '. ' is taken as an answer; whatever follows that
    separator is taken to be the start of the next question and is dropped.
    Pieces without any '. ' contribute nothing.

    Parameters
    ----------
    entry : str
        FAQ text.

    Returns
    -------
    list of str
        Stripped answers, in order of appearance.
    """
    answers = []
    for piece in entry.split('?'):
        head, sep, _next_question = piece.rpartition('. ')
        if not sep:
            continue
        # Keep the prefix by position. Removing the trailing question with
        # str.replace would also delete every earlier occurrence of that text
        # inside the answer itself.
        answers.append((head + sep).strip())
    return answers

def preprocess(entry):
    """Normalize raw FAQ text before heading/question extraction.

    Steps, in order: newlines become spaces, the text is NFKD-normalized,
    a space is inserted after every '?', and a single trailing space is
    appended.

    Parameters
    ----------
    entry : str
        Raw FAQ text.

    Returns
    -------
    str
        The normalized text, always ending in a space.
    """
    entry = entry.replace('\n', ' ')
    entry = unicodedata.normalize("NFKD", entry)
    entry = entry.replace('?', '? ')
    # A former `re.sub(r'.(?=[A-Z])', '. ', entry)` call was removed here: its
    # result was never assigned, so it had no effect. It is deliberately not
    # "activated" -- the unescaped '.' would replace ANY character preceding a
    # capital letter, and the regex in qna_pairs already accepts a period
    # directly followed by a capital.
    return entry + ' '

# Pick out questions-and-answer pairs with a regular expression
def qna_pairs(row):
    """Return a copy of ``row`` with a 'qna' entry holding [question, answer] pairs.

    Group 1 of the pattern is a question: a capital letter, then text without
    periods, ending in '?' plus one whitespace character. Group 2 is the
    answer: text without '?', ending at one of the terminators listed in the
    pattern (period + whitespace, period + double quote, period before a
    capital, ')' before 'I', or a lowercase letter before a capital).

    Parameters
    ----------
    row : pandas.Series
        Must provide 'FAQ', 'Topic' and 'heading'.

    Returns
    -------
    pandas.Series
        A copy of ``row`` with 'qna' set to the list of matched
        [question, answer] pairs, or to [[Topic, heading]] when nothing matches.
    """
    # Work on a copy: mutating the object handed over by DataFrame.apply is
    # not supported by pandas.
    row = row.copy()
    pairs = re.findall(r'([A-Z][^.]*\?\s)([^?]+(?:\.\s|\.\"|\.(?=[A-Z])|\)(?=I)|[a-z](?=[A-Z])))', row.FAQ)
    if pairs:
        row['qna'] = [[question, answer] for question, answer in pairs]
    else:
        # No recognizable Q&A structure: fall back to the topic as the
        # question and the heading as the answer.
        row['qna'] = [[row['Topic'], row['heading']]]
    return row

def process_text(df):
    """Run the full text-processing pipeline on an FAQ frame.

    Steps: normalize each 'FAQ' text with ``preprocess``, store the result of
    ``find_heading`` in a new 'heading' column, remove that heading from 'FAQ'
    with ``strip_heading``, then apply ``qna_pairs`` to every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'FAQ' column of strings, plus whatever columns
        ``qna_pairs`` reads (it uses 'Topic').

    Returns
    -------
    pandas.DataFrame
        The result of applying ``qna_pairs`` row-wise. The input frame is
        left untouched.
    """
    # Copy first so the caller's frame is not modified as a side effect.
    df = df.copy()
    df['FAQ'] = df['FAQ'].apply(preprocess)
    df['heading'] = df.apply(find_heading, axis=1)
    df['FAQ'] = df.apply(strip_heading, axis=1)
    return df.apply(qna_pairs, axis=1)
    
# Rebinds `faq` to the processed frame. Re-running this cell without first
# re-running the load cell feeds already-processed text through the pipeline again.
faq = process_text(faq)

In [3]:
# faq['questions'] = faq.FAQ.apply(find_questions)
# faq['answers'] = faq.FAQ.apply(find_answers)

# def count_qna(row):
#     print('%d:\t%d\t%d' % (row.name, len(row.questions), len(row.answers)))
#     return abs(len(row.questions) - len(row.answers))
# s = faq.apply(count_qna, axis=1)
# s.sum()

In [4]:
# Turn the per-row list of [question, answer] pairs into one row per pair.
# Each 'qna' list becomes a Series (one element per pair); stack() makes that long,
# and dropping index level 1 leaves only the original row label as the index.
s = faq.apply(lambda x: pd.Series(x['qna']), axis=1).stack().reset_index(level=1, drop=True)
# The Series name becomes the column name in the join below.
s.name = 'question'
# Joining on the index repeats each row's remaining columns once per pair.
faq = faq.drop(['FAQ', 'qna'], axis=1).join(s).reset_index(drop=True)
# 'question' still holds the whole [question, answer] pair at this point, so the
# answer must be extracted BEFORE the column is overwritten with the question text.
faq['answer'] = faq.question.apply(lambda x: x[1])
faq['question'] = faq.question.apply(lambda x: x[0])
# Display the resulting frame as the cell output.
faq

Unnamed: 0,Topic,Category,Department,Updated,heading,question,answer
0,Affordable Homeownership,Housing,Housing,2015-06-19,"The City of Boulder is committed to making housing in our community available to a variety of people, including those who could not otherwise purchase a home in Boulder's housing market. The Division of Housing offers opportunities for homeownership to those with low and moderate incomes - from down payment assistance on market rate homes to homes that must be sold at affordable prices.",How long does it take to become income-certified?,Generally up to two weeks.
1,Affordable Homeownership,Housing,Housing,2015-06-19,"The City of Boulder is committed to making housing in our community available to a variety of people, including those who could not otherwise purchase a home in Boulder's housing market. The Division of Housing offers opportunities for homeownership to those with low and moderate incomes - from down payment assistance on market rate homes to homes that must be sold at affordable prices.",Who needs to be on the application?,"Everyone who lives in the household or is part of the household, in addition to all people who will be listed on the property title. Household members should include all individuals: legal spouse, domestic partner, or common-law spouse; children(under 18 who reside with the applicant at least 50 percent of the time; and/or a significant other whom the applicant chooses to include as a member of his/her household, who will be occupying the house. The applicant's spouse must be included unless they are legally divorced or separated. Persons not counted include foster children, unborn children, and children under 18 who reside with the applicant less then 50 percent of the time."
2,Affordable Homeownership,Housing,Housing,2015-06-19,"The City of Boulder is committed to making housing in our community available to a variety of people, including those who could not otherwise purchase a home in Boulder's housing market. The Division of Housing offers opportunities for homeownership to those with low and moderate incomes - from down payment assistance on market rate homes to homes that must be sold at affordable prices.",How is annual income calculated?,"Annual income is defined as the anticipated total income for the next 12-month period received from all sources by each member (over the age of 18) of the household. It is assumed that today's circumstances will continue for the next 12 months. The applicant is required to verify this by either submitting a letter from his/her employer stating his/her annual wage or by completing an employer verification form. To obtain this form, call the Housing Division at 303-441-3157 ext. 2."
3,Affordable Homeownership,Housing,Housing,2015-06-19,"The City of Boulder is committed to making housing in our community available to a variety of people, including those who could not otherwise purchase a home in Boulder's housing market. The Division of Housing offers opportunities for homeownership to those with low and moderate incomes - from down payment assistance on market rate homes to homes that must be sold at affordable prices.",How is income verified for self-employed people?,Self-employed individuals should submit copies of their last three years of federal income-tax forms and must also submit an up-to-date profit/loss statement.
4,Affordable Homeownership,Housing,Housing,2015-06-19,"The City of Boulder is committed to making housing in our community available to a variety of people, including those who could not otherwise purchase a home in Boulder's housing market. The Division of Housing offers opportunities for homeownership to those with low and moderate incomes - from down payment assistance on market rate homes to homes that must be sold at affordable prices.",How are assets calculated?,"Assets are calculated based on annual income earned (interest), not the value of the asset itself."
5,Affordable Homeownership,Housing,Housing,2015-06-19,"The City of Boulder is committed to making housing in our community available to a variety of people, including those who could not otherwise purchase a home in Boulder's housing market. The Division of Housing offers opportunities for homeownership to those with low and moderate incomes - from down payment assistance on market rate homes to homes that must be sold at affordable prices.",Can I combine any programs?,"Neither First Home (20 percent down payment) nor the House to Home (15 percent down payment) Assistance Programs can be combined with a HomeWorks (permanently affordable) Home, since these homes have already been subsidized by the City of Boulder or the developer. The 3 percent solution may be used with HomeWorks."
6,Affordable Homeownership,Housing,Housing,2015-06-19,"The City of Boulder is committed to making housing in our community available to a variety of people, including those who could not otherwise purchase a home in Boulder's housing market. The Division of Housing offers opportunities for homeownership to those with low and moderate incomes - from down payment assistance on market rate homes to homes that must be sold at affordable prices.",Can I have a cosigner?,"Yes, a non-occupying cosigner will not be included in the household. However, if the cosigner's name appears on the title, her/his income must be included in the income certification application."
7,Affordable Homeownership,Housing,Housing,2015-06-19,"The City of Boulder is committed to making housing in our community available to a variety of people, including those who could not otherwise purchase a home in Boulder's housing market. The Division of Housing offers opportunities for homeownership to those with low and moderate incomes - from down payment assistance on market rate homes to homes that must be sold at affordable prices.",Do I have to be a first-time home buyer?,"To qualify for the down payment assistance programs, you must be a first-time home buyer, which means you have not owned a home in the last three years unless you have been divorced (in the last three years) or are a displaced homemaker. You do not have to be a first-time home buyer to purchase an Affordable Home."
8,Affordable Homeownership,Housing,Housing,2015-06-19,"The City of Boulder is committed to making housing in our community available to a variety of people, including those who could not otherwise purchase a home in Boulder's housing market. The Division of Housing offers opportunities for homeownership to those with low and moderate incomes - from down payment assistance on market rate homes to homes that must be sold at affordable prices.",Do I have to purchase in the city limits of Boulder?,Yes. Find out if the property is located in the Boulder City Limits.
9,Affordable Homeownership,Housing,Housing,2015-06-19,"The City of Boulder is committed to making housing in our community available to a variety of people, including those who could not otherwise purchase a home in Boulder's housing market. The Division of Housing offers opportunities for homeownership to those with low and moderate incomes - from down payment assistance on market rate homes to homes that must be sold at affordable prices.",How are improvements calculated for Affordable Homes?,"The low-income covenant lists all of the pre-approved capital improvements. The home owner should contact the city before the improvement has begun in order to ensure how the improvement will be calculated into the resale price. Homeowners are allowed to add a maximum of $25,000 to the resale calculation for pre-approved capital improvements. For more information, please visit the Affordable Homeownership Program website."


In [5]:
# Write the separated question/answer rows to the interim data folder, without the index column.
faq.to_csv('../data/interim/faq-text-separated.csv', index=False)