### Extract Phone

In [6]:
from etk.core import Core
import json

# Example: extract a phone number that is embedded in a URL.
c = Core()

# The phone extractor accepts either a URL or text tokens; per the original
# note, the two inputs differ in how they need to be tokenized, and
# `source_type` tells the extractor which kind of input it was given.
url = 'http://some_url.com/ad/town/602-228-4192/1/310054'
source_type = 'url'
include_context = True
output_format = 'obfuscation'  # (or 'list')
extracted_phone = c._extract_phone(url, source_type, include_context, output_format)

# A parenthesized single-argument print runs unchanged on Python 2 and 3.
print(json.dumps(extracted_phone, indent=2))

[
  {
    "obfuscation": "False", 
    "value": "6022284192"
  }
]


In [7]:
from etk.core import Core
import json

# Example: extract an obfuscated phone number from free text.
c = Core()

# The digits are spelled out / interleaved with words at the end of the text.
text = 'new person in town searching for a great date wiff u  \
        fresh person here searching 4 a great date wiff you Sweet new person in town \
        seeking for a good date with u for80 2sixseven one9zerofor'

# The phone extractor needs tokens (not a raw string) when source_type is 'text'.
# TODO: this two-step tokenization looks complicated and should be simplified.
tokens = c.extract_tokens_from_crf(c.extract_crftokens(text))
source_type = 'text'
include_context = True
output_format = 'obfuscation'  # (or 'list')
extracted_phone = c._extract_phone(tokens, source_type, include_context, output_format)

# A parenthesized single-argument print runs unchanged on Python 2 and 3.
print(json.dumps(extracted_phone, indent=2))

[
  {
    "obfuscation": "True", 
    "value": "4802671904"
  }
]


### Extract Weight

In [8]:
from etk.core import Core
import json

# Example: extract a weight mention from free text.
c = Core()
text = "Measurements: 105lbs 5\'2\" with a beautiful face"
extracted_weight = c._extract_weight(text)

# A parenthesized single-argument print runs unchanged on Python 2 and 3.
print(json.dumps(extracted_weight, indent=2))

[
  {
    "context": {
      "start": 14, 
      "end": 20
    }, 
    "value": "105", 
    "metadata": {
      "unit": "pound"
    }
  }
]


### Extract Height

In [9]:
from etk.core import Core
import json

# Example: extract a height mention from free text.
c = Core()
text = "Nationality:   Swedish  Height:   155 cm   Weight:   47 Kg   Hair Colour:   Blonde"
extracted_height = c._extract_height(text)

# A parenthesized single-argument print runs unchanged on Python 2 and 3.
print(json.dumps(extracted_height, indent=2))

[
  {
    "context": {
      "start": 34, 
      "end": 40
    }, 
    "value": "155", 
    "metadata": {
      "unit": "centimeter"
    }
  }, 
  {
    "context": {
      "start": 30, 
      "end": 40
    }, 
    "value": "155", 
    "metadata": {
      "unit": "centimeter"
    }
  }
]


### Extract Email

In [10]:
from etk.core import Core
import json

# Example: extract an email address from free text.
c = Core()

text = 'contact me at some_email@gmail.com'
# NOTE(review): the second positional argument is presumably include_context
# (the recorded output contains a "context" entry) - confirm against
# Core._extract_email.
extracted_email = c._extract_email(text, True)

# A parenthesized single-argument print runs unchanged on Python 2 and 3.
print(json.dumps(extracted_email, indent=2))

[
  {
    "context": {
      "start": 14, 
      "obfuscation": false, 
      "end": 34
    }, 
    "value": "some_email@gmail.com"
  }
]


### Extract using Regex

In [11]:
from etk.core import Core
import json

# Example: extract a value with a caller-supplied regular expression.
c = Core()

# Raw string keeps the pattern readable; its value is identical to the
# double-backslash form. The single capture group holds the name.
regex = r"(?:my[\s]+name[\s]+is[\s]+([-a-z0-9@$!]+))"
text = "hi there, my name is jessica, join me at so and so"
include_context = True  # return the start and end index of the regex match in string
flags = 0  # no regex flags
extracted_name = c._extract_using_regex(text, regex, include_context, flags)

# A parenthesized single-argument print runs unchanged on Python 2 and 3.
print(json.dumps(extracted_name, indent=2))

[
  {
    "context": {
      "start": 10, 
      "end": 28
    }, 
    "value": "jessica"
  }
]


In [None]:
### Extract Price

In [2]:
from etk.core import Core
import json

# Example: extract a price from free text.
c = Core()
text = 'Hh- 150 200 -Full Hr'
extracted_price = c._extract_price(text)
# NOTE(review): the recorded output for this input reports 150 as the value
# and "200" as metadata.time_unit - confirm that this is the intended parse.

# A parenthesized single-argument print runs unchanged on Python 2 and 3.
print(json.dumps(extracted_price, indent=2))

[
  {
    "value": 150, 
    "metadata": {
      "currency": "", 
      "time_unit": "200"
    }
  }
]
