In [3]:
import re

# 1. Regex basics

In [24]:
# re.match only tries to match at the very start of the string.
out = re.match(r"hello", "hello world")

# span() returns (start, end); a match object is always truthy.
begin, finish = out.span()
print(begin, finish, bool(out))

0 5 True


In [17]:
regex = r"hello"
candidates = ["hello world", "bye world"]

# re.match returns None when the pattern does not match at the start of the string.
for text in candidates:
    out = re.match(regex, text)
    print(text, "\tFound:", out is not None)

hello world 	Found: True
bye world 	Found: False


## 1.1. `match` vs `search`

In [30]:
text = """hello world
bye world"""

# First every pattern with re.match (anchored at the start of the string),
# then every pattern with re.search (scans the whole string).
for finder in (re.match, re.search):
    for word in (r"hello", r"world", r"bye"):
        print(bool(finder(word, text)))

True
False
False
True
True
True


## 1.2. `re.compile`

In [31]:
# Baseline timing: module-level re.match, given the pattern as a string on every call.
%timeit re.match(r"hello", "hello world")

857 ns ± 21 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [32]:
# Compile the pattern once so the resulting pattern object can be reused.
regex = re.compile(r"hello")

In [33]:
# Timing of the same match through the precompiled pattern object's own method.
%timeit regex.match("hello world")

334 ns ± 7.9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## 1.3. Groups

In [54]:
# Sample report filenames used by the group examples below.
# Each one embeds an 8-character account code and a YYYY-MM period.
filenames = [
    "Informe mensual Indexa Capital - AABBCCDD - 2020-01.pdf",
    "Informe mensual Indexa Capital - XXYYZZWW - 2020-01.pdf",
    "Informe mensual Indexa Capital - AABBCCDD - 2021-03.pdf",
    "Informe mensual Indexa Capital - XXYYZZWW - 2020-12.pdf",
]

### 1.3.1. Unnamed groups

In [55]:
# Positional groups: (1) 8-character account code, (2) 4-digit year, (3) 2-digit month.
# The dot before "pdf" is escaped; an unescaped "." would match ANY character,
# so e.g. "...2020-01Xpdf" would be accepted as well.
regex = re.compile(r"Informe mensual Indexa Capital - (\w{8}) - (\d{4})-(\d{2})\.pdf")

In [56]:
# Match the first filename; groups() returns the captured substrings as a tuple, in pattern order.
out = regex.match(filenames[0])
out.groups()

('AABBCCDD', '2020', '01')

### 1.3.2. Named groups

In [57]:
# Named groups: (?P<name>...) makes each capture available by name via groupdict().
# The pattern is split across adjacent raw-string literals for readability, and the
# dot before "pdf" is escaped so it matches only a literal "." (an unescaped "."
# would match any character).
regex = re.compile(
    r"Informe mensual Indexa Capital - "
    r"(?P<account>\w{8}) - "
    r"(?P<year>\d{4})-(?P<month>\d{2})\.pdf"
)

In [58]:
# Match the second filename; groupdict() maps each named group to the text it captured.
out = regex.match(filenames[1])
out.groupdict()

{'account': 'XXYYZZWW', 'year': '2020', 'month': '01'}

### 1.3.3. Using named groups

In [63]:
# Template for the new filename; the placeholder names match the regex's named groups.
out_pattern = "{year}_{month} Indexa capital {account}.pdf"

In [64]:
# Build each new name by feeding the captured named groups into the output template.
for filename in filenames:
    groups = regex.match(filename).groupdict()
    new_name = out_pattern.format_map(groups)
    print(filename, "\n-", new_name, "\n")

Informe mensual Indexa Capital - AABBCCDD - 2020-01.pdf 
- 2020_01 Indexa capital AABBCCDD.pdf 

Informe mensual Indexa Capital - XXYYZZWW - 2020-01.pdf 
- 2020_01 Indexa capital XXYYZZWW.pdf 

Informe mensual Indexa Capital - AABBCCDD - 2021-03.pdf 
- 2021_03 Indexa capital AABBCCDD.pdf 

Informe mensual Indexa Capital - XXYYZZWW - 2020-12.pdf 
- 2020_12 Indexa capital XXYYZZWW.pdf 



# 2. Pandas

In [65]:
import pandas as pd

In [None]:
re.