Skip to content

Commit

Permalink
FormatURL parsing safelinks (demisto#28032)
Browse files Browse the repository at this point in the history
* Updated wrappers regex

* RN + Ruff

* RN

* RN

* Bump the CommonScripts pack to version 1.11.98.

* Bump the CommonScripts pack to version 1.11.99.

---------

Co-authored-by: Content Bot <bot@demisto.com>
  • Loading branch information
2 people authored and xsoar-bot committed Aug 2, 2023
1 parent c669ffd commit 9a8a25b
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 19 deletions.
7 changes: 7 additions & 0 deletions Packs/CommonScripts/ReleaseNotes/1_11_99.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@

#### Scripts

##### FormatURL
- Updated the Docker image to: *demisto/python3:3.10.12.63474*.

- Updated the URL wrapper regexes so that wrapped links (such as safelinks) are also matched when they do not start with a scheme (e.g. `https://`).
31 changes: 13 additions & 18 deletions Packs/CommonScripts/Scripts/FormatURL/FormatURL.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
import tldextract
import urllib.parse
from CommonServerPython import *
from typing import Match
from re import Match


class URLError(Exception):
pass


class URLType(object):
class URLType:
"""
A class to represent an url and its parts
"""
Expand All @@ -30,7 +30,7 @@ def __str__(self):
f'Path = {self.path}\nQuery = {self.query}\nFragment = {self.fragment}')


class URLCheck(object):
class URLCheck:
"""
This class will build and validate a URL based on "URL Living Standard" (https://url.spec.whatwg.org)
"""
Expand Down Expand Up @@ -268,7 +268,7 @@ def host_check(self):
elif self.modified_url[index] == "]":

if not self.inside_brackets:
if self.check_domain(host) and all([char in self.brackets for char in self.modified_url[index:]]):
if self.check_domain(host) and all(char in self.brackets for char in self.modified_url[index:]):
# Domain is valid with trailing "]" and brackets, the formatter will remove the extra chars
self.done = True
return
Expand All @@ -290,8 +290,7 @@ def host_check(self):
self.inside_brackets = False
break

else:
raise URLError(f"Only IPv6 is allowed within square brackets, not {host}")
raise URLError(f"Only IPv6 is allowed within square brackets, not {host}")

else:
self.output += self.modified_url[index]
Expand Down Expand Up @@ -485,11 +484,7 @@ def check_codepoint_validity(char: str) -> bool:
elif char in url_code_points:
return True

elif unicode_code_points["start"] <= char <= unicode_code_points["end"]:
return True

else:
return False
return unicode_code_points['start'] <= char <= unicode_code_points['end']

def check_domain(self, host: str) -> bool:
"""
Expand Down Expand Up @@ -597,14 +592,15 @@ def remove_leading_chars(self):
self.modified_url = self.modified_url[beginning:end + 1]


class URLFormatter(object):
class URLFormatter:

# URL Security Wrappers
ATP_regex = re.compile('https://.*?\.safelinks\.protection\.outlook\.com/\?url=(.*?)&', re.I)
ATP_regex = re.compile('.*?\.safelinks\.protection\.outlook\.com/\?url=(.*?)&', re.I)
fireeye_regex = re.compile('.*?fireeye[.]com.*?&u=(.*)', re.I)
proofpoint_regex = re.compile('(?i)(?:proofpoint.com/v[1-2]/(?:url\?u=)?(.+?)(?:&amp|&d|$)|'
'https?(?::|%3A)//urldefense[.]\w{2,3}/v3/__(.+?)(?:__;|$))')
trendmicro_regex = re.compile('https://.*?trendmicro\.com(?::443)?/wis/clicktime/.*?/?url==3d(.*?)&', re.I)
trendmicro_regex = re.compile('.*?trendmicro\.com(?::443)?/wis/clicktime/.*?/?url==3d(.*?)&', # disable-secrets-detection
re.I)

# Scheme slash fixer
scheme_fix = re.compile("https?(:[/|\\\]*)")
Expand Down Expand Up @@ -675,8 +671,7 @@ def strip_wrappers(url: str) -> str:
else:
wrapper = False

else:
return url
return url

@staticmethod
def extract_url_proofpoint(url: str) -> str:
Expand All @@ -692,7 +687,7 @@ def extract_url_proofpoint(url: str) -> str:

if url[0]:
# Proofpoint v1 and v2
return urllib.parse.unquote((url[0].replace("-", "%").replace("_", "/")))
return urllib.parse.unquote(url[0].replace("-", "%").replace("_", "/"))

else:
# Proofpoint v3
Expand All @@ -714,7 +709,7 @@ def correct_and_refang_url(url: str) -> str:
url = url.replace("[.]", ".")
url = url.replace("[:]", ":")
lower_url = url.lower()
if lower_url.startswith("hxxp") or lower_url.startswith('meow'):
if lower_url.startswith(('hxxp', 'meow')):
url = re.sub(schemas, "http", url, count=1)

def fix_scheme(match: Match) -> str:
Expand Down
2 changes: 1 addition & 1 deletion Packs/CommonScripts/pack_metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "Common Scripts",
"description": "Frequently used scripts pack.",
"support": "xsoar",
"currentVersion": "1.11.98",
"currentVersion": "1.11.99",
"author": "Cortex XSOAR",
"url": "https://www.paloaltonetworks.com/cortex",
"email": "",
Expand Down

0 comments on commit 9a8a25b

Please sign in to comment.