Permalink
Browse files

Merge pull request #77 from satufk/master

Adding new encodings to P.A.C.K.E.R. unpacker.
  • Loading branch information...
2 parents e5c8a0c + 47f6634 commit 2a34726a857a385301f09900eb7f9ef67f0b0c07 Stefano Sanfilippo committed Oct 16, 2011
Showing with 58 additions and 12 deletions.
  1. +16 −0 python/js-beautify-profile
  2. +42 −12 python/jsbeautifier/unpackers/packer.py
View
16 python/js-beautify-profile
@@ -0,0 +1,16 @@
+#! /usr/bin/env python
+
+import sys
+import unittest
+
+# Speed things up: prefer the C-accelerated cProfile when it is available.
+try:
+ import cProfile as profile
+except ImportError:
+ import profile
+
def run():
    """Run unittest test discovery in the current process.

    ``discover`` is inserted as the first command-line argument so that
    any options the user passed to this script are forwarded to the
    discovery sub-command instead of preceding it.
    """
    sys.argv.insert(1, 'discover')
    # module=None is required on Python 3: with the default module
    # ('__main__') unittest treats 'discover' as a test name rather than
    # as the discovery sub-command.  Python 2.7 accepts module=None too.
    unittest.main(module=None)


if __name__ == '__main__':
    # profile.run() evaluates the statement in the __main__ namespace,
    # which is this script when it is executed directly.
    profile.run('run()')
View
54 python/jsbeautifier/unpackers/packer.py
@@ -26,16 +26,18 @@ def unpack(source):
"""Unpacks P.A.C.K.E.R. packed js code."""
payload, symtab, radix, count = _filterargs(source)
- if radix != 62:
- raise UnpackingError('Unknown p.a.c.k.e.r. encoding.')
-
if count != len(symtab):
raise UnpackingError('Malformed p.a.c.k.e.r. symtab.')
+ try:
+ unbase = Unbaser(radix)
+ except TypeError:
+ raise UnpackingError('Unknown p.a.c.k.e.r. encoding.')
+
def lookup(match):
"""Look up symbols in the synthetic symtab."""
word = match.group(0)
- return symtab[unbase62(word)] or word
+ return symtab[unbase(word)] or word
source = re.sub(r'\b\w+\b', lookup, payload)
return _replacestrings(source)
@@ -45,6 +47,7 @@ def _filterargs(source):
argsregex = (r"}\('(.*)', *(\d+), *(\d+), *'(.*)'\."
r"split\('\|'\), *(\d+), *(.*)\)\)")
args = re.search(argsregex, source, re.DOTALL).groups()
+
try:
return args[0], args[3].split('|'), int(args[1]), int(args[2])
except ValueError:
@@ -53,6 +56,7 @@ def _filterargs(source):
def _replacestrings(source):
"""Strip string lookup table (list) and replace values in source."""
match = re.search(r'var *(_\w+)\=\["(.*?)"\];', source, re.DOTALL)
+
if match:
varname, strings = match.groups()
startpoint = len(match.group(0))
@@ -63,12 +67,38 @@ def _replacestrings(source):
return source[startpoint:]
return source
-ALPHABET = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
-BASE_DICT = dict((cipher, index) for index, cipher in enumerate(ALPHABET))
-def unbase62(string):
- """Decodes a base62 value to an integer."""
- ret = 0
- for index, cipher in enumerate(string[::-1]):
- ret += (62 ** index) * BASE_DICT[cipher]
- return ret
class Unbaser(object):
    """Callable that decodes numerals of one fixed base into integers.

    Build it once per radix and call the instance with each encoded word:

        unbase = Unbaser(62)
        unbase('10')    # -> 62

    Supported bases:
      * 2..36  -- delegated to the ``int()`` builtin;
      * 37..62 -- digits ``0-9``, ``a-z``, ``A-Z``; bases below 62 use the
        matching prefix of the base 62 alphabet;
      * 95     -- the printable ASCII characters from ``' '`` to ``'~'``.

    Raises:
        TypeError: from the constructor, if the base is not supported.
        KeyError: from the call, if a base > 36 word contains a character
            outside the alphabet.
        ValueError: from the call, if a base <= 36 word is rejected by
            ``int()``.
    """

    # Digit alphabets for bases int() cannot handle; a character's position
    # in the string is its digit value.
    ALPHABET = {
        62: '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ',
        # The backslash is escaped explicitly: a bare '\]' is an invalid
        # escape sequence and triggers a warning on recent Pythons.
        95: (' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ'
             '[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'),
    }

    def __init__(self, base):
        """Select the decoding strategy for ``base``.

        Raises TypeError when no alphabet is known for ``base``.
        """
        self.base = base

        # If base can be handled by the int() builtin, let it do it for us.
        # NOTE(review): int() is more lenient than a strict digit lookup
        # (it also accepts upper-case letters, a sign and surrounding
        # whitespace) -- kept as in the original implementation.
        if 2 <= base <= 36:
            self.unbase = lambda string: int(string, base)
            return

        if 36 < base < 62:
            # Intermediate radixes share the leading digits of base 62.
            alphabet = self.ALPHABET[62][:base]
        else:
            try:
                alphabet = self.ALPHABET[base]
            except KeyError:
                raise TypeError('Unsupported base encoding.')

        # Build the character -> digit value lookup once per instance.
        self.dictionary = dict(
            (cipher, index) for index, cipher in enumerate(alphabet))
        self.unbase = self._dictunbaser

    def __call__(self, string):
        """Decode ``string`` and return its integer value."""
        return self.unbase(string)

    def _dictunbaser(self, string):
        """Decode ``string`` using the alphabet lookup table.

        An empty string decodes to 0.  Raises KeyError for a character
        that is not part of the alphabet.
        """
        ret = 0
        # Horner evaluation, most significant digit first.
        for cipher in string:
            ret = ret * self.base + self.dictionary[cipher]
        return ret

0 comments on commit 2a34726

Please sign in to comment.