diff --git a/examples/luffaproject.conf b/examples/luffaproject.conf index 2842f82..60fa332 100644 --- a/examples/luffaproject.conf +++ b/examples/luffaproject.conf @@ -6,6 +6,11 @@ # # Project Setup # +# fullname - the name of the project, used for reporting purposes +# path.uri - the path to the base of the code. Currently this supports only one path. +# source.ext.whitelist - The list of extensions for files you want to scan. Other files are ignored. There is no way as of +# yet to handle search on binary files. Not sure if this makes sense for our purposes. +# You could potentially scan symbols in object files and strings in classfiles, but naaaah. project.fullname=Project Luffa project.path.uri=../../../../src project.source.ext.whitelist=.py,.doc,.txt @@ -27,9 +32,19 @@ license.gpl.v3=foo # # Watchlist should be stuff that we want to flag in a report, typically # Format should be watchlist.KEY=REGEXP -watchlist.names=REGEXP +# Python raw string notation will be appended to whatever you use for the regex. I had the raw string notation here +# originally, but had problems with the string escaping. +# +# names - should be a list of known names of team members, aliases, nicknames +# companies - any company names possibly referenced. This is a good idea if you've acquired a company, or you think your +# dev team might have lifted code from someone else inadvertently (yikes) +# badwords - we all know what these are...don't use these words in your code unless you are creating apps for the adult industry. +# The concrete 5 project has a nice list of bad words. +# emailaddresses - a * indicates search for all email addresses. 
Otherwise, only search for email addresses in the list +watchlist.names=(David|Mike|Truedat) watchlist.companies=REGEXP -watchlist.words=REGEXP +watchlist.badwords=REGEXP +watchlist.emailaddresses=* # # Reports diff --git a/src/razortooth/luffa/tools/scan.py b/src/razortooth/luffa/tools/scan.py index 2809e24..5a7b44d 100644 --- a/src/razortooth/luffa/tools/scan.py +++ b/src/razortooth/luffa/tools/scan.py @@ -36,6 +36,7 @@ import os import unittest import string +import re class scan: confFile = 0 @@ -83,6 +84,7 @@ def deepScan(self, currentPath): # print "about to deep scan %s" % os.path.join(currentPath, f) self.deepScan(os.path.join(currentPath, f)) else: + # XXX TODO Make sure this section handles double byte character encodings extList = self.luffaProjectEnv.get("project.source.ext.whitelist") for ext in extList.split(","): # Convert this to a regex, more efficient if (currentPath.endswith(ext)): # XXX TODO FIX this to handle upper case @@ -108,6 +110,15 @@ def testInitEnv(self): def testDeepScan1(self): propsRead = self.aLuffa.initEnv("../../../../examples/luffaproject.conf") self.aLuffa.deepScan(str(self.aLuffa.luffaProjectEnv["project.path.uri"]).rstrip()) # Watch the newlines. Why? + def testWatchlistNames(self): + propsRead = self.aLuffa.initEnv("../../../../examples/luffaproject.conf") + pattern = self.aLuffa.luffaWatchlistEnv.get('watchlist.names').rstrip() + print "loaded watchlist.names pattern = %s" % pattern + p = re.compile(r"" + pattern + "", re.IGNORECASE) + result1 = p.findall("Mike and David are cool") + self.assert_(result1 > 0) + print result1 + self.assert_(len(result1) == 2) def tearDown(self): print "tearing down" if __name__ == '__main__':