Skip to content

Commit

Permalink
Added regex for watchlist.names. Added test case for verifying the wa…
Browse files Browse the repository at this point in the history
…tchlist.names. Need to plug this search in, but will do this after I wrap up the other handful of regexes needed.
  • Loading branch information
truedat101 committed May 22, 2009
1 parent 1bafcf6 commit acdf862
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 2 deletions.
19 changes: 17 additions & 2 deletions examples/luffaproject.conf
Expand Up @@ -6,6 +6,11 @@
#
# Project Setup
#
# fullname - the name of the project, used for reporting purposes
# path.uri - the path to the base of the code. Currently this supports only one path.
# source.ext.whitelist - The list of extensions for files you want to scan. Other files are ignored. There is no way as of
# yet to handle search on binary files. Not sure if this makes sense for our purposes.
# You could potentially scan symbols in object files and strings in classfiles, but naaaah.
project.fullname=Project Luffa
project.path.uri=../../../../src
project.source.ext.whitelist=.py,.doc,.txt
Expand All @@ -27,9 +32,19 @@ license.gpl.v3=foo
#
# Watchlist should be stuff that we want to flag in a report.
# Format should be watchlist.KEY=REGEXP
watchlist.names=REGEXP
# Python raw string notation will be appended to whatever you use for the regex. I had the raw string notation here
# originally, but had problems with the string escaping.
#
# names - should be list of known names of team members, aliases, nicknames
# companies - any company names possibly referenced. This is a good idea if you've acquired a company, or you think your
# dev team might have lifted code from someone else inadvertently (yikes)
# badwords - we all know what these are...don't use these words in your code unless you are creating apps for the adult industry.
# The concrete 5 project has a nice list of bad words.
# emailaddresses - a * indicates search for all email addresses. Otherwise, only search for email addresses in the list
watchlist.names=(David|Mike|Truedat)
watchlist.companies=REGEXP
watchlist.words=REGEXP
watchlist.badwords=REGEXP
watchlist.emailaddresses=*

#
# Reports
Expand Down
11 changes: 11 additions & 0 deletions src/razortooth/luffa/tools/scan.py
Expand Up @@ -36,6 +36,7 @@
import os
import unittest
import string
import re

class scan:
confFile = 0
Expand Down Expand Up @@ -83,6 +84,7 @@ def deepScan(self, currentPath):
# print "about to deep scan %s" % os.path.join(currentPath, f)
self.deepScan(os.path.join(currentPath, f))
else:
# XXX TODO Make sure this section handles double byte character encodings
extList = self.luffaProjectEnv.get("project.source.ext.whitelist")
for ext in extList.split(","): # Convert this to a regex, more efficient
if (currentPath.endswith(ext)): # XXX TODO FIX this to handle upper case
Expand All @@ -108,6 +110,15 @@ def testInitEnv(self):
def testDeepScan1(self):
    """Load the example project config, then deep-scan the configured source root."""
    self.aLuffa.initEnv("../../../../examples/luffaproject.conf")
    # Strip trailing whitespace first: presumably the value read from the
    # conf file still carries its line ending (the original note asks why).
    scanRoot = str(self.aLuffa.luffaProjectEnv["project.path.uri"]).rstrip()
    self.aLuffa.deepScan(scanRoot)
def testWatchlistNames(self):
    """Check that the watchlist.names regex from the example config finds names.

    Loads the example project configuration, compiles the
    ``watchlist.names`` value as a case-insensitive regular expression and
    verifies that it finds exactly two matches in a sample sentence.
    """
    self.aLuffa.initEnv("../../../../examples/luffaproject.conf")
    # Strip trailing whitespace so a line ending left on the config value
    # does not become part of the pattern.
    pattern = self.aLuffa.luffaWatchlistEnv.get('watchlist.names').rstrip()
    print("loaded watchlist.names pattern = %s" % pattern)
    # A raw-string prefix only affects literals in source code; the value
    # read from the config is already a plain string, so compile it as-is.
    p = re.compile(pattern, re.IGNORECASE)
    result1 = p.findall("Mike and David are cool")
    # Compare the match count, not the list itself: list > int is always
    # True on Python 2 and a TypeError on Python 3.
    self.assertTrue(len(result1) > 0)
    print(result1)
    self.assertEqual(len(result1), 2)
def tearDown(self):
    """Announce test teardown on stdout; no other cleanup is performed here."""
    print("tearing down")
if __name__ == '__main__':
Expand Down

0 comments on commit acdf862

Please sign in to comment.