Skip to content

Commit

Permalink
Defend against RCE and XSS; modernize.
Browse files Browse the repository at this point in the history
RCE was theoretically possible, depending on how strictly
cgi.parse_header() parses Content-Type.  If it ever returned shell
metacharacters, those would be passed directly to the shell.

This patch switches the code from popen2 to subprocess, since the
former is deprecated, and we get lots of warnings about that in our
Apache error logs.
  • Loading branch information
brett committed Jan 31, 2014
1 parent 971e2a9 commit d6c21fd
Showing 1 changed file with 93 additions and 109 deletions.
202 changes: 93 additions & 109 deletions spellchecker
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,24 @@ $Id$
branched from v 1.46
"""

import os
import cgi
import checkremote
import http_auth
import os
import sys
import string
import urlparse
import popen2

customized_dico="/usr/local/share/aspell/w3c.dat"
from cleanhtml import *
from subprocess import Popen, PIPE

languages = {"en_US":"English","fr":"French"}
def format_option(a,b,c):
if a:
selected=""
if a==c:
selected=" selected='selected'"
return "<option value='%s'%s>%s</option>" % (a,selected,b)

def concat(a,b):
return a+b

return clean_format("<option value='%s'%s>%s</option>",
a, selected, b)

Page1 ="""Content-Type:text/html; charset=utf-8
Expand Down Expand Up @@ -93,108 +92,93 @@ Last Modified: $Date$
"""

def format(fp,suggest):
line = fp.readline()
words = {}
count = 0
while line!="":
if line!="\n" and line !="*\n" and line[0]!="@":
line = line[:-1]
parts = string.split(line,": ")
fields = string.split(parts[0]," ")
if fields[0]=="&":
values = string.split(parts[1],", ")
if (not words.has_key(fields[1])):
words[fields[1]]=values
for line in fp:
line = line.rstrip('\n')
if line and line != "*" and line[0] != "@":
parts = line.split(': ')
fields = parts[0].split()
if fields[1] in words:
continue
elif fields[0]=="&":
words[fields[1]] = parts[1].split(", ")
elif fields[0]=="#":
if (not words.has_key(fields[1])):
words[fields[1]]=[]
elif line=="\n":
count = count + 1
line = fp.readline()
offsets = {}
count = 0
if len(words):
keys = words.keys()
keys.sort()
print "<form action=\"http://www.w3.org/Team/update_dictionary\" method=\"post\"><ol>"
for error in keys:
print "<li>\"<span class='no'>%s</span>\" (<input type=\"checkbox\" name=\"list[]\" value=\"%s\"/> add to the dictionary)" % (error,error)

if len(words[error]) and suggest:
print "; suggestions:<ul class='suggestions'>"
for option in words[error]:
print "<li>%s</li>" % option
print "</ul>"
print "</li>"
print "</ol><p><label><input type=\"submit\" value=\"Update dictionary\"/> (W3C Comm Team only)</label></p></form>"
else:
words[fields[1]] = []
if not words:
print "<p><span class='yes'>No errors</span> found.</p>"
return
for error in sorted(words):
clean_print("<li>\"<span class='no'>%s</span>\"", error)
if words[error] and suggest:
print "; suggestions:<ul class='suggestions'>"
for option in words[error]:
clean_print("<li>%s</li>", option)
print "</ul>"
print "</li>"

def getLangSetup(fields):
    """Choose the spell-check language and build its <option> markup.

    ``fields`` must offer ``getfirst(name)``.  The submitted ``lang`` value
    is kept only when it is a key of the module-level ``languages`` table;
    anything else (including a missing field) becomes ``'en_US'``.

    Returns a ``(lang, lang_opts)`` pair, where ``lang_opts`` is the
    concatenated ``format_option`` output for every entry of ``languages``,
    with ``lang`` passed as the currently selected code.
    """
    requested = fields.getfirst('lang')
    lang = requested if requested in languages else 'en_US'
    options = [format_option(code, languages[code], lang)
               for code in languages]
    return lang, ''.join(options)

def getSuggestSetup(fields):
    """Report whether the ``suggest`` form field is switched on.

    ``fields`` must offer ``getfirst(name)``.  Returns a pair made of a
    boolean (true only when the first ``suggest`` value is exactly ``'on'``)
    and the matching attribute text: ``" checked='checked'"`` when true,
    the empty string otherwise.
    """
    wants_suggestions = fields.getfirst('suggest') == 'on'
    checked_attr = " checked='checked'" if wants_suggestions else ""
    return wants_suggestions, checked_attr

def getURI(fields):
    """Return the URI to check, taken from the form or the Referer header.

    ``fields`` must offer ``getfirst(name)`` and support ``in``.  A non-empty
    ``uri`` field wins.  Otherwise, when a ``referrer`` field is present, the
    ``HTTP_REFERER`` environment variable is returned (``None`` if it is not
    set).  With neither, the empty or missing ``uri`` value is returned
    unchanged.
    """
    uri = fields.getfirst('uri')
    if uri:
        return uri
    if 'referrer' in fields:
        # Fall back to the page that linked here.
        return os.environ.get('HTTP_REFERER')
    return uri

if __name__ == '__main__':
if os.environ.has_key('SCRIPT_NAME'):
fields = cgi.FieldStorage()
uri =""
uri_text =""
uri_text1=""
suggest=0
suggest_txt=''
if fields.has_key('uri'):
uri = fields['uri'].value
elif fields.has_key('referrer') and os.environ.has_key('HTTP_REFERER'):
uri = os.environ['HTTP_REFERER']
if uri:
uri_text1="for %s" % (cgi.escape(uri))
uri_text=" for <a href=\"%s\">%s</a>" %(cgi.escape(uri),cgi.escape(uri))
lang = "en_US"
if fields.has_key('lang') and fields['lang'].value in languages.keys():
lang=fields['lang'].value
languages_options = reduce(concat,map(format_option,languages.keys(),languages.values(),[lang for x in languages.keys()]))

if fields.has_key('suggest'):
if fields['suggest'].value=='on':
suggest=1
suggest_txt=" checked='checked'"
if uri:
import http_auth
url_opener = http_auth.ProxyAuthURLopener()
try:
fp = url_opener.open(uri)
except IOError as e:
url_opener.error = "I/O error: %s %s" % (e.errno,e.strerror)
fp = None
print Page1 % ('<meta name="ROBOTS" content="NOINDEX,NOFOLLOW"/>',uri_text1,uri_text,cgi.escape(uri),languages_options,suggest_txt)
if fp:
personal = "--personal=%s" % customized_dico
if lang!="en_US":
personal = ""
headers = fp.info()
charset_opt = ""
if headers.has_key('Content-Type'):
contentType = cgi.parse_header(headers["Content-Type"])
if contentType[1].has_key('charset'):
charset_opt = "-assume_charset=%s" % contentType[1]['charset']
command = "/usr/bin/lynx %s -cfg=/usr/local/lib/lynx.cfg -nolist -dump -stdin|/usr/bin/aspell --encoding=utf-8 --lang %s -a %s --sug-mode=fast" % (charset_opt,lang,personal)

(piperfd,pipewfd,pipeErr) = popen2.popen3(command)

pipewfd.write(fp.read())
fp.close()
pipewfd.close()
# Need to find a way to display any errors if relevant
processingErrors=""
if (processingErrors):
print "<p>The following error occurred when trying to process your request :</p><pre class='no'>"
print "</pre>"
pipeErr.close()
if (piperfd):
print "<h2>Errors found in the page</h2>"
format(piperfd,suggest)
piperfd.close()
else:
print "<p><span class='no'>Unable to read</span> <a href='%s'>%s</a> (%s). Sorry, check the URI.</p>" % (cgi.escape(uri),cgi.escape(uri), url_opener.error)
else:
print Page1 % ('',uri_text1,uri_text,cgi.escape(uri),languages_options,suggest_txt)
print Page2



fields = cgi.FieldStorage()
lang, languages_options = getLangSetup(fields)
suggest, suggest_txt = getSuggestSetup(fields)
uri = getURI(fields)
if not uri:
print Page1 % ('', '', '', clean_str(uri), languages_options,
suggest_txt)
print Page2
sys.exit()

uri_text1 = clean_format("for %s", uri)
uri_text = clean_format(" for <a href=\"%s\">%s</a>", uri, uri)
print Page1 % ('<meta name="ROBOTS" content="NOINDEX,NOFOLLOW"/>',
uri_text1, uri_text, clean_str(uri), languages_options,
suggest_txt)

url_opener = http_auth.ProxyAuthURLopener()
try:
fp = url_opener.open(uri)
except IOError as e:
url_opener.error = "I/O error: %s %s" % (e.errno,e.strerror)
fp = None
if fp is None:
clean_print("<p><span class='no'>Unable to read</span> <a href='%s'>%s</a> (%s). Sorry, check the URI.</p>", uri, uri, url_opener.error)
print Page2
sys.exit()

headers = fp.info()
lynx_cmd = ['/usr/bin/lynx', '-cfg=/usr/local/lib/lynx.cfg', '-nolist',
'-dump', '-stdin']
if headers.has_key('Content-Type'):
contentType = cgi.parse_header(headers['Content-Type'])
if contentType[1].has_key('charset'):
lynx_cmd.append('-assume_charset=%s' %
contentType[1]['charset'])

lynx_proc = Popen(lynx_cmd, stdin=fp, stdout=PIPE)
aspell_proc = Popen(['/usr/bin/aspell', '-a', '--encoding=utf-8',
'--sug-mode=fast', '--lang', lang],
stdin=lynx_proc.stdout, stdout=PIPE)
lynx_proc.wait()
lynx_proc.stdout.close()
print "<h2>Errors found in the page</h2>"
format(aspell_proc.stdout, suggest)
aspell_proc.stdout.close()
print Page2

0 comments on commit d6c21fd

Please sign in to comment.