Skip to content
This repository
Browse code

Merge pull request #3695 from Karlson2k/http_header_rfc2616

Fix: process HTTP header according to RFC 2616
  • Loading branch information...
commit 80221152faec9ae7f1691ab1d9df5d27a58b6b17 2 parents e20eee9 + e0a06ab
jmarshallnz authored December 09, 2013
135  xbmc/utils/HttpHeader.cpp
@@ -21,6 +21,10 @@
21 21
 #include "HttpHeader.h"
22 22
 #include "utils/StringUtils.h"
23 23
 
  24
+// header white space characters according to RFC 2616
  25
+const char* const CHttpHeader::m_whitespaceChars = " \t";
  26
+
  27
+
24 28
 CHttpHeader::CHttpHeader()
25 29
 {
26 30
   m_headerdone = false;
@@ -32,42 +36,68 @@ CHttpHeader::~CHttpHeader()
32 36
 
33 37
 void CHttpHeader::Parse(const std::string& strData)
34 38
 {
35  
-  if (m_headerdone)
36  
-    Clear();
37  
-
38 39
   size_t pos = 0;
39 40
   const size_t len = strData.length();
  41
+  const char* const strDataC = strData.c_str();
  42
+
  43
+  // According to RFC 2616 any header line can have continuation on next line, if next line is started from whitespace char
  44
+  // This code at first checks for whitespace char at the begging of the line, and if found, then current line is appended to m_lastHeaderLine
  45
+  // If current line is NOT started from whitespace char, then previously stored (and completed) m_lastHeaderLine is parsed and current line is assigned to m_lastHeaderLine (to be parsed later)
40 46
   while (pos < len)
41 47
   {
42  
-    const size_t valueStart = strData.find(':', pos);
43  
-    const size_t lineEnd = strData.find("\r\n", pos);
  48
+    const size_t lineEnd = strData.find("\x0d\x0a", pos); // use "\x0d\x0a" instead of "\r\n" to be platform independent
44 49
 
45 50
     if (lineEnd == std::string::npos)
46  
-      break;
  51
+      return; // error: expected only complete lines
47 52
 
48  
-    if (lineEnd == pos)
49  
-    {
50  
-      m_headerdone = true;
51  
-      break;
52  
-    }
53  
-    else if (valueStart != std::string::npos && valueStart < lineEnd)
54  
-    {
55  
-      std::string strParam(strData, pos, valueStart - pos);
56  
-      std::string strValue(strData, valueStart + 1, lineEnd - valueStart - 1);
  53
+    if (m_headerdone)
  54
+      Clear(); // clear previous header and process new one
57 55
 
58  
-      StringUtils::Trim(strParam);
59  
-      StringUtils::ToLower(strParam);
  56
+    if (strDataC[pos] == ' ' || strDataC[pos] == '\t') // same chars as in CHttpHeader::m_whitespaceChars
  57
+    { // line is started from whitespace char: this is continuation of previous line
  58
+      pos = strData.find_first_not_of(m_whitespaceChars);
60 59
 
61  
-      StringUtils::Trim(strValue);
  60
+      m_lastHeaderLine.push_back(' '); // replace all whitespace chars at start of the line with single space
  61
+      m_lastHeaderLine.append(strData, pos, lineEnd - pos); // append current line
  62
+    }
  63
+    else
  64
+    { // this line is NOT continuation, this line is new header line
  65
+      if (!m_lastHeaderLine.empty())
  66
+        ParseLine(m_lastHeaderLine); // process previously stored completed line (if any)
62 67
 
63  
-      if (!strParam.empty() && !strValue.empty())
64  
-        m_params.push_back(HeaderParams::value_type(strParam, strValue));
  68
+      m_lastHeaderLine.assign(strData, pos, lineEnd - pos); // store current line to (possibly) complete later. Will be parsed on next turns.
  69
+
  70
+      if (pos == lineEnd)
  71
+        m_headerdone = true; // current line is bare "\r\n", means end of header; no need to process current m_lastHeaderLine
65 72
     }
66  
-    else if (m_protoLine.empty())
67  
-      m_protoLine.assign(strData, pos, lineEnd - pos);
68 73
 
69  
-    pos = lineEnd + 2;
  74
+    pos = lineEnd + 2; // '+2' for "\r\n": go to next line (if any)
  75
+  }
  76
+}
  77
+
  78
+bool CHttpHeader::ParseLine(const std::string& headerLine)
  79
+{
  80
+  const size_t valueStart = headerLine.find(':');
  81
+
  82
+  if (valueStart != std::string::npos)
  83
+  {
  84
+    std::string strParam(headerLine, 0, valueStart);
  85
+    std::string strValue(headerLine, valueStart + 1);
  86
+
  87
+    StringUtils::Trim(strParam, m_whitespaceChars);
  88
+    StringUtils::ToLower(strParam);
  89
+
  90
+    StringUtils::Trim(strValue, m_whitespaceChars);
  91
+
  92
+    if (!strParam.empty() && !strValue.empty())
  93
+      m_params.push_back(HeaderParams::value_type(strParam, strValue));
  94
+    else
  95
+      return false;
70 96
   }
  97
+  else if (m_protoLine.empty())
  98
+    m_protoLine = headerLine;
  99
+
  100
+  return true;
71 101
 }
72 102
 
73 103
 void CHttpHeader::AddParam(const std::string& param, const std::string& value, const bool overwrite /*= false*/)
@@ -143,7 +173,10 @@ std::string CHttpHeader::GetMimeType(void) const
143 173
 {
144 174
   std::string strValue(GetValueRaw("content-type"));
145 175
 
146  
-  return strValue.substr(0, strValue.find(';'));
  176
+  std::string mimeType(strValue, 0, strValue.find(';'));
  177
+  StringUtils::TrimRight(mimeType, m_whitespaceChars);
  178
+
  179
+  return mimeType;
147 180
 }
148 181
 
149 182
 std::string CHttpHeader::GetCharset(void) const
@@ -152,20 +185,47 @@ std::string CHttpHeader::GetCharset(void) const
152 185
   if (strValue.empty())
153 186
     return strValue;
154 187
 
155  
-  const size_t semicolonPos = strValue.find(';');
156  
-  if (semicolonPos == std::string::npos)
157  
-    return "";
158  
-
159 188
   StringUtils::ToUpper(strValue);
160  
-  size_t posCharset;
161  
-  if ((posCharset = strValue.find("; CHARSET=", semicolonPos)) != std::string::npos)
162  
-    posCharset += 10;
163  
-  else if ((posCharset = strValue.find(";CHARSET=", semicolonPos)) != std::string::npos)
164  
-    posCharset += 9;
165  
-  else
166  
-    return "";
167  
-
168  
-  return strValue.substr(posCharset, strValue.find(';', posCharset) - posCharset);
  189
+  const size_t len = strValue.length();
  190
+  const char* const strValueC = strValue.c_str();
  191
+
  192
+  // extract charset value from 'contenttype/contentsubtype;pram1=param1Val ; charset=XXXX\t;param2=param2Val'
  193
+  // most common form: 'text/html; charset=XXXX'
  194
+  // charset value can be in double quotes: 'text/xml; charset="XXX XX"'
  195
+
  196
+  size_t pos = strValue.find(';');
  197
+  while (pos < len)
  198
+  {
  199
+    // move to the next non-whitespace character
  200
+    pos = strValue.find_first_not_of(m_whitespaceChars, pos + 1);
  201
+
  202
+    if (pos != std::string::npos)
  203
+    {
  204
+      if (strValue.compare(pos, 8, "CHARSET=", 8) == 0)
  205
+      {
  206
+        std::string charset(strValue, pos, strValue.find(';', pos));  // intentionally ignoring possible ';' inside quoted string
  207
+                                                                      // as we don't support any charset with ';' in name
  208
+        StringUtils::Trim(charset, m_whitespaceChars);
  209
+        if (!charset.empty())
  210
+        {
  211
+          if (charset[0] != '"')
  212
+            return charset;
  213
+          else
  214
+          { // charset contains quoted string (allowed according to RFC 2616)
  215
+            StringUtils::Replace(charset, "\\", ""); // unescape chars, ignoring possible '\"' and '\\'
  216
+            const size_t closingQ = charset.find('"', 1);
  217
+            if (closingQ == std::string::npos)
  218
+              return ""; // no closing quote
  219
+
  220
+            return charset.substr(1, closingQ - 1);
  221
+          }
  222
+        }
  223
+      }
  224
+      pos = strValue.find(';', pos); // find next parameter
  225
+    }
  226
+  }
  227
+
  228
+  return ""; // no charset is detected
169 229
 }
170 230
 
171 231
 void CHttpHeader::Clear()
@@ -173,4 +233,5 @@ void CHttpHeader::Clear()
173 233
   m_params.clear();
174 234
   m_protoLine.clear();
175 235
   m_headerdone = false;
  236
+  m_lastHeaderLine.clear();
176 237
 }
3  xbmc/utils/HttpHeader.h
@@ -54,9 +54,12 @@ class CHttpHeader
54 54
 
55 55
 protected:
56 56
   std::string GetValueRaw(const std::string& strParam) const;
  57
+  bool ParseLine(const std::string& headerLine);
57 58
 
58 59
   HeaderParams m_params;
59 60
   std::string   m_protoLine;
60 61
   bool m_headerdone;
  62
+  std::string m_lastHeaderLine;
  63
+  static const char* const m_whitespaceChars;
61 64
 };
62 65
 

0 notes on commit 8022115

Please sign in to comment.
Something went wrong with that request. Please try again.