diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index de65978614aa..ae98db7d6180 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -108,8 +108,10 @@ public function addContent($content, $type = null) } } + // http://www.w3.org/TR/encoding/#encodings + // http://www.w3.org/TR/REC-xml/#NT-EncName if (null === $charset && - preg_match('/\]+charset *= *["\']?([a-zA-Z\-0-9]+)/i', $content, $matches)) { + preg_match('/\]+charset *= *["\']?([a-zA-Z\-0-9_:.]+)/i', $content, $matches)) { $charset = $matches[1]; } diff --git a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php index 04921f693ce2..d96d1ebf71b3 100644 --- a/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php @@ -232,6 +232,10 @@ public function testAddContent() $crawler = new Crawler(); $crawler->addContent('中文'); $this->assertEquals('中文', $crawler->filterXPath('//span')->text(), '->addContent() guess wrong charset'); + + $crawler = new Crawler(); + $crawler->addContent(mb_convert_encoding('日本語', 'SJIS', 'UTF-8')); + $this->assertEquals('日本語', $crawler->filterXPath('//body')->text(), '->addContent() can recognize "Shift_JIS" in html5 meta charset tag'); } /**