Skip to content

Commit

Permalink
Adding Fix for Issue tomgco#4 UTF-8 Encoding Issue
Browse files Browse the repository at this point in the history
(cherry picked from commit 70240e5)
  • Loading branch information
tomgco committed Jul 1, 2011
1 parent f7718f0 commit 4516dc2
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 7 deletions.
14 changes: 7 additions & 7 deletions lib/staticGzip.js
Expand Up @@ -29,12 +29,12 @@ var gzippoCache = {};
* gzip file.
*/

// NOTE(review): this span is a rendered diff hunk without +/- markers. Each
// doubled line looks like the pre-commit line followed by its post-commit
// replacement -- confirm against the real lib/staticGzip.js before editing.
//
// gzippo reads `filename` from disk, compresses the contents with
// compress.Gzip, and passes the compressed result to `callback` as its only
// argument. The second header adds a `charset` parameter, which the second
// deflate call passes to gzip.deflate in place of the hard-coded 'binary'.
var gzippo = function(filename, callback) {
var gzippo = function(filename, charset, callback) {
var gzip = new compress.Gzip();
gzip.init();
fs.readFile(filename, function (err, data) {
fs.readFile(filename, function (err, data) {
// A read failure is rethrown from inside the readFile callback; `callback`
// is not invoked in that case.
if (err) throw err;
// The deflate output is concatenated with the output of gzip.end() to form
// the value handed to `callback`.
var gzippedData = gzip.deflate(data, 'binary') + gzip.end();
var gzippedData = gzip.deflate(data, charset) + gzip.end();
callback(gzippedData);
});
};
Expand Down Expand Up @@ -74,7 +74,7 @@ exports = module.exports = function staticGzip(dirPath, options){
if (!contentTypeMatch.test) throw new Error('contentTypeMatch: must be a regular expression.');

return function staticGzip(req, res, next){
var url, filename, contentType, acceptEncoding;
var url, filename, contentType, acceptEncoding, charset;

function pass(name) {
var o = Object.create(options);
Expand All @@ -83,7 +83,6 @@ exports = module.exports = function staticGzip(dirPath, options){
}

function sendGzipped(data) {
var charset = mime.charsets.lookup(contentType);
contentType = contentType + (charset ? '; charset=' + charset : '');
res.setHeader('Content-Type', contentType);
res.setHeader('Content-Encoding', 'gzip');
Expand All @@ -93,10 +92,10 @@ exports = module.exports = function staticGzip(dirPath, options){
}

function gzipAndSend(filename, gzipName, mtime) {
gzippo(filename, function(gzippedData) {
gzippo(filename, charset, function(gzippedData) {
gzippoCache[gzipName] = {
'ctime': Date.now(),
'mtime': mtime,
'mtime': mtime,
'content': gzippedData
};
sendGzipped(gzippedData);
Expand All @@ -112,6 +111,7 @@ exports = module.exports = function staticGzip(dirPath, options){
filename = path.join(dirPath, url.pathname);

contentType = mime.lookup(filename);
charset = mime.charsets.lookup(contentType);
acceptEncoding = req.headers['accept-encoding'] || '';

if (!contentTypeMatch.test(contentType)) {
Expand Down
Empty file added test/fixtures/utf8.gz
Empty file.
32 changes: 32 additions & 0 deletions test/fixtures/utf8.txt
@@ -0,0 +1,32 @@
English: The quick brown fox jumps over the lazy dog.
Jamaican: Chruu, a kwik di kwik brong fox a jomp huova di liezi daag de, yu no siit?
Irish: "An ḃfuil do ċroí ag bualaḋ ó ḟaitíos an ġrá a ṁeall lena ṗóg éada ó ṡlí do leasa ṫú?" "D'ḟuascail Íosa Úrṁac na hÓiġe Beannaiṫe pór Éava agus Áḋaiṁ."
Dutch: Pa's wijze lynx bezag vroom het fikse aquaduct.
German: Falsches Üben von Xylophonmusik quält jeden größeren Zwerg. (1)
German: Im finſteren Jagdſchloß am offenen Felsquellwaſſer patzte der affig-flatterhafte kauzig-höf‌liche Bäcker über ſeinem verſifften kniffligen C-Xylophon. (2)
Norwegian: Blåbærsyltetøy ("blueberry jam", includes every extra letter used in Norwegian).
Swedish: Flygande bäckasiner söka strax hwila på mjuka tuvor.
Icelandic: Sævör grét áðan því úlpan var ónýt.
Finnish: (5) Törkylempijävongahdus (This is a perfect pangram, every letter appears only once. Translating it is an art on its own, but I'll say "rude lover's yelp". :-D)
Finnish: (5) Albert osti fagotin ja töräytti puhkuvan melodian. (Albert bought a bassoon and hooted an impressive melody.)
Finnish: (5) On sangen hauskaa, että polkupyörä on maanteiden jokapäiväinen ilmiö. (It's pleasantly amusing, that the bicycle is an everyday sight on the roads.)
Polish: Pchnąć w tę łódź jeża lub osiem skrzyń fig.
Czech: Příliš žluťoučký kůň úpěl ďábelské kódy.
Slovak: Starý kôň na hŕbe kníh žuje tíško povädnuté ruže, na stĺpe sa ďateľ učí kvákať novú ódu o živote.
Greek (monotonic): ξεσκεπάζω την ψυχοφθόρα βδελυγμία
Greek (polytonic): ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία
Russian: Съешь же ещё этих мягких французских булок да выпей чаю.
Russian: В чащах юга жил-был цитрус? Да, но фальшивый экземпляр! ёъ.
Bulgarian: Жълтата дюля беше щастлива, че пухът, който цъфна, замръзна като гьон.
Sami (Northern): Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža.
Hungarian: Árvíztűrő tükörfúrógép.
Spanish: El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y frío, añoraba a su querido cachorro.
Portuguese: O próximo vôo à noite sobre o Atlântico, põe freqüentemente o único médico. (3)
French: Les naïfs ægithales hâtifs pondant à Noël où il gèle sont sûrs d'être déçus en voyant leurs drôles d'œufs abîmés.
Esperanto: Eĥoŝanĝo ĉiuĵaŭde.
Hebrew: זה כיף סתם לשמוע איך תנצח קרפד עץ טוב בגן.
Japanese (Hiragana):
いろはにほへど ちりぬるを
わがよたれぞ つねならむ
うゐのおくやま けふこえて
あさきゆめみじ ゑひもせず (4)
Binary file added test/fixtures/utf8.txt.gz
Binary file not shown.
21 changes: 21 additions & 0 deletions test/staticGzipTest.js
Expand Up @@ -88,5 +88,26 @@ module.exports = {
res.headers.should.have.property('content-length', '15');
}
);
},
'requesting gzipped utf-8 file succeeds': function() {
assert.response(app,
{
url: '/utf8.txt',
headers: {
'Accept-Encoding':"gzip"
}
},
function(res){
var gzippedData = res.body;
assert.response(app, { url: '/utf8.txt.gz' }, function(res) {
assert.equal(gzippedData, res.body, "Data is not gzipped");
});

res.statusCode.should.equal(200);
res.headers.should.have.property('content-type', 'text/plain; charset=UTF-8');
res.headers.should.have.property('content-length', '2031');
res.headers.should.have.property('content-encoding', 'gzip');
}
);
}
};

0 comments on commit 4516dc2

Please sign in to comment.