diff --git a/README.md b/README.md index 767daed..643a8bf 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,9 @@ buf = iconv.encode("Sample input string", 'win1251'); // Check if encoding is supported iconv.encodingExists("us-ascii") + +// Calculate actual length in bytes. +len = iconv.byteLength('hello, world!', 'win1251') ``` ### Streaming API (Node v0.10+) diff --git a/encodings/dbcs-codec.js b/encodings/dbcs-codec.js index 7b3c980..8b35f96 100644 --- a/encodings/dbcs-codec.js +++ b/encodings/dbcs-codec.js @@ -431,6 +431,13 @@ DBCSEncoder.prototype.end = function() { return newBuf.slice(0, j); } +DBCSEncoder.prototype.byteLength = function(str) { + var res = this.write(str); + var trail = this.end(); + + return res.length + ((trail === undefined) ? 0 : trail.length); +} + // Export for testing DBCSEncoder.prototype.findIdx = findIdx; diff --git a/encodings/internal.js b/encodings/internal.js index 4223a98..269b84c 100644 --- a/encodings/internal.js +++ b/encodings/internal.js @@ -75,6 +75,10 @@ InternalEncoder.prototype.write = function(str) { InternalEncoder.prototype.end = function() { } +InternalEncoder.prototype.byteLength = function(str) { + return Buffer.byteLength(str, this.enc) +} + //------------------------------------------------------------------------------ // Except base64 encoder, which must keep its state. @@ -96,6 +100,10 @@ InternalEncoderBase64.prototype.end = function() { return new Buffer(this.prevStr, "base64"); } +InternalEncoderBase64.prototype.byteLength = function(str) { + return Buffer.byteLength(str, "base64"); +} + //------------------------------------------------------------------------------ // CESU-8 encoder is also special. @@ -127,6 +135,10 @@ InternalEncoderCesu8.prototype.write = function(str) { InternalEncoderCesu8.prototype.end = function() { } +InternalEncoderCesu8.prototype.byteLength = function(str) { + return this.write(str).length; +} + //------------------------------------------------------------------------------ // CESU-8 decoder is not implemented in Node v4.0+ diff --git a/encodings/sbcs-codec.js b/encodings/sbcs-codec.js index 7789e00..07dae54 100644 --- a/encodings/sbcs-codec.js +++ b/encodings/sbcs-codec.js @@ -48,6 +48,10 @@ SBCSEncoder.prototype.write = function(str) { return buf; } +SBCSEncoder.prototype.byteLength = function(str) { + return str.length; +} + SBCSEncoder.prototype.end = function() { } diff --git a/encodings/utf16.js b/encodings/utf16.js index 7e8f159..eaa8909 100644 --- a/encodings/utf16.js +++ b/encodings/utf16.js @@ -30,6 +30,10 @@ Utf16BEEncoder.prototype.write = function(str) { Utf16BEEncoder.prototype.end = function() { } +Utf16BEEncoder.prototype.byteLength = function(str) { + return Buffer.byteLength(str, 'ucs2'); +} + // -- Decoding @@ -98,6 +102,10 @@ Utf16Encoder.prototype.end = function() { return this.encoder.end(); } +Utf16Encoder.prototype.byteLength = function(str) { + return Buffer.byteLength(str, 'ucs2'); +} + // -- Decoding diff --git a/encodings/utf7.js b/encodings/utf7.js index 19b7194..d8579aa 100644 --- a/encodings/utf7.js +++ b/encodings/utf7.js @@ -36,6 +36,10 @@ Utf7Encoder.prototype.write = function(str) { Utf7Encoder.prototype.end = function() { } +Utf7Encoder.prototype.byteLength = function(str) { + return this.write(str).length; +} + // -- Decoding diff --git a/lib/index.d.ts b/lib/index.d.ts index b9c8361..476d315 100644 --- a/lib/index.d.ts +++ b/lib/index.d.ts @@ -12,6 +12,8 @@ declare module 'iconv-lite' { export function encodingExists(encoding: string): boolean; + export function byteLength(content: string, encoding: string): Number; + export function decodeStream(encoding: string, options?: Options): NodeJS.ReadWriteStream; export function encodeStream(encoding: string, options?: Options): NodeJS.ReadWriteStream; diff --git a/lib/index.js b/lib/index.js index 10aced4..e01fb3a 100644 --- a/lib/index.js +++ b/lib/index.js @@ -128,6 +128,11 @@ iconv.getDecoder = function getDecoder(encoding, options) { return decoder; } +iconv.byteLength = function byteLength(str, encoding) { + str = "" + (str || ""); // Ensure string. + + return iconv.getEncoder(encoding).byteLength(str) +} // Load extensions in Node. All of them are omitted in Browserify build via 'browser' field in package.json. var nodeVer = typeof process !== 'undefined' && process.versions && process.versions.node; diff --git a/test/cesu8-test.js b/test/cesu8-test.js index 3c7c56c..965e3e8 100644 --- a/test/cesu8-test.js +++ b/test/cesu8-test.js @@ -20,4 +20,7 @@ describe("CESU-8 codec", function() { assert.equal(iconv.decode(new Buffer("eda081edb080", 'hex'), "cesu8"), "𐐀"); assert.equal(iconv.decode(new Buffer("eda0bdedb8b1", 'hex'), "cesu8"), "😱"); }); + it('has correct length in bytes', function() { + assert.equal(iconv.byteLength("😱", "cesu8"), Buffer.byteLength('eda0bdedb8b1', 'hex')); + }); }); diff --git a/test/dbcs-test.js b/test/dbcs-test.js index 127fef2..560b0ef 100644 --- a/test/dbcs-test.js +++ b/test/dbcs-test.js @@ -243,5 +243,17 @@ describe("Full DBCS encoding tests", function() { }); })(enc); } -}); + + it('correct length in bytes', function() { + var testStringLatin1 = 'hello'; + + for (var enc in iconv.encodings) { + if (iconv.encodings[enc].type === '_dbcs') { + assert.equal(iconv.byteLength(testStringLatin1, enc), testStringLatin1.length); + return; + } + } + this.skip(); + }); +}); diff --git a/test/main-test.js b/test/main-test.js index a9cdbc7..a2342bf 100644 --- a/test/main-test.js +++ b/test/main-test.js @@ -78,4 +78,12 @@ describe("Generic UTF8-UCS2 tests", function() { delete Object.prototype.permits; delete Array.prototype.sample2; }); + + it('Internal encodings has correct length in bytes', function() { + assert.equal(iconv.byteLength(testStringLatin1, 'utf8'), Buffer.byteLength(testStringLatin1, 'utf8')); + assert.equal(iconv.byteLength(testStringLatin1, 'ucs2'), Buffer.byteLength(testStringLatin1, 'ucs2')); + assert.equal(iconv.byteLength(testStringLatin1, 'binary'), Buffer.byteLength(testStringLatin1, 'binary')); + assert.equal(iconv.byteLength(testStringBase64, 'base64'), Buffer.byteLength(testStringBase64, 'base64')); + assert.equal(iconv.byteLength(testStringHex, 'hex'), Buffer.byteLength(testStringHex, 'hex')); + }); }); diff --git a/test/sbcs-test.js b/test/sbcs-test.js index ac7665d..847a8b4 100644 --- a/test/sbcs-test.js +++ b/test/sbcs-test.js @@ -157,5 +157,17 @@ describe("Full SBCS encoding tests", function() { assert.strictEqual(iconv.encode(str, enc).toString('hex'), convBack.convert(new Buffer(str)).toString('hex')); */ })(enc); -}); + it('correct length in bytes', function() { + var testStringLatin1 = 'hello'; + + for (var enc in iconv.encodings) { + if (iconv.encodings[enc].type === '_sbcs') { + assert.equal(iconv.byteLength(testStringLatin1, enc), testStringLatin1.length); + return; + } + } + + this.skip(); + }); +}); diff --git a/test/utf16-test.js b/test/utf16-test.js index 58dc7b7..457e0a8 100644 --- a/test/utf16-test.js +++ b/test/utf16-test.js @@ -20,6 +20,10 @@ describe("UTF-16BE codec", function() { it("decodes uneven length buffers with no error", function() { assert.equal(iconv.decode(new Buffer([0, 0x61, 0]), 'UTF16-BE'), "a"); }); + + it('has correct length in bytes', function() { + assert.equal(iconv.byteLength(testStr, 'UTF16-BE'), utf16beBuf.length); + }); }); describe("UTF-16 encoder", function() { @@ -31,6 +35,10 @@ describe("UTF-16 encoder", function() { assert.equal(iconv.encode(testStr, "utf-16", {use: 'UTF-16LE'}).toString('hex'), utf16leBOM.toString('hex') + new Buffer(testStr, 'ucs2').toString('hex')); }); + + it('has correct length in bytes', function() { + assert.equal(iconv.byteLength(testStr, 'utf-16'), utf16leBuf.length); + }); }); describe("UTF-16 decoder", function() { diff --git a/test/utf7-test.js b/test/utf7-test.js index 1c21c02..260769e 100644 --- a/test/utf7-test.js +++ b/test/utf7-test.js @@ -85,6 +85,10 @@ describe("UTF-7 codec", function() { assert.equal(iconv.decode(new Buffer("+AMAA4A!Next"), 'utf-7'), "\u00c0\u00e0!Next"); }); + + it('has correct length in bytes', function() { + assert.equal(iconv.byteLength('A\u2262\u0391.', 'utf-7'), 'A+ImIDkQ-.'.length); + }); }); describe("UTF-7-IMAP codec", function() {