Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement byteLength method. #158

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ buf = iconv.encode("Sample input string", 'win1251');

// Check if encoding is supported
iconv.encodingExists("us-ascii")

// Calculate actual length in bytes.
len = iconv.byteLength('hello, world!', 'win1251')
```

### Streaming API (Node v0.10+)
Expand Down
7 changes: 7 additions & 0 deletions encodings/dbcs-codec.js
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,13 @@ DBCSEncoder.prototype.end = function() {
return newBuf.slice(0, j);
}

DBCSEncoder.prototype.byteLength = function(str) {
var res = this.write(str);
var trail = this.end();

return res.length + ((trail === undefined) ? 0 : trail.length);
}

// Export for testing
DBCSEncoder.prototype.findIdx = findIdx;

Expand Down
12 changes: 12 additions & 0 deletions encodings/internal.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ InternalEncoder.prototype.write = function(str) {
InternalEncoder.prototype.end = function() {
}

InternalEncoder.prototype.byteLength = function(str) {
return Buffer.byteLength(str, this.enc)
}


//------------------------------------------------------------------------------
// Except base64 encoder, which must keep its state.
Expand All @@ -96,6 +100,10 @@ InternalEncoderBase64.prototype.end = function() {
return new Buffer(this.prevStr, "base64");
}

InternalEncoderBase64.prototype.byteLength = function(str) {
return Buffer.byteLength(str, "base64");
}


//------------------------------------------------------------------------------
// CESU-8 encoder is also special.
Expand Down Expand Up @@ -127,6 +135,10 @@ InternalEncoderCesu8.prototype.write = function(str) {
InternalEncoderCesu8.prototype.end = function() {
}

InternalEncoderCesu8.prototype.byteLength = function(str) {
return this.write(str).length;
}

//------------------------------------------------------------------------------
// CESU-8 decoder is not implemented in Node v4.0+

Expand Down
4 changes: 4 additions & 0 deletions encodings/sbcs-codec.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ SBCSEncoder.prototype.write = function(str) {
return buf;
}

SBCSEncoder.prototype.byteLength = function(str) {
return str.length;
}

SBCSEncoder.prototype.end = function() {
}

Expand Down
8 changes: 8 additions & 0 deletions encodings/utf16.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ Utf16BEEncoder.prototype.write = function(str) {
Utf16BEEncoder.prototype.end = function() {
}

Utf16BEEncoder.prototype.byteLength = function(str) {
return Buffer.byteLength(str, 'ucs2');
}


// -- Decoding

Expand Down Expand Up @@ -98,6 +102,10 @@ Utf16Encoder.prototype.end = function() {
return this.encoder.end();
}

Utf16Encoder.prototype.byteLength = function(str) {
return Buffer.byteLength(str, 'ucs2');
}


// -- Decoding

Expand Down
4 changes: 4 additions & 0 deletions encodings/utf7.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ Utf7Encoder.prototype.write = function(str) {
Utf7Encoder.prototype.end = function() {
}

Utf7Encoder.prototype.byteLength = function(str) {
return this.write(str).length;
}


// -- Decoding

Expand Down
2 changes: 2 additions & 0 deletions lib/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ declare module 'iconv-lite' {

export function encodingExists(encoding: string): boolean;

export function byteLength(content: string, encoding: string): Number;

export function decodeStream(encoding: string, options?: Options): NodeJS.ReadWriteStream;

export function encodeStream(encoding: string, options?: Options): NodeJS.ReadWriteStream;
Expand Down
5 changes: 5 additions & 0 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,11 @@ iconv.getDecoder = function getDecoder(encoding, options) {
return decoder;
}

iconv.byteLength = function byteLength(str, encoding) {
str = "" + (str || ""); // Ensure string.

return iconv.getEncoder(encoding).byteLength(str)
}

// Load extensions in Node. All of them are omitted in Browserify build via 'browser' field in package.json.
var nodeVer = typeof process !== 'undefined' && process.versions && process.versions.node;
Expand Down
3 changes: 3 additions & 0 deletions test/cesu8-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,7 @@ describe("CESU-8 codec", function() {
assert.equal(iconv.decode(new Buffer("eda081edb080", 'hex'), "cesu8"), "𐐀");
assert.equal(iconv.decode(new Buffer("eda0bdedb8b1", 'hex'), "cesu8"), "😱");
});
it('has correct length in bytes', function() {
assert.equal(iconv.byteLength("😱", "cesu8"), Buffer.byteLength('eda0bdedb8b1', 'hex'));
});
});
14 changes: 13 additions & 1 deletion test/dbcs-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -243,5 +243,17 @@ describe("Full DBCS encoding tests", function() {
});
})(enc);
}
});

it('correct length in bytes', function() {
var testStringLatin1 = 'hello';

for (var enc in iconv.encodings) {
if (iconv.encodings[enc].type === '_dbcs') {
assert.equal(iconv.byteLength(testStringLatin1, enc), testStringLatin1.length);
return;
}
}

this.skip();
});
});
8 changes: 8 additions & 0 deletions test/main-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,12 @@ describe("Generic UTF8-UCS2 tests", function() {
delete Object.prototype.permits;
delete Array.prototype.sample2;
});

it('Internal encodings has correct length in bytes', function() {
assert.equal(iconv.byteLength(testStringLatin1, 'utf8'), Buffer.byteLength(testStringLatin1, 'utf8'));
assert.equal(iconv.byteLength(testStringLatin1, 'ucs2'), Buffer.byteLength(testStringLatin1, 'ucs2'));
assert.equal(iconv.byteLength(testStringLatin1, 'binary'), Buffer.byteLength(testStringLatin1, 'binary'));
assert.equal(iconv.byteLength(testStringBase64, 'base64'), Buffer.byteLength(testStringBase64, 'base64'));
assert.equal(iconv.byteLength(testStringHex, 'hex'), Buffer.byteLength(testStringHex, 'hex'));
});
});
14 changes: 13 additions & 1 deletion test/sbcs-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,5 +157,17 @@ describe("Full SBCS encoding tests", function() {
assert.strictEqual(iconv.encode(str, enc).toString('hex'), convBack.convert(new Buffer(str)).toString('hex'));
*/
})(enc);
});

it('correct length in bytes', function() {
var testStringLatin1 = 'hello';

for (var enc in iconv.encodings) {
if (iconv.encodings[enc].type === '_sbcs') {
assert.equal(iconv.byteLength(testStringLatin1, enc), testStringLatin1.length);
return;
}
}

this.skip();
});
});
8 changes: 8 additions & 0 deletions test/utf16-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ describe("UTF-16BE codec", function() {
it("decodes uneven length buffers with no error", function() {
assert.equal(iconv.decode(new Buffer([0, 0x61, 0]), 'UTF16-BE'), "a");
});

it('has correct length in bytes', function() {
assert.equal(iconv.byteLength(testStr, 'UTF16-BE'), utf16beBuf.length);
});
});

describe("UTF-16 encoder", function() {
Expand All @@ -31,6 +35,10 @@ describe("UTF-16 encoder", function() {
assert.equal(iconv.encode(testStr, "utf-16", {use: 'UTF-16LE'}).toString('hex'),
utf16leBOM.toString('hex') + new Buffer(testStr, 'ucs2').toString('hex'));
});

it('has correct length in bytes', function() {
assert.equal(iconv.byteLength(testStr, 'utf-16'), utf16leBuf.length);
});
});

describe("UTF-16 decoder", function() {
Expand Down
4 changes: 4 additions & 0 deletions test/utf7-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ describe("UTF-7 codec", function() {
assert.equal(iconv.decode(new Buffer("+AMAA4A!Next"), 'utf-7'), "\u00c0\u00e0!Next");

});

it('has correct length in bytes', function() {
assert.equal(iconv.byteLength('A\u2262\u0391.', 'utf-7'), 'A+ImIDkQ-.'.length);
});
});

describe("UTF-7-IMAP codec", function() {
Expand Down