From e9e73226cd49ed764353017c71550fb5f84d55c7 Mon Sep 17 00:00:00 2001
From: Yarchik
Date: Thu, 25 Jun 2026 20:21:41 +0100
Subject: [PATCH] fix: encode lone UTF-16 surrogates as U+FFFD to match native bcrypt

A password containing an unpaired UTF-16 surrogate was encoded as WTF-8
(e.g. ED A0 80 for U+D800) by utf8Array, whereas Buffer.from(str, "utf8"),
TextEncoder, and the native C++ bcrypt binding all map a lone surrogate to
the U+FFFD replacement character (EF BF BD). The differing key bytes made
bcryptjs hashes of such passwords non-verifiable across implementations,
contrary to the README's "Compatible to the C++ bcrypt binding" claim.

Emit EF BF BD for any code unit in the surrogate range that is not part of
a valid pair. utf8Length already counts a lone surrogate as 3 bytes, so the
buffer sizing is unchanged.
---
 index.js       |  7 +++++++
 tests/index.js | 19 +++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/index.js b/index.js
index 4a19b3d..288a1cb 100644
--- a/index.js
+++ b/index.js
@@ -380,6 +380,13 @@ function utf8Array(string) {
       buffer[offset++] = ((c1 >> 12) & 63) | 128;
       buffer[offset++] = ((c1 >> 6) & 63) | 128;
       buffer[offset++] = (c1 & 63) | 128;
+    } else if ((c1 & 0xf800) === 0xd800) {
+      // Lone surrogate (not part of a valid pair, handled above). Emit the
+      // U+FFFD replacement character to match Buffer.from(str, "utf8") and the
+      // native bcrypt binding, rather than WTF-8 encoding the raw code unit.
+      buffer[offset++] = 0xef;
+      buffer[offset++] = 0xbf;
+      buffer[offset++] = 0xbd;
     } else {
       buffer[offset++] = (c1 >> 12) | 224;
       buffer[offset++] = ((c1 >> 6) & 63) | 128;
diff --git a/tests/index.js b/tests/index.js
index 525a44d..96ab514 100644
--- a/tests/index.js
+++ b/tests/index.js
@@ -211,6 +211,25 @@ const tests = [
     }
     done();
   },
+  function compat_lone_surrogate(done) {
+    // Passwords containing an unpaired UTF-16 surrogate must hash identically
+    // to the native C++ binding, which encodes a lone surrogate the same way
+    // as Buffer.from(str, "utf8"): as U+FFFD (EF BF BD), not WTF-8 (ED A0 80).
+    var salt = "$2b$06$DCq7YPn5Rq63x1Lad4cll.";
+    var pass = [
+      "\uD800", // high surrogate, alone
+      "\uDC00", // low surrogate, alone
+      "\uD83D", // high surrogate of an emoji, without its low half
+      "a\uD800b", // lone surrogate surrounded by ASCII
+      "\uDC00\uD800", // low then high: still two lone surrogates
+    ];
+    for (var i = 0; i < pass.length; i++) {
+      var hash1 = bcryptcpp.hashSync(pass[i], salt),
+        hash2 = bcrypt.hashSync(pass[i], salt);
+      assert.equal(hash1, hash2);
+    }
+    done();
+  },
   function compat_rounds(done) {
     var salt1 = bcrypt.genSaltSync(0), // $10$ like not set
       salt2 = bcryptcpp.genSaltSync(0);