diff --git a/src/html/local_name.rs b/src/html/local_name.rs
index a334e000..0d267c0a 100644
--- a/src/html/local_name.rs
+++ b/src/html/local_name.rs
@@ -26,53 +26,60 @@ use encoding_rs::Encoding;
// we are safe here, since we'll just get first character shifted left
// by zeroes as repetitave 1 digits get added to the hash.
//
+// LocalNameHash is built incrementally as tags are parsed, so it must be
+// able to invalidate itself when it meets an unrepresentable name.
+// `EMPTY_HASH` (all bits set) is the sentinel for that invalidated state.
+//
// Pub only for integration tests
#[derive(Debug, PartialEq, Eq, Copy, Clone, Default, Hash)]
-pub struct LocalNameHash(Option);
+pub struct LocalNameHash(u64);
+
+const EMPTY_HASH: u64 = !0;
impl LocalNameHash {
#[inline]
#[must_use]
pub const fn new() -> Self {
- Self(Some(0))
+ Self(0)
}
#[inline]
#[must_use]
pub const fn is_empty(&self) -> bool {
- self.0.is_none()
+ self.0 == EMPTY_HASH
}
#[inline]
pub fn update(&mut self, ch: u8) {
- if let Some(h) = self.0 {
- // NOTE: check if we still have space for yet another
- // character and if not then invalidate the hash.
- // Note, that we can't have `1` (which is encoded as 0b00000) as
- // a first character of a tag name, so it's safe to perform
- // check this way.
- self.0 = if h >> (64 - 5) == 0 {
- match ch {
- // NOTE: apply 0x1F mask on ASCII alpha to convert it to the
- // number from 1 to 26 (character case is controlled by one of
- // upper bits which we eliminate with the mask). Then add
- // 5, since numbers from 0 to 5 are reserved for digits.
- // Aftwerards put result as 5 lower bits of the hash.
- b'a'..=b'z' | b'A'..=b'Z' => Some((h << 5) | ((u64::from(ch) & 0x1F) + 5)),
-
- // NOTE: apply 0x0F mask on ASCII digit to convert it to number
- // from 1 to 6. Then subtract 1 to make it zero-based.
- // Afterwards, put result as lower bits of the hash.
- b'1'..=b'6' => Some((h << 5) | ((u64::from(ch) & 0x0F) - 1)),
-
- // NOTE: for any other characters hash function is not
- // applicable, so we completely invalidate the hash.
- _ => None,
- }
- } else {
- None
- };
- }
+ let h = self.0;
+
+ // NOTE: check if we still have space for yet another
+ // character and if not then invalidate the hash.
+ // Note, that we can't have `1` (which is encoded as 0b00000) as
+ // a first character of a tag name, so it's safe to perform
+ // check this way.
+ // `EMPTY_HASH` has all bits set, so it fails this check and stays invalidated.
+ self.0 = if h >> (64 - 5) == 0 {
+ match ch {
+ // NOTE: apply 0x1F mask on ASCII alpha to convert it to the
+ // number from 1 to 26 (character case is controlled by one of
+ // upper bits which we eliminate with the mask). Then add
+ // 5, since numbers from 0 to 5 are reserved for digits.
+ // Afterwards put result as 5 lower bits of the hash.
+ b'a'..=b'z' | b'A'..=b'Z' => (h << 5) | ((u64::from(ch) & 0x1F) + 5),
+
+ // NOTE: apply 0x0F mask on ASCII digit to convert it to number
+ // from 1 to 6. Then subtract 1 to make it zero-based.
+ // Afterwards, put result as lower bits of the hash.
+ b'1'..=b'6' => (h << 5) | ((u64::from(ch) & 0x0F) - 1),
+
+ // NOTE: for any other characters hash function is not
+ // applicable, so we completely invalidate the hash.
+ _ => EMPTY_HASH,
+ }
+ } else {
+ EMPTY_HASH
+ };
}
}
@@ -92,10 +99,7 @@ impl From<&str> for LocalNameHash {
impl PartialEq for LocalNameHash {
#[inline]
fn eq(&self, tag: &Tag) -> bool {
- match self.0 {
- Some(h) => *tag as u64 == h,
- None => false,
- }
+ self.0 == *tag as u64
}
}
@@ -159,7 +163,10 @@ impl PartialEq> for LocalName<'_> {
use LocalName::{Bytes, Hash};
match (self, other) {
- (Hash(s), Hash(o)) => s == o,
+ (Hash(s), Hash(o)) => {
+ debug_assert!(!s.is_empty());
+ s == o
+ }
(Bytes(s), Bytes(o)) => s.eq_ignore_ascii_case(o),
_ => false,
}
@@ -172,7 +179,7 @@ mod tests {
#[test]
fn from_str() {
- assert_eq!(LocalNameHash::from("div"), LocalNameHash(Some(9691u64)));
+ assert_eq!(LocalNameHash::from("div"), LocalNameHash(9691u64));
}
#[test]