| 1 | // Copyright 2014-2017 The html5ever Project Developers. See the |
| 2 | // COPYRIGHT file at the top-level directory of this distribution. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 5 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 6 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 7 | // option. This file may not be copied, modified, or distributed |
| 8 | // except according to those terms. |
| 9 | |
| 10 | //! This module contains a single struct [`SmallCharSet`]. See its documentation for details. |
| 11 | //! |
| 12 | //! [`SmallCharSet`]: struct.SmallCharSet.html |
| 13 | |
/// A compact set of "small characters": characters whose Unicode scalar
/// value is below 64.
///
/// Membership is kept in a 64-bit bitmap, one bit per possible value.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct SmallCharSet {
    /// The membership bitmap: bit `n` is set when the character with
    /// scalar value `n` belongs to the set.
    pub bits: u64,
}
| 22 | |
| 23 | impl SmallCharSet { |
| 24 | /// Checks whether a character (u8 value below 64) is stored in the SmallCharSet. |
| 25 | /// |
| 26 | /// # Examples |
| 27 | /// |
| 28 | /// ```ignore |
| 29 | /// # use markup5ever::SmallCharSet; |
| 30 | /// let set = SmallCharSet { |
| 31 | /// bits: 0b00000000_01000000_00000100_00000000_00000000_00000000_00010000_00000000 |
| 32 | /// }; |
| 33 | /// assert!(set.contains(64)); |
| 34 | /// assert!(set.contains(b'6' )); // `b'6'` is the same as 64u8 |
| 35 | /// ``` |
| 36 | #[inline ] |
| 37 | fn contains(&self, n: u8) -> bool { |
| 38 | 0 != (self.bits & (1 << (n as usize))) |
| 39 | } |
| 40 | |
| 41 | /// Count the number of bytes of characters at the beginning of `buf` which are not in the set. |
| 42 | /// |
| 43 | /// This functionality is used in [`BufferQueue::pop_except_from`]. |
| 44 | /// |
| 45 | /// # Examples |
| 46 | /// |
| 47 | /// ``` |
| 48 | /// # #[macro_use ] extern crate markup5ever; |
| 49 | /// # fn main() { |
| 50 | /// let set = small_char_set!(48 49 50); // '0' '1' '2' |
| 51 | /// // `test` is 4 chars, ๐ is 4 chars, then we meet a character in the set |
| 52 | /// let test_str = "test๐01232afd" ; |
| 53 | /// assert_eq!(set.nonmember_prefix_len(test_str), 8); |
| 54 | /// # } |
| 55 | /// ``` |
| 56 | /// |
| 57 | /// [`BufferQueue::pop_except_from`]: buffer_queue/struct.BufferQueue.html#method.pop_except_from |
| 58 | pub fn nonmember_prefix_len(&self, buf: &str) -> u32 { |
| 59 | let mut n = 0; |
| 60 | for b in buf.bytes() { |
| 61 | if b >= 64 || !self.contains(b) { |
| 62 | n += 1; |
| 63 | } else { |
| 64 | break; |
| 65 | } |
| 66 | } |
| 67 | n |
| 68 | } |
| 69 | } |
| 70 | |
#[cfg(test)]
mod test {
    /// Builds strings of `x` filler bytes, one set member, then `y` filler
    /// bytes, and checks that the reported prefix length is exactly `x`.
    #[test]
    fn nonmember_prefix() {
        // The set does not depend on the loop variables, so build it once.
        let set = small_char_set!('&' '\0');

        for &c in ['&', '\0'].iter() {
            for x in 0..48u32 {
                for y in 0..48u32 {
                    let mut s = "x".repeat(x as usize);
                    s.push(c);
                    s.push_str(&"x".repeat(y as usize));

                    assert_eq!(x, set.nonmember_prefix_len(&s));
                }
            }
        }
    }
}
| 89 | |