diff --git a/anagrams/src/lib.rs b/anagrams/src/lib.rs index ac5c8e7..9003efe 100644 --- a/anagrams/src/lib.rs +++ b/anagrams/src/lib.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; #[cfg(not(target_arch = "wasm32"))] use std::fs::File; use std::hash::{Hash, Hasher}; @@ -20,13 +20,13 @@ impl Dict { let words: std::io::Result> = file .lines() .enumerate() - .filter_map(|(i, e)| (i != 0).then(|| e)) // Skip the first line (size of the dict) + .filter_map(|(i, e)| (i != 0).then(|| e)) // Skip the first line ( = to size of the dict) .into_iter() .collect(); let words = words?; Ok(Self { words, - nb_wild_cards: 1, + nb_wild_cards: 2, wild_card_char: '?', }) } @@ -36,7 +36,7 @@ impl Dict { words: string .lines() .enumerate() - .filter_map(|(i, e)| (i != 0).then(|| e.into())) + .filter_map(|(i, e)| (i != 0).then(|| e.into())) // Skip the first line ( = to size of the dict) .into_iter() .collect(), nb_wild_cards, @@ -58,12 +58,28 @@ impl Dict { /// The precomputed hash map. The words are regrouped in vector, indexed by there /// [`FrequencyHash`]. 
/// -/// Hash maps have an average access complexity in O(1) -pub struct AnagramDict(HashMap>); +/// Hash maps have an average access complexity in O(|letters|^{max(0, l-nb_wild_cards)}) where l +/// is the number of wild cards in the query +pub struct AnagramDict { + /// The precomputed anagrams classes of equivalences indexed by there caracter + /// frequency + map: HashMap>, + /// Number of precomputed wild cards per word + nb_wild_cards: u8, + /// The character used as a wild card + wild_card_char: char, + /// The set of characters used in the dict + letters: HashSet, +} impl From<&Dict> for AnagramDict { fn from(dict: &Dict) -> Self { - let mut map = Self(HashMap::>::new()); + let mut map = Self { + map: HashMap::>::new(), + nb_wild_cards: dict.nb_wild_cards.clone(), + wild_card_char: dict.wild_card_char.clone(), + letters: HashSet::::new(), + }; #[cfg(not(target_arch = "wasm32"))] let mut i = 0; @@ -71,6 +87,12 @@ impl From<&Dict> for AnagramDict { let len = dict.words.len(); for word in dict.words.iter() { let freq = FrequencyHash::compute(word); + for char_ in word.chars() { + if char_ != map.wild_card_char { + map.letters.insert(char_); + } + } + map.add_word_with_wild_card(freq, word, dict.nb_wild_cards, dict.wild_card_char); #[cfg(not(target_arch = "wasm32"))] { @@ -86,8 +108,33 @@ impl AnagramDict { /// Find all anagrams to a word present in this dict pub fn find(&self, word: &str) -> Option> { let freq = FrequencyHash::compute(word); - self.0.get(&freq).cloned() + self.find_freq(freq) } + + /// Recursivelly resolve wild cards until there is less wildcards than [`self.nb_wild_cards`] + fn find_freq(&self, mut freq: FrequencyHash) -> Option> { + if freq.get_freq(self.wild_card_char) > self.nb_wild_cards { + let mut result = vec![]; + freq.remove_one_char(self.wild_card_char); + for char_ in self.letters.iter() { + let mut freq = freq.clone(); + freq.add_one_char(char_.clone()); + if let Some(anagrams) = self.find_freq(freq) { + for anagram in anagrams { 
+ result.push(anagram); + } + } + } + if result.is_empty() { + None + } else { + Some(result) + } + } else { + self.map.get(&freq).cloned() + } + } + fn add_word_with_wild_card( &mut self, freq: FrequencyHash, @@ -95,7 +142,7 @@ impl AnagramDict { nb_wild_card: u8, wild_card_symbole: char, ) { - self.0 + self.map .entry(freq.clone()) .and_modify(|anagrams| anagrams.push(word.to_string())) .or_insert(vec![word.to_string()]); @@ -158,6 +205,15 @@ impl FrequencyHash { .and_modify(|counter| *counter += 1) .or_insert(1); } + + /// Return the number of occurence of a char in the word + fn get_freq(&self, char_: char) -> u8 { + if let Some(freq) = self.0.get(&char_) { + freq.clone() + } else { + 0 + } + } } impl Hash for FrequencyHash { diff --git a/web_gui/index.html b/web_gui/index.html index b808bdf..f52d959 100644 --- a/web_gui/index.html +++ b/web_gui/index.html @@ -88,14 +88,10 @@

Load dict.dat before starting

-

Wildcards are implemented but increase a lot the precomputing complexity

-

The precomputation time has a complexity if \(\mathcal{O}(\sum_{i=0}^W \binom{S}{i} . n)\) where \(S\) is the maximum size of the words, \(W\) the maximum number of wild card, and \(n\) the size of the dictionary. For \(S = 15\) and \(W = 3\), this is more or less \(500 . n\)

+

The precomputation has a time and space complexity of \(\mathcal{O}(\sum_{i=0}^W \binom{S}{i} \cdot n)\), where \(S\) is the maximum size of the words, \(W\) the maximum number of precomputed wildcards per word, and \(n\) the size of the dictionary. For \(S = 15\) and \(W = 3\), this is roughly \(576 \cdot n\). Two precomputed wildcards is a good default setting.

+

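A query has a time complexity of \(\mathcal{O}(|\Sigma|^{\max(0, W'-W)})\), where \(\Sigma\) is the set of letters appearing in the dictionary and \(W'\) is the number of wildcards in the query: each wildcard beyond the \(W\) precomputed ones is resolved at query time by trying every letter of \(\Sigma\).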

The wildcard caracter is '?'

Sources in rust are available here

-

TODO:

-
    -
  • Allow query-time resolution of wild cards when there are more wildcards than the number of precomputed wild cards
  • -