allow query time resolution of wildcard

2023-02-24 20:32:40 +01:00 · 2023-02-24 20:32:40 +01:00 · bfe039c6d7
commit bfe039c6d7
parent 1c832b747a
2 changed files with 67 additions and 15 deletions
--- a/anagrams/src/lib.rs
+++ b/anagrams/src/lib.rs
@ -1,4 +1,4 @@
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 #[cfg(not(target_arch = "wasm32"))]
 use std::fs::File;
 use std::hash::{Hash, Hasher};
@ -20,13 +20,13 @@ impl Dict {
        let words: std::io::Result<Vec<_>> = file
            .lines()
            .enumerate()
-            .filter_map(|(i, e)| (i != 0).then(|| e)) // Skip the first line (size of the dict)
+            .filter_map(|(i, e)| (i != 0).then(|| e)) // Skip the first line ( = to size of the dict)
            .into_iter()
            .collect();
        let words = words?;
        Ok(Self {
            words,
-            nb_wild_cards: 1,
+            nb_wild_cards: 2,
            wild_card_char: '?',
        })
    }
@ -36,7 +36,7 @@ impl Dict {
            words: string
                .lines()
                .enumerate()
-                .filter_map(|(i, e)| (i != 0).then(|| e.into()))
+                .filter_map(|(i, e)| (i != 0).then(|| e.into())) // Skip the first line ( = to size of the dict)
                .into_iter()
                .collect(),
            nb_wild_cards,
@ -58,12 +58,28 @@ impl Dict {
 /// The precomputed hash map. The words are regrouped in vector, indexed by there
 /// [`FrequencyHash`].
 ///
-/// Hash maps have an average access complexity in O(1)
-pub struct AnagramDict(HashMap<FrequencyHash, Vec<String>>);
+/// Lookups have an average complexity of O(|letters|^{max(0, l - nb_wild_cards)}), where `l`
+/// is the number of wild cards in the query and `letters` is the set of characters of the dict
+pub struct AnagramDict {
+    /// The precomputed anagram equivalence classes, indexed by their character
+    /// frequency
+    map: HashMap<FrequencyHash, Vec<String>>,
+    /// Number of precomputed wild cards per word
+    nb_wild_cards: u8,
+    /// The character used as a wild card
+    wild_card_char: char,
+    /// The set of characters used in the dict
+    letters: HashSet<char>,
+}

 impl From<&Dict> for AnagramDict {
    fn from(dict: &Dict) -> Self {
-        let mut map = Self(HashMap::<FrequencyHash, Vec<String>>::new());
+        let mut map = Self {
+            map: HashMap::<FrequencyHash, Vec<String>>::new(),
+            nb_wild_cards: dict.nb_wild_cards.clone(),
+            wild_card_char: dict.wild_card_char.clone(),
+            letters: HashSet::<char>::new(),
+        };

        #[cfg(not(target_arch = "wasm32"))]
        let mut i = 0;
@ -71,6 +87,12 @@ impl From<&Dict> for AnagramDict {
        let len = dict.words.len();
        for word in dict.words.iter() {
            let freq = FrequencyHash::compute(word);
+            for char_ in word.chars() {
+                if char_ != map.wild_card_char {
+                    map.letters.insert(char_);
+                }
+            }
+
            map.add_word_with_wild_card(freq, word, dict.nb_wild_cards, dict.wild_card_char);
            #[cfg(not(target_arch = "wasm32"))]
            {
@ -86,8 +108,33 @@ impl AnagramDict {
    /// Find all anagrams to a word present in this dict
    pub fn find(&self, word: &str) -> Option<Vec<String>> {
        let freq = FrequencyHash::compute(word);
-        self.0.get(&freq).cloned()
+        self.find_freq(freq)
    }
+
+    /// Recursively resolve wild cards until at most `self.nb_wild_cards` of them remain
+    fn find_freq(&self, mut freq: FrequencyHash) -> Option<Vec<String>> {
+        if freq.get_freq(self.wild_card_char) > self.nb_wild_cards {
+            let mut result = vec![];
+            freq.remove_one_char(self.wild_card_char);
+            for char_ in self.letters.iter() {
+                let mut freq = freq.clone();
+                freq.add_one_char(char_.clone());
+                if let Some(anagrams) = self.find_freq(freq) {
+                    for anagram in anagrams {
+                        result.push(anagram);
+                    }
+                }
+            }
+            if result.is_empty() {
+                None
+            } else {
+                Some(result)
+            }
+        } else {
+            self.map.get(&freq).cloned()
+        }
+    }
+
    fn add_word_with_wild_card(
        &mut self,
        freq: FrequencyHash,
@ -95,7 +142,7 @@ impl AnagramDict {
        nb_wild_card: u8,
        wild_card_symbole: char,
    ) {
-        self.0
+        self.map
            .entry(freq.clone())
            .and_modify(|anagrams| anagrams.push(word.to_string()))
            .or_insert(vec![word.to_string()]);
@ -158,6 +205,15 @@ impl FrequencyHash {
            .and_modify(|counter| *counter += 1)
            .or_insert(1);
    }
+
+    /// Return the number of occurrences of a char in the word
+    fn get_freq(&self, char_: char) -> u8 {
+        if let Some(freq) = self.0.get(&char_) {
+            freq.clone()
+        } else {
+            0
+        }
+    }
 }

 impl Hash for FrequencyHash {
--- a/web_gui/index.html
+++ b/web_gui/index.html
@ -88,14 +88,10 @@
  <body>
    <div id="main_div">
    <p> Load <a href="./dict.dat">dict.dat</a> before starting </p>
-    <p> Wildcards are implemented but increase a lot the precomputing complexity</p>
-    <p> The precomputation time has a complexity if \(\mathcal{O}(\sum_{i=0}^W \binom{S}{i} . n)\) where \(S\) is the maximum size of the words, \(W\) the maximum number of wild card, and \(n\) the size of the dictionary. For \(S = 15\) and \(W = 3\), this is more or less \(500 . n\)</p>
+    <p> The precomputation has a time and space complexity of \(\mathcal{O}(\sum_{i=0}^W \binom{S}{i} \cdot n)\), where \(S\) is the maximum size of the words, \(W\) the maximum number of precomputed wildcards per word, and \(n\) the size of the dictionary. For \(S = 15\) and \(W = 3\), this is more or less \(500 \cdot n\); 2 precomputed wildcards is a good default setting.</p>
+    <p> The query has a time complexity of \(\mathcal{O}(|A|^{\max(0, W'-W)})\), where \(A\) is the set of letters used in the dictionary and \(W'\) is the number of wildcards in the query. </p>
    <p> The wildcard caracter is '?'</p>
    <p> Sources in rust are available <a href="./lib.rs">here</a> </p>
-    <h3> TODO: </h3>
-    <ul>
-      <li> Allow query-time resolution of wild cards when there are more wildcards than the number of precomputed wild cards</li>
-    </ul>
    <div id="upload_dict_div">
      <label id="upload_dict_label" for="upload_dict">Select the dictionary</label>
      <input type="file" id="upload_dict" style="opacity:0">