allow query-time resolution of wildcards
This commit is contained in:
parent
894b0be052
commit
323e77b4e4
|
@ -1,4 +1,4 @@
|
|||
use std::collections::HashMap;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
use std::fs::File;
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
@ -20,13 +20,13 @@ impl Dict {
|
|||
let words: std::io::Result<Vec<_>> = file
|
||||
.lines()
|
||||
.enumerate()
|
||||
.filter_map(|(i, e)| (i != 0).then(|| e)) // Skip the first line (size of the dict)
|
||||
.filter_map(|(i, e)| (i != 0).then(|| e)) // Skip the first line ( = to size of the dict)
|
||||
.into_iter()
|
||||
.collect();
|
||||
let words = words?;
|
||||
Ok(Self {
|
||||
words,
|
||||
nb_wild_cards: 1,
|
||||
nb_wild_cards: 2,
|
||||
wild_card_char: '?',
|
||||
})
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ impl Dict {
|
|||
words: string
|
||||
.lines()
|
||||
.enumerate()
|
||||
.filter_map(|(i, e)| (i != 0).then(|| e.into()))
|
||||
.filter_map(|(i, e)| (i != 0).then(|| e.into())) // Skip the first line ( = to size of the dict)
|
||||
.into_iter()
|
||||
.collect(),
|
||||
nb_wild_cards,
|
||||
|
@ -58,12 +58,28 @@ impl Dict {
|
|||
/// The precomputed hash map. The words are regrouped in vectors, indexed by their
|
||||
/// [`FrequencyHash`].
|
||||
///
|
||||
/// Hash maps have an average access complexity in O(1)
|
||||
pub struct AnagramDict(HashMap<FrequencyHash, Vec<String>>);
|
||||
/// Queries have an average complexity in O(|letters|^{max(0, l-nb_wild_cards)}) where l
|
||||
/// is the number of wild cards in the query
|
||||
pub struct AnagramDict {
|
||||
/// The precomputed anagram equivalence classes, indexed by their character
|
||||
/// frequency
|
||||
map: HashMap<FrequencyHash, Vec<String>>,
|
||||
/// Number of precomputed wild cards per word
|
||||
nb_wild_cards: u8,
|
||||
/// The character used as a wild card
|
||||
wild_card_char: char,
|
||||
/// The set of characters used in the dict
|
||||
letters: HashSet<char>,
|
||||
}
|
||||
|
||||
impl From<&Dict> for AnagramDict {
|
||||
fn from(dict: &Dict) -> Self {
|
||||
let mut map = Self(HashMap::<FrequencyHash, Vec<String>>::new());
|
||||
let mut map = Self {
|
||||
map: HashMap::<FrequencyHash, Vec<String>>::new(),
|
||||
nb_wild_cards: dict.nb_wild_cards.clone(),
|
||||
wild_card_char: dict.wild_card_char.clone(),
|
||||
letters: HashSet::<char>::new(),
|
||||
};
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
let mut i = 0;
|
||||
|
@ -71,6 +87,12 @@ impl From<&Dict> for AnagramDict {
|
|||
let len = dict.words.len();
|
||||
for word in dict.words.iter() {
|
||||
let freq = FrequencyHash::compute(word);
|
||||
for char_ in word.chars() {
|
||||
if char_ != map.wild_card_char {
|
||||
map.letters.insert(char_);
|
||||
}
|
||||
}
|
||||
|
||||
map.add_word_with_wild_card(freq, word, dict.nb_wild_cards, dict.wild_card_char);
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
{
|
||||
|
@ -86,8 +108,33 @@ impl AnagramDict {
|
|||
/// Find all anagrams to a word present in this dict
|
||||
pub fn find(&self, word: &str) -> Option<Vec<String>> {
|
||||
let freq = FrequencyHash::compute(word);
|
||||
self.0.get(&freq).cloned()
|
||||
self.find_freq(freq)
|
||||
}
|
||||
|
||||
/// Recursivelly resolve wild cards until there is less wildcards than [`self.nb_wild_cards`]
|
||||
fn find_freq(&self, mut freq: FrequencyHash) -> Option<Vec<String>> {
|
||||
if freq.get_freq(self.wild_card_char) > self.nb_wild_cards {
|
||||
let mut result = vec![];
|
||||
freq.remove_one_char(self.wild_card_char);
|
||||
for char_ in self.letters.iter() {
|
||||
let mut freq = freq.clone();
|
||||
freq.add_one_char(char_.clone());
|
||||
if let Some(anagrams) = self.find_freq(freq) {
|
||||
for anagram in anagrams {
|
||||
result.push(anagram);
|
||||
}
|
||||
}
|
||||
}
|
||||
if result.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(result)
|
||||
}
|
||||
} else {
|
||||
self.map.get(&freq).cloned()
|
||||
}
|
||||
}
|
||||
|
||||
fn add_word_with_wild_card(
|
||||
&mut self,
|
||||
freq: FrequencyHash,
|
||||
|
@ -95,7 +142,7 @@ impl AnagramDict {
|
|||
nb_wild_card: u8,
|
||||
wild_card_symbole: char,
|
||||
) {
|
||||
self.0
|
||||
self.map
|
||||
.entry(freq.clone())
|
||||
.and_modify(|anagrams| anagrams.push(word.to_string()))
|
||||
.or_insert(vec![word.to_string()]);
|
||||
|
@ -158,6 +205,15 @@ impl FrequencyHash {
|
|||
.and_modify(|counter| *counter += 1)
|
||||
.or_insert(1);
|
||||
}
|
||||
|
||||
/// Return the number of occurence of a char in the word
|
||||
fn get_freq(&self, char_: char) -> u8 {
|
||||
if let Some(freq) = self.0.get(&char_) {
|
||||
freq.clone()
|
||||
} else {
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Hash for FrequencyHash {
|
||||
|
|
|
@ -88,14 +88,10 @@
|
|||
<body>
|
||||
<div id="main_div">
|
||||
<p> Load <a href="./dict.dat">dict.dat</a> before starting </p>
|
||||
<p> Wildcards are implemented but increase a lot the precomputing complexity</p>
|
||||
<p> The precomputation time has a complexity if \(\mathcal{O}(\sum_{i=0}^W \binom{S}{i} . n)\) where \(S\) is the maximum size of the words, \(W\) the maximum number of wild card, and \(n\) the size of the dictionary. For \(S = 15\) and \(W = 3\), this is more or less \(500 . n\)</p>
|
||||
<p> The precomputation has a time and space complexity of \(\mathcal{O}(\sum_{i=0}^W \binom{S}{i} . n)\) where \(S\) is the maximum size of the words, \(W\) the maximum number of precomputed wildcards per word, and \(n\) the size of the dictionary. For \(S = 15\) and \(W = 3\), this is more or less \(500 . n\), 2 precomputed wildcards is a good default setting.</p>
|
||||
<p> The query has a time complexity of \(\mathcal{O}(|L|^{\max(0, W'-W)})\) where \(L\) is the set of letters used in the dictionary and \(W'\) is the number of wildcards in the query. </p>
|
||||
<p> The wildcard character is '?'</p>
|
||||
<p> Sources in rust are available <a href="./lib.rs">here</a> </p>
|
||||
<h3> TODO: </h3>
|
||||
<ul>
|
||||
<li> Allow query-time resolution of wild cards when there are more wildcards than the number of precomputed wild cards</li>
|
||||
</ul>
|
||||
<div id="upload_dict_div">
|
||||
<label id="upload_dict_label" for="upload_dict">Select the dictionary</label>
|
||||
<input type="file" id="upload_dict" style="opacity:0">
|
||||
|
|
Loading…
Reference in a new issue