Compare commits

...

5 commits

12 changed files with 387049 additions and 1 deletions

View file

@ -1,2 +1,3 @@
# anagrams
# Anagrams
Compute anagrams.

1
anagrams/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

7
anagrams/Cargo.lock generated Normal file
View file

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anagrams"
version = "0.1.0"

8
anagrams/Cargo.toml Normal file
View file

@ -0,0 +1,8 @@
[package]
name = "anagrams"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

386265
anagrams/dict.dat Executable file

File diff suppressed because it is too large Load diff

230
anagrams/src/lib.rs Normal file
View file

@ -0,0 +1,230 @@
use std::collections::{HashMap, HashSet};
#[cfg(not(target_arch = "wasm32"))]
use std::fs::File;
use std::hash::{Hash, Hasher};
#[cfg(not(target_arch = "wasm32"))]
use std::io::{self, BufRead, BufReader, Write};
/// Simple dictionary: a plain word list together with the wild card settings
/// used when the words are indexed.
pub struct Dict {
    /// The words, in file order (the header line is not included).
    words: Vec<String>,
    /// Number of wild cards to precompute per word.
    nb_wild_cards: u8,
    /// The character used as a wild card.
    wild_card_char: char,
}

impl Dict {
    /// Load the dictionary from the file `dict.dat` of the current directory.
    ///
    /// The first line of the file (the size of the dictionary) is skipped,
    /// every following line is one word. The dictionary is configured with
    /// 2 wild cards and `'?'` as the wild card character.
    ///
    /// # Errors
    ///
    /// Returns the I/O error met while opening the file or reading a word.
    #[cfg(not(target_arch = "wasm32"))]
    pub fn load() -> io::Result<Self> {
        let file = BufReader::new(File::open("dict.dat")?);
        let words = file
            .lines()
            .skip(1) // The first line is the size of the dict, not a word
            .collect::<io::Result<Vec<_>>>()?;
        Ok(Self {
            words,
            nb_wild_cards: 2,
            wild_card_char: '?',
        })
    }

    /// Build a dictionary from the content of a dictionary file.
    ///
    /// The first line of `string` (the size of the dictionary) is skipped,
    /// every following line is one word.
    pub fn load_from_str(string: &str, nb_wild_cards: u8, wild_card_char: char) -> Self {
        Self {
            words: string.lines().skip(1).map(String::from).collect(),
            nb_wild_cards,
            wild_card_char,
        }
    }
}
/// Precomputed anagram index: the words are grouped in vectors keyed by their
/// [`FrequencyHash`].
///
/// Up to `nb_wild_cards` wild cards per word are precomputed. A query holding
/// `l` wild cards needs about `|letters|^max(0, l - nb_wild_cards)` hash map
/// lookups, because every wild card beyond the precomputed ones is replaced
/// by each letter of the dictionary in turn.
pub struct AnagramDict {
/// The precomputed anagram equivalence classes, indexed by their character
/// frequencies (wild card variants included)
map: HashMap<FrequencyHash, Vec<String>>,
/// Number of precomputed wild cards per word
nb_wild_cards: u8,
/// The character used as a wild card
wild_card_char: char,
/// The set of characters used in the dict, wild card excluded
letters: HashSet<char>,
}
impl From<&Dict> for AnagramDict {
    /// Precompute the anagram classes of every word of `dict`.
    ///
    /// Each word is handed to `add_word_with_wild_card` with the wild card
    /// settings of `dict`, and the characters of the words (wild card
    /// excluded) are collected into the letter set.
    ///
    /// Outside of wasm the progress is reported on stdout as a percentage.
    /// The line is rewritten only when the percentage changes, instead of
    /// flushing stdout once per word, and write errors are ignored so that a
    /// closed stdout cannot abort the precomputation.
    fn from(dict: &Dict) -> Self {
        let mut anagram_dict = Self {
            map: HashMap::new(),
            nb_wild_cards: dict.nb_wild_cards,
            wild_card_char: dict.wild_card_char,
            letters: HashSet::new(),
        };

        // Progress state, only needed where a stdout is available
        #[cfg(not(target_arch = "wasm32"))]
        let total = dict.words.len();
        #[cfg(not(target_arch = "wasm32"))]
        let mut processed = 0;
        #[cfg(not(target_arch = "wasm32"))]
        let mut last_percent = None;

        for word in &dict.words {
            anagram_dict.letters.extend(
                word.chars()
                    .filter(|&char_| char_ != dict.wild_card_char),
            );
            anagram_dict.add_word_with_wild_card(
                FrequencyHash::compute(word),
                word,
                dict.nb_wild_cards,
                dict.wild_card_char,
            );

            #[cfg(not(target_arch = "wasm32"))]
            {
                processed += 1;
                // `total` cannot be 0 here: the loop body runs at least once
                let percent = (100 * processed) / total;
                if last_percent != Some(percent) {
                    last_percent = Some(percent);
                    let mut stdout = io::stdout().lock();
                    // Best effort: the progress display must never panic
                    let _ = write!(stdout, "{percent}%\r");
                    let _ = stdout.flush();
                }
            }
        }
        anagram_dict
    }
}
impl AnagramDict {
    /// Find all anagrams of `word` present in this dict.
    ///
    /// Every wild card character of `word` matches any letter used in the
    /// dictionary. Each matching word is returned once; `None` is returned
    /// when nothing matches.
    pub fn find(&self, word: &str) -> Option<Vec<String>> {
        self.find_freq(FrequencyHash::compute(word))
    }

    /// Look up the words matching `freq`, resolving the wild cards that
    /// exceed the precomputed `nb_wild_cards`.
    fn find_freq(&self, freq: FrequencyHash) -> Option<Vec<String>> {
        let mut seen = HashSet::new();
        let mut matches = Vec::new();
        self.collect_matches(freq, &mut seen, &mut matches);
        (!matches.is_empty()).then_some(matches)
    }

    /// Recursively replace wild cards by every known letter until there are
    /// no more than `nb_wild_cards` of them, then read the precomputed map.
    ///
    /// The same letter combination is reached once per order in which the
    /// wild cards are resolved, so `seen` filters out the words already
    /// pushed to `matches`.
    fn collect_matches<'a>(
        &'a self,
        mut freq: FrequencyHash,
        seen: &mut HashSet<&'a str>,
        matches: &mut Vec<String>,
    ) {
        if freq.get_freq(self.wild_card_char) > self.nb_wild_cards {
            freq.remove_one_char(self.wild_card_char);
            for &letter in &self.letters {
                let mut candidate = freq.clone();
                candidate.add_one_char(letter);
                self.collect_matches(candidate, seen, matches);
            }
        } else if let Some(anagrams) = self.map.get(&freq) {
            for anagram in anagrams {
                if seen.insert(anagram.as_str()) {
                    matches.push(anagram.clone());
                }
            }
        }
    }

    /// Register `word` under `freq` and, recursively, under every frequency
    /// obtained by replacing up to `nb_wild_card` of its letters by
    /// `wild_card_symbole`.
    ///
    /// All the insertions of one word happen within a single top-level call,
    /// so a word that is already stored under a key is necessarily the last
    /// element of that key's vector: comparing with `last()` is enough to
    /// store each word once per key.
    fn add_word_with_wild_card(
        &mut self,
        freq: FrequencyHash,
        word: &str,
        nb_wild_card: u8,
        wild_card_symbole: char,
    ) {
        let anagrams = self.map.entry(freq.clone()).or_default();
        if anagrams.last().map(String::as_str) != Some(word) {
            anagrams.push(word.to_string());
        }
        if nb_wild_card == 0 {
            return;
        }
        for &letter in freq.0.keys() {
            // Replacing a wild card by a wild card leads back to the same key
            if letter == wild_card_symbole {
                continue;
            }
            let mut masked = freq.clone();
            masked.remove_one_char(letter);
            masked.add_one_char(wild_card_symbole);
            self.add_word_with_wild_card(masked, word, nb_wild_card - 1, wild_card_symbole);
        }
    }
}
/// The FrequencyHash is a hashable structure that represents the occurrences
/// of chars in a word. Two words have the same FrequencyHash if and only if
/// they are anagrams of each other (counts saturate at 255 occurrences of a
/// single char, far beyond any dictionary word).
///
/// We use the [`char`] type for simplicity. The initial problem was in ascii
/// anyway, but keep in mind that [`char`]s are Unicode scalar values, and in
/// some alphabets those do not match characters.
///
/// The counts are kept in a `BTreeMap`: equal frequency tables always iterate
/// in the same order, so equality and hashing can be derived instead of
/// building a sorted copy of the table for every hash or comparison.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct FrequencyHash(std::collections::BTreeMap<char, u8>);

impl FrequencyHash {
    /// Compute the [`FrequencyHash`] of a slice.
    fn compute(string: &str) -> Self {
        let mut freq = Self(std::collections::BTreeMap::new());
        for char_ in string.chars() {
            freq.add_one_char(char_);
        }
        freq
    }

    /// Remove one occurrence of a character. Characters that are absent are
    /// left untouched, and a character whose count drops to 0 is removed so
    /// that the table keeps a unique representation.
    fn remove_one_char(&mut self, char_: char) {
        if let Some(counter) = self.0.get_mut(&char_) {
            *counter -= 1;
            if *counter == 0 {
                self.0.remove(&char_);
            }
        }
    }

    /// Add one occurrence of a character.
    ///
    /// The count saturates at [`u8::MAX`] instead of overflowing, so an
    /// overlong query cannot panic or wrap around to a wrong count.
    fn add_one_char(&mut self, char_: char) {
        let counter = self.0.entry(char_).or_insert(0);
        *counter = counter.saturating_add(1);
    }

    /// Return the number of occurrences of a char in the word
    fn get_freq(&self, char_: char) -> u8 {
        self.0.get(&char_).copied().unwrap_or(0)
    }
}

28
anagrams/src/main.rs Normal file
View file

@ -0,0 +1,28 @@
use std::io::{self, BufRead};
use anagrams::{AnagramDict, Dict};
/// Interactive anagram finder.
///
/// Loads the dictionary, precomputes the anagram index, then answers one
/// query per line of stdin until the line `!` or the end of the input.
///
/// # Errors
///
/// Returns the I/O error met while loading the dictionary or reading stdin,
/// instead of panicking on it.
fn main() -> io::Result<()> {
    println!("[ Loading dictionnary from file ]");
    let dict = Dict::load()?;
    println!("[ Precomputing the hashmap ]");
    let dict: AnagramDict = (&dict).into();
    println!("[ Ready ]");

    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        let line = line?;
        if line == "!" {
            break;
        }
        match dict.find(&line) {
            Some(words) => {
                println!("Found anagrams:");
                for word in words {
                    println!("> {word}");
                }
            }
            None => println!("No anagram found"),
        }
    }
    Ok(())
}

1
web_gui/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

164
web_gui/Cargo.lock generated Normal file
View file

@ -0,0 +1,164 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anagrams"
version = "0.1.0"
[[package]]
name = "anagrams_web"
version = "0.1.0"
dependencies = [
"anagrams",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "bumpalo"
version = "3.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "js-sys"
version = "0.3.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "log"
version = "0.4.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
dependencies = [
"cfg-if",
]
[[package]]
name = "once_cell"
version = "1.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3"
[[package]]
name = "proc-macro2"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
dependencies = [
"proc-macro2",
]
[[package]]
name = "syn"
version = "1.0.107"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
[[package]]
name = "wasm-bindgen"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454"
dependencies = [
"cfg-if",
"js-sys",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.84"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d"
[[package]]
name = "web-sys"
version = "0.3.61"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97"
dependencies = [
"js-sys",
"wasm-bindgen",
]

31
web_gui/Cargo.toml Normal file
View file

@ -0,0 +1,31 @@
[package]
name = "anagrams_web"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
[dependencies]
wasm-bindgen = "0.2.84"
wasm-bindgen-futures = "0.4.34"
anagrams = { path = "../anagrams" }
[dependencies.web-sys]
version = "0.3.61"
features = [
'Document',
'Element',
'HtmlElement',
'Node',
'Window',
'EventTarget',
'Event',
'HtmlInputElement',
'HtmlCollection',
'FileList',
'File',
'Blob',
'console',
'CssStyleDeclaration',
]

128
web_gui/index.html Normal file
View file

@ -0,0 +1,128 @@
<!DOCTYPE html>
<html>
<head>
<meta content="text/html;charset=utf-8" http-equiv="Content-Type"/>
<title>Anagram calculator</title>
<style>
body {
background-color: black;
text-align: center;
color: white;
}
#upload_dict_label {
display: inline-block;
background-color: #333;
padding: 15px;
margin: 15px;
text-align: center;
display: inline-block;
font-size: 24px;
border-radius: 15px;
box-shadow: 5px 6px #222;
border: solid;
border-color: black;
}
#upload_dict_label:hover {
background-color: #444;
}
#upload_dict_label:active {
background-color: #444;
box-shadow: 0px 0px #444;
transform: translateY(9px);
}
#main_div {
border-radius: 15px;
background-color: #555555;
padding: 15px;
margin: 15px;
text-align: center;
display: inline-block;
}
#spinner {
margin: 30px;
display: none;
}
#result {
text-align: left;
}
#word_div {
margin: 30px;
}
.lds-dual-ring {
display: inline-block;
width: 80px;
height: 80px;
}
.lds-dual-ring:after {
content: " ";
display: block;
width: 64px;
height: 64px;
margin: 8px;
border-radius: 50%;
border: 6px solid #fff;
border-color: #fff transparent #fff transparent;
animation: lds-dual-ring 1.2s linear infinite;
}
@keyframes lds-dual-ring {
0% {
transform: rotate(0deg);
}
100% {
transform: rotate(360deg);
}
}
</style>
<!-- Katex for latex rendering -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css" integrity="sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0" crossorigin="anonymous">
<!-- The loading of KaTeX is deferred to speed up page rendering -->
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js" integrity="sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4" crossorigin="anonymous"></script>
<!-- To automatically render math in text elements, include the auto-render extension: -->
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js" integrity="sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05" crossorigin="anonymous"
onload="renderMathInElement(document.body);"></script>
</head>
<body>
<div id="main_div">
<p> Load <a href="./dict.dat">dict.dat</a> before starting </p>
<p> The precomputation has a time and space complexity of \(\mathcal{O}(\sum_{i=0}^W \binom{S}{i} . n)\) where \(S\) is the maximum size of the words, \(W\) the maximum number of precomputed wildcards per word, and \(n\) the size of the dictionary. For \(S = 15\) and \(W = 3\), this is more or less \(500 . n\), 2 precomputed wildcards is a good default setting.</p>
<p> The query has a time complexity of \(\mathcal{O}(|S|^{max(0, W'-W)})\) where \(W'\) is the number of wildcards in the query. </p>
<p> The wildcard character is '?'</p>
<p> Sources in rust are available <a href="./lib.rs">here</a> </p>
<div id="upload_dict_div">
<label id="upload_dict_label" for="upload_dict">Select the dictionary</label>
<input type="file" id="upload_dict" style="opacity:0">
<div>
<label id="nb_wild_card_label" for="nb_wild_card"> Select the number of precomputed wild card: </label>
<input type="number" id="nb_wild_card" value=0>
</div>
</div>
<!-- Thanks https://loading.io/css/ for the spinner -->
<div id="spinner" class="lds-dual-ring"></div>
<div id="word_div">
<label id="word_label" for="select_word">Enter word: </label>
<input type="text" id="select_word">
<p id="ask_dict">Please select a dictionary</p>
</div>
<div>
<ul id="result"></ul>
</div>
</div>
<script type="module">
import init from './pkg/anagrams_web.js';
// Load and initialise the wasm module imported from ./pkg.
const run = async () => {
  await init();
};
run();
</script>
</body>
</html>

184
web_gui/src/lib.rs Normal file
View file

@ -0,0 +1,184 @@
use std::sync::Mutex;
use wasm_bindgen::prelude::*;
use web_sys::{console, Event, HtmlElement, HtmlInputElement};
use anagrams::{AnagramDict, Dict};
static DICTIONNARY: Mutex<Option<AnagramDict>> = Mutex::new(None);
/// Return the global `window` object.
///
/// # Panics
///
/// Panics when there is no global `window`.
fn window() -> web_sys::Window {
    match web_sys::window() {
        Some(global) => global,
        None => panic!("no global `window` exists"),
    }
}
/// Return the document of the global window.
///
/// # Panics
///
/// Panics when the window has no document.
fn document() -> web_sys::Document {
    let maybe_document = window().document();
    maybe_document.expect("should have a document on window")
}
/// Set the CSS `display` property of the element whose id is `id`.
///
/// # Panics
///
/// Panics if no element has this id, if the element is not an
/// `HtmlElement`, or if its style cannot be edited.
fn set_display(id: &str, value: &str) {
    let element = document()
        .get_element_by_id(id)
        .unwrap_or_else(|| panic!("#{id} not found"));
    element
        .dyn_ref::<HtmlElement>()
        .unwrap_or_else(|| panic!("Failed to cast #{id} to HtmlElement"))
        .style()
        .set_property("display", value)
        .unwrap_or_else(|err| panic!("Failed to edit #{id}'s CSS: {err:?}"));
}

/// Show the `#spinner` element.
fn display_spinner() {
    set_display("spinner", "inline-block");
}

/// Hide the `#spinner` element.
fn hide_spinner() {
    set_display("spinner", "none");
}

/// Hide the `#ask_dict` hint.
fn hide_ask_dict() {
    set_display("ask_dict", "none");
}
/// Refresh the `#result` list with the anagrams of the text typed in
/// `#select_word`.
///
/// The previous entries are always removed; the list stays empty when no
/// dictionary is loaded or when nothing matches.
fn update_letters() {
    let doc = document();
    let input = doc
        .get_element_by_id("select_word")
        .expect("#select_word not found");
    let query = input
        .dyn_ref::<HtmlInputElement>()
        .expect("Failed to dyn cast to HtmlInputElement")
        .value();

    let list = doc
        .get_element_by_id("result")
        .expect("#result not found");
    // Drop the entries of the previous query
    while let Some(stale) = list.first_element_child() {
        stale.remove();
    }

    let dictionary = DICTIONNARY.lock().expect("Failed to access DICTIONNARY");
    let anagrams = dictionary
        .as_ref()
        .and_then(|dict| dict.find(&query))
        .unwrap_or_default();
    for anagram in &anagrams {
        let item = doc.create_element("li").unwrap();
        item.set_text_content(Some(anagram.as_str()));
        list.append_child(&item).unwrap();
    }
}
/// Build the anagram dictionary from the text of the selected file.
///
/// `text` must hold a string (the content of the dictionary file). The
/// number of precomputed wild cards is read from `#nb_wild_card` and `'?'`
/// is used as the wild card. On success the global `DICTIONNARY` is
/// replaced, the `#ask_dict` hint is hidden and the results are refreshed.
///
/// When `#nb_wild_card` does not hold an integer between 0 and 255, an
/// error is logged to the console, the spinner is hidden and the current
/// dictionary is kept, instead of panicking on a value typed by the user.
///
/// # Panics
///
/// Panics if `text` is not a string or if `#nb_wild_card` is missing.
fn load_dict(text: JsValue) {
    display_spinner();
    let text = text.as_string().expect("Failed to get content of the file");
    let el = document()
        .get_element_by_id("nb_wild_card")
        .expect("#nb_wild_card not found");
    let raw_nb_wild_card = el
        .dyn_ref::<HtmlInputElement>()
        .expect("Failed to dyn cast #nb_wild_card to HtmlInputElement")
        .value();
    let nb_wild_card: u8 = match raw_nb_wild_card.parse() {
        Ok(nb_wild_card) => nb_wild_card,
        Err(err) => {
            let message = format!(
                "#nb_wild_card should be an integer between 0 and 255, got {raw_nb_wild_card:?}: {err}"
            );
            console::error_1(&message.into());
            hide_spinner();
            return;
        }
    };
    let dict = Dict::load_from_str(&text, nb_wild_card, '?');
    *DICTIONNARY.lock().expect("Failed to access DICTIONNARY") = Some((&dict).into());
    hide_spinner();
    hide_ask_dict();
    update_letters();
}
// Called when the wasm module is instantiated
#[wasm_bindgen(start)]
fn main() -> Result<(), JsValue> {
let load_dict_c1 = Closure::<dyn FnMut(_)>::new(load_dict);
let load_dict_c2 = Closure::<dyn FnMut(_)>::new(load_dict);
let update_dict = Closure::<dyn FnMut(_)>::new(move |event: Event| {
display_spinner();
let target = event.target().expect("No target");
let _ = target
.dyn_ref::<HtmlInputElement>()
.expect("Failed to dyn cast to HtmlInputElement")
.files()
.expect("Failed to get files")
.get(0)
.expect("Failed to get first file")
.text()
.then(&load_dict_c1);
});
let update_nb_wild_card = Closure::<dyn FnMut(_)>::new(move |_: Event| {
let el = document()
.get_element_by_id("upload_dict")
.expect("#upload_dict not found");
let files = el
.dyn_ref::<HtmlInputElement>()
.expect("Failed to dyn cast to HtmlInputElement")
.files()
.expect("Failed to get files");
if files.length() > 0 {
display_spinner();
let _ = files
.get(0)
.expect("Failed to get first file")
.text()
.then(&load_dict_c2);
}
});
let update_letters_closure = Closure::<dyn FnMut(_)>::new(|_: Event| update_letters());
let input_dict = document()
.get_element_by_id("upload_dict")
.expect("#upload_dict not found");
input_dict
.dyn_ref::<HtmlInputElement>()
.expect("#upload_dict must be an <input type='file'>")
.add_event_listener_with_callback("change", update_dict.as_ref().unchecked_ref())
.expect("Failed to register event listener to #upload_dict");
let input_text = document()
.get_element_by_id("select_word")
.expect("#select_word not found");
input_text
.dyn_ref::<HtmlInputElement>()
.expect("#select_word must be an <input type='text'>")
.add_event_listener_with_callback("input", update_letters_closure.as_ref().unchecked_ref())
.expect("Failed to register event listener to #select_word");
let nb_wild_card = document()
.get_element_by_id("nb_wild_card")
.expect("#nb_wild_card not found");
nb_wild_card
.dyn_ref::<HtmlInputElement>()
.expect("#nb_wild_card must be an <input type='numver'>")
.add_event_listener_with_callback("change", update_nb_wild_card.as_ref().unchecked_ref())
.expect("Failed to register event listener to #nb_wild_card");
update_dict.forget(); // The callback will invalidate when the closure is dropped, this prevent it,
// at the cost of a "memory leak"
update_letters_closure.forget();
update_nb_wild_card.forget();
Ok(())
}