Add compression to dictionary data

This commit is contained in:
Sayantan Santra 2022-06-07 19:38:30 -05:00
parent ade3107107
commit cf9e1f7775
8 changed files with 60 additions and 5 deletions

41
Cargo.lock generated
View File

@ -2,6 +2,47 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "cc"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
[[package]]
name = "libc"
version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "lzma-sys"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e06754c4acf47d49c727d5665ca9fb828851cda315ed3bd51edd148ef78a8772"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "pkg-config"
version = "0.3.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
[[package]]
name = "unscrambler"
version = "0.1.0"
dependencies = [
"xz2",
]
[[package]]
name = "xz2"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
dependencies = [
"lzma-sys",
]

View File

@ -6,3 +6,4 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
xz2 = "0.1.7"

View File

@ -6,4 +6,4 @@ I'm learning Rust, so this is just a rewrite of a simple old project in Rust.
### Note
The main wordlist was pulled from [words_alpha.txt by dwyl](https://github.com/dwyl/english-words/) and processed using Rust. Processing code was really simple, so didn't put it up here.
The main `src/wordlist` was pulled from [words_alpha.txt by dwyl](https://github.com/dwyl/english-words/) and processed using Rust. The processing code was really simple, so I didn't put it up here. The processing included pre-sorting each line in `src/wordlist` to create `src/wordlist_sorted` and then compressing both using `xz`.

BIN
src/dict/wordlist.txt.xz Normal file

Binary file not shown.

Binary file not shown.

View File

@ -1,9 +1,22 @@
use std::io::{self, Write};
use std::io::{self, prelude::*, Write};
use xz2::read::XzDecoder;
fn main() {
// read the dictionary files
let wordlist = include_str!("data/wordlist.txt");
let wordlist_sorted = include_str!("data/wordlist_sorted.txt");
// load the compressed dictionary files (embedded at compile time)
let wordlist_cmp: &[u8] = include_bytes!("dict/wordlist.txt.xz");
let wordlist_sorted_cmp: &[u8] = include_bytes!("dict/wordlist_sorted.txt.xz");
// decompress the dictionary files
let mut decompressor = XzDecoder::new(wordlist_cmp);
let mut decompressor_sorted = XzDecoder::new(wordlist_sorted_cmp);
let mut wordlist = String::new();
let mut wordlist_sorted = String::new();
decompressor.read_to_string(&mut wordlist).unwrap();
decompressor_sorted
.read_to_string(&mut wordlist_sorted)
.unwrap();
// some formatting of the dictionary data
let wordlist = &[" ", &wordlist.replace("\n", " ")[..]].join("")[..];
let wordlist_sorted = &[" ", &wordlist_sorted.replace("\n", " ")[..]].join("")[..];