mirror of
https://github.com/inzerosight/ZWUS-rs.git
synced 2026-04-07 05:02:15 +00:00
152 lines
22 KiB
Rust
152 lines
22 KiB
Rust
use zwus::Zwus;
|
||
|
||
/// Torture-test corpus — every Unicode edge case.
|
||
/// >< on the last line is where npm-encoded "test" (base 6) gets pasted.
|
||
const CORPUS: &str = "\
|
||
ASCII: Hello, World! 0123456789 ~!@#$%^&*()_+-=[]{}|;:'\",.<>?/\n\
|
||
CJK Unified: 你好世界测试汉字\n\
|
||
CJK Extension B: 𠀀𠀁𠀂\n\
|
||
Korean: 한국어 테스트\n\
|
||
Japanese: こんにちは カタカナ テスト\n\
|
||
Emoji single: ✅❌🔥💀🎉🦀⭐🌍\n\
|
||
Surrogate-heavy: 𐐷𐑌𐑀 𝄞\n\
|
||
Math: ∑∏∫∂∇ ℵ ∞ ≠ ≤ ≥\n\
|
||
Newlines:\n\
|
||
Line2\n\
|
||
Line3\n\
|
||
Tabs:\tcol1\tcol2\tcol3\n\
|
||
RTL Arabic: مرحبا بالعالم\n\
|
||
RTL Hebrew: שלום עולם\n\
|
||
Mixed: Hello مرحبا World שלום Fin\n\
|
||
Skin tones: 👋🏻👋🏼👋🏽👋🏾👋🏿\n\
|
||
Flags: 🇺🇸🇯🇵🇩🇪\n\
|
||
Cuneiform: 𒀀𒀁𒀂\n\
|
||
Egyptian: 𓀀𓀁𓀂\n\
|
||
Musical: 𝄞𝄢\n\
|
||
Repeated: 😀😀😀😀😀\n\
|
||
Single: x\n\
|
||
Zero: 0\n\
|
||
Spaces: multiple spaces here\n\
|
||
Nested ZW: ><";
|
||
|
||
// ── Paste encoded output from npm here ─────────────────────────────
|
||
// In Node:
|
||
// import zwus from 'zwus';
|
||
// console.log(JSON.stringify(zwus.encodeString(CORPUS, 3)));
|
||
// console.log(JSON.stringify(zwus.encodeString(CORPUS, 6)));
|
||
// console.log(JSON.stringify(zwus.encodeString(CORPUS, 8)));
|
||
const NPM_BASE_3: &str = "";
|
||
const NPM_BASE_6: &str = "";
|
||
const NPM_BASE_8: &str = "PASTE_HERE";
|
||
|
||
// ── Encoding match: Rust encode == npm encode ──────────────────────
|
||
|
||
#[test]
|
||
fn rust_encode_matches_npm_base3() {
|
||
assert_ne!(NPM_BASE_3, "PASTE_HERE", "Paste npm base-3 encoded output");
|
||
let rust_encoded = Zwus::encode_string_with_base(CORPUS, 3);
|
||
assert_eq!(rust_encoded, NPM_BASE_3, "Base-3 encoding differs from npm");
|
||
}
|
||
|
||
#[test]
|
||
fn rust_encode_matches_npm_base6() {
|
||
assert_ne!(NPM_BASE_6, "PASTE_HERE", "Paste npm base-6 encoded output");
|
||
let rust_encoded = Zwus::encode_string_with_base(CORPUS, 6);
|
||
assert_eq!(rust_encoded, NPM_BASE_6, "Base-6 encoding differs from npm");
|
||
}
|
||
|
||
#[test]
|
||
fn rust_encode_matches_npm_base8() {
|
||
assert_ne!(NPM_BASE_8, "PASTE_HERE", "Paste npm base-8 encoded output");
|
||
let rust_encoded = Zwus::encode_string_with_base(CORPUS, 8);
|
||
assert_eq!(rust_encoded, NPM_BASE_8, "Base-8 encoding differs from npm");
|
||
}
|
||
|
||
// ── Decoding match: Rust decodes npm output back to CORPUS ─────────
|
||
|
||
#[test]
|
||
fn rust_decodes_npm_base3() {
|
||
assert_ne!(NPM_BASE_3, "PASTE_HERE", "Paste npm base-3 encoded output");
|
||
let decoded = Zwus::decode_to_string_with_base(NPM_BASE_3, 3);
|
||
assert_eq!(decoded, CORPUS);
|
||
}
|
||
|
||
#[test]
|
||
fn rust_decodes_npm_base6() {
|
||
assert_ne!(NPM_BASE_6, "PASTE_HERE", "Paste npm base-6 encoded output");
|
||
let decoded = Zwus::decode_to_string_with_base(NPM_BASE_6, 6);
|
||
assert_eq!(decoded, CORPUS);
|
||
}
|
||
|
||
#[test]
|
||
fn rust_decodes_npm_base8() {
|
||
assert_ne!(NPM_BASE_8, "PASTE_HERE", "Paste npm base-8 encoded output");
|
||
let decoded = Zwus::decode_to_string_with_base(NPM_BASE_8, 8);
|
||
assert_eq!(decoded, CORPUS);
|
||
}
|
||
|
||
// ── Rust self-roundtrip ────────────────────────────────────────────
|
||
|
||
#[test]
|
||
fn rust_roundtrip_corpus_all_bases() {
|
||
for base in [3u8, 6, 8] {
|
||
let encoded = Zwus::encode_string_with_base(CORPUS, base);
|
||
let decoded = Zwus::decode_to_string_with_base(&encoded, base);
|
||
assert_eq!(decoded, CORPUS, "Roundtrip failed for base {base}");
|
||
}
|
||
}
|
||
|
||
// ── Nested zero-width: encode the whole corpus, then double-decode ─
|
||
// The CORPUS itself contains base-6 encoded "test" between > and <.
|
||
// First decode peels the outer layer → reveals visible text + inner ZW.
|
||
// Second decode (base 6) on that inner ZW region → "test".
|
||
|
||
#[test]
|
||
fn nested_zero_width_double_decode() {
|
||
assert_ne!(NPM_BASE_3, "PASTE_HERE", "Paste npm base-3 encoded output");
|
||
|
||
// Outer layer: decode the full corpus from base 3
|
||
let outer_decoded = Zwus::decode_to_string_with_base(NPM_BASE_3, 3);
|
||
assert_eq!(outer_decoded, CORPUS);
|
||
|
||
// Find the nested ZW payload between > and <
|
||
let start = outer_decoded.find('>').expect("missing >") + 1;
|
||
let end = outer_decoded.rfind('<').expect("missing <");
|
||
let inner_payload = &outer_decoded[start..end];
|
||
|
||
// Inner layer: that region contains base-6 encoded "test"
|
||
// (pasted by Master into CORPUS between > and <)
|
||
assert!(
|
||
!inner_payload.is_empty() && inner_payload != "PASTE_ZW_HERE",
|
||
"Paste base-6 encoded 'test' between >< in CORPUS"
|
||
);
|
||
let inner_decoded = Zwus::decode_to_string_with_base(inner_payload, 6);
|
||
assert_eq!(inner_decoded, "test", "Inner nested ZW did not decode to 'test'");
|
||
}
|
||
|
||
// ── Surrogate safety ──────────────────────────────────────────────
|
||
|
||
#[test]
|
||
fn no_surrogates_in_codepoint_iteration() {
|
||
let codepoints: Vec<u32> = CORPUS.chars().map(|c| c as u32).collect();
|
||
for &cp in &codepoints {
|
||
assert!(
|
||
!(0xD800..=0xDFFF).contains(&cp),
|
||
"Surrogate in Rust char iteration: U+{cp:04X}"
|
||
);
|
||
assert!(cp <= 0x10FFFF, "Out of Unicode range: U+{cp:04X}");
|
||
}
|
||
}
|
||
|
||
// ── Number array roundtrip ────────────────────────────────────────
|
||
|
||
#[test]
|
||
fn number_array_roundtrip_all_bases() {
|
||
let codepoints: Vec<u32> = CORPUS.chars().map(|c| c as u32).collect();
|
||
for base in [3u8, 6, 8] {
|
||
let encoded = Zwus::encode_number_array_with_base(&codepoints, base);
|
||
let decoded = Zwus::decode_to_number_array_with_base(&encoded, base);
|
||
assert_eq!(decoded, codepoints, "Number array roundtrip failed base {base}");
|
||
}
|
||
}
|