mirror of
https://github.com/inzerosight/ZWUS-rs.git
synced 2026-04-07 05:02:15 +00:00
Feat: Bulletproof JS↔Rust interop + nested ZW
Co-authored-by: Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
151
tests/interop.rs
Normal file
151
tests/interop.rs
Normal file
@@ -0,0 +1,151 @@
|
||||
use zwus::Zwus;
|
||||
|
||||
/// Torture-test corpus — every Unicode edge case.
|
||||
/// >< on the last line is where npm-encoded "test" (base 6) gets pasted.
|
||||
const CORPUS: &str = "\
|
||||
ASCII: Hello, World! 0123456789 ~!@#$%^&*()_+-=[]{}|;:'\",.<>?/\n\
|
||||
CJK Unified: 你好世界测试汉字\n\
|
||||
CJK Extension B: 𠀀𠀁𠀂\n\
|
||||
Korean: 한국어 테스트\n\
|
||||
Japanese: こんにちは カタカナ テスト\n\
|
||||
Emoji single: ✅❌🔥💀🎉🦀⭐🌍\n\
|
||||
Surrogate-heavy: 𐐷𐑌𐑀 𝄞\n\
|
||||
Math: ∑∏∫∂∇ ℵ ∞ ≠ ≤ ≥\n\
|
||||
Newlines:\n\
|
||||
Line2\n\
|
||||
Line3\n\
|
||||
Tabs:\tcol1\tcol2\tcol3\n\
|
||||
RTL Arabic: مرحبا بالعالم\n\
|
||||
RTL Hebrew: שלום עולם\n\
|
||||
Mixed: Hello مرحبا World שלום Fin\n\
|
||||
Skin tones: 👋🏻👋🏼👋🏽👋🏾👋🏿\n\
|
||||
Flags: 🇺🇸🇯🇵🇩🇪\n\
|
||||
Cuneiform: 𒀀𒀁𒀂\n\
|
||||
Egyptian: 𓀀𓀁𓀂\n\
|
||||
Musical: 𝄞𝄢\n\
|
||||
Repeated: 😀😀😀😀😀\n\
|
||||
Single: x\n\
|
||||
Zero: 0\n\
|
||||
Spaces: multiple spaces here\n\
|
||||
Nested ZW: >PASTE_ZW_HERE<";
|
||||
|
||||
// ── Paste encoded output from npm here ─────────────────────────────
|
||||
// In Node:
|
||||
// import zwus from 'zwus';
|
||||
// console.log(JSON.stringify(zwus.encodeString(CORPUS, 3)));
|
||||
// console.log(JSON.stringify(zwus.encodeString(CORPUS, 6)));
|
||||
// console.log(JSON.stringify(zwus.encodeString(CORPUS, 8)));
|
||||
const NPM_BASE_3: &str = "PASTE_HERE";
|
||||
const NPM_BASE_6: &str = "PASTE_HERE";
|
||||
const NPM_BASE_8: &str = "PASTE_HERE";
|
||||
|
||||
// ── Encoding match: Rust encode == npm encode ──────────────────────
|
||||
|
||||
#[test]
|
||||
fn rust_encode_matches_npm_base3() {
|
||||
assert_ne!(NPM_BASE_3, "PASTE_HERE", "Paste npm base-3 encoded output");
|
||||
let rust_encoded = Zwus::encode_string_with_base(CORPUS, 3);
|
||||
assert_eq!(rust_encoded, NPM_BASE_3, "Base-3 encoding differs from npm");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rust_encode_matches_npm_base6() {
|
||||
assert_ne!(NPM_BASE_6, "PASTE_HERE", "Paste npm base-6 encoded output");
|
||||
let rust_encoded = Zwus::encode_string_with_base(CORPUS, 6);
|
||||
assert_eq!(rust_encoded, NPM_BASE_6, "Base-6 encoding differs from npm");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rust_encode_matches_npm_base8() {
|
||||
assert_ne!(NPM_BASE_8, "PASTE_HERE", "Paste npm base-8 encoded output");
|
||||
let rust_encoded = Zwus::encode_string_with_base(CORPUS, 8);
|
||||
assert_eq!(rust_encoded, NPM_BASE_8, "Base-8 encoding differs from npm");
|
||||
}
|
||||
|
||||
// ── Decoding match: Rust decodes npm output back to CORPUS ─────────
|
||||
|
||||
#[test]
|
||||
fn rust_decodes_npm_base3() {
|
||||
assert_ne!(NPM_BASE_3, "PASTE_HERE", "Paste npm base-3 encoded output");
|
||||
let decoded = Zwus::decode_to_string_with_base(NPM_BASE_3, 3);
|
||||
assert_eq!(decoded, CORPUS);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rust_decodes_npm_base6() {
|
||||
assert_ne!(NPM_BASE_6, "PASTE_HERE", "Paste npm base-6 encoded output");
|
||||
let decoded = Zwus::decode_to_string_with_base(NPM_BASE_6, 6);
|
||||
assert_eq!(decoded, CORPUS);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rust_decodes_npm_base8() {
|
||||
assert_ne!(NPM_BASE_8, "PASTE_HERE", "Paste npm base-8 encoded output");
|
||||
let decoded = Zwus::decode_to_string_with_base(NPM_BASE_8, 8);
|
||||
assert_eq!(decoded, CORPUS);
|
||||
}
|
||||
|
||||
// ── Rust self-roundtrip ────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn rust_roundtrip_corpus_all_bases() {
|
||||
for base in [3u8, 6, 8] {
|
||||
let encoded = Zwus::encode_string_with_base(CORPUS, base);
|
||||
let decoded = Zwus::decode_to_string_with_base(&encoded, base);
|
||||
assert_eq!(decoded, CORPUS, "Roundtrip failed for base {base}");
|
||||
}
|
||||
}
|
||||
|
||||
// ── Nested zero-width: encode the whole corpus, then double-decode ─
|
||||
// The CORPUS itself contains base-6 encoded "test" between > and <.
|
||||
// First decode peels the outer layer → reveals visible text + inner ZW.
|
||||
// Second decode (base 6) on that inner ZW region → "test".
|
||||
|
||||
#[test]
|
||||
fn nested_zero_width_double_decode() {
|
||||
assert_ne!(NPM_BASE_3, "PASTE_HERE", "Paste npm base-3 encoded output");
|
||||
|
||||
// Outer layer: decode the full corpus from base 3
|
||||
let outer_decoded = Zwus::decode_to_string_with_base(NPM_BASE_3, 3);
|
||||
assert_eq!(outer_decoded, CORPUS);
|
||||
|
||||
// Find the nested ZW payload between > and <
|
||||
let start = outer_decoded.find('>').expect("missing >") + 1;
|
||||
let end = outer_decoded.rfind('<').expect("missing <");
|
||||
let inner_payload = &outer_decoded[start..end];
|
||||
|
||||
// Inner layer: that region contains base-6 encoded "test"
|
||||
// (pasted by Master into CORPUS between > and <)
|
||||
assert!(
|
||||
!inner_payload.is_empty() && inner_payload != "PASTE_ZW_HERE",
|
||||
"Paste base-6 encoded 'test' between >< in CORPUS"
|
||||
);
|
||||
let inner_decoded = Zwus::decode_to_string_with_base(inner_payload, 6);
|
||||
assert_eq!(inner_decoded, "test", "Inner nested ZW did not decode to 'test'");
|
||||
}
|
||||
|
||||
// ── Surrogate safety ──────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn no_surrogates_in_codepoint_iteration() {
|
||||
let codepoints: Vec<u32> = CORPUS.chars().map(|c| c as u32).collect();
|
||||
for &cp in &codepoints {
|
||||
assert!(
|
||||
!(0xD800..=0xDFFF).contains(&cp),
|
||||
"Surrogate in Rust char iteration: U+{cp:04X}"
|
||||
);
|
||||
assert!(cp <= 0x10FFFF, "Out of Unicode range: U+{cp:04X}");
|
||||
}
|
||||
}
|
||||
|
||||
// ── Number array roundtrip ────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn number_array_roundtrip_all_bases() {
|
||||
let codepoints: Vec<u32> = CORPUS.chars().map(|c| c as u32).collect();
|
||||
for base in [3u8, 6, 8] {
|
||||
let encoded = Zwus::encode_number_array_with_base(&codepoints, base);
|
||||
let decoded = Zwus::decode_to_number_array_with_base(&encoded, base);
|
||||
assert_eq!(decoded, codepoints, "Number array roundtrip failed base {base}");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user