about summary refs log tree commit diff
path: root/crates/fparkan-corpus
diff options
context:
space:
mode:
Diffstat (limited to 'crates/fparkan-corpus')
-rw-r--r-- crates/fparkan-corpus/Cargo.toml 8
-rw-r--r-- crates/fparkan-corpus/src/lib.rs 281
2 files changed, 272 insertions, 17 deletions
diff --git a/crates/fparkan-corpus/Cargo.toml b/crates/fparkan-corpus/Cargo.toml
index 552870d..e9285a8 100644
--- a/crates/fparkan-corpus/Cargo.toml
+++ b/crates/fparkan-corpus/Cargo.toml
@@ -7,8 +7,16 @@ repository.workspace = true
[dependencies]
fparkan-binary = { path = "../fparkan-binary" }
+fparkan-fx = { path = "../fparkan-fx" }
+fparkan-material = { path = "../fparkan-material" }
+fparkan-msh = { path = "../fparkan-msh" }
+fparkan-mission-format = { path = "../fparkan-mission-format" }
fparkan-nres = { path = "../fparkan-nres" }
+fparkan-prototype = { path = "../fparkan-prototype" }
fparkan-path = { path = "../fparkan-path" }
+fparkan-rsli = { path = "../fparkan-rsli" }
+fparkan-texm = { path = "../fparkan-texm" }
+fparkan-terrain-format = { path = "../fparkan-terrain-format" }
[lints]
workspace = true
diff --git a/crates/fparkan-corpus/src/lib.rs b/crates/fparkan-corpus/src/lib.rs
index 460bbbf..f923841 100644
--- a/crates/fparkan-corpus/src/lib.rs
+++ b/crates/fparkan-corpus/src/lib.rs
@@ -2,7 +2,16 @@
//! Licensed corpus discovery and aggregate reports.
use fparkan_binary::{sha256, sha256_hex, Sha256Digest};
+use fparkan_fx::{decode_fxid, FXID_KIND};
+use fparkan_material::{decode_mat0, decode_wear, MAT0_KIND, WEAR_KIND};
+use fparkan_msh::{decode_msh, validate_msh};
+use fparkan_mission_format::{decode_tma, TmaProfile};
+use fparkan_nres::NresDocument;
use fparkan_path::{ascii_lookup_key, normalize_relative, PathPolicy};
+use fparkan_prototype::{decode_unit_dat, decode_unit_dat_binding};
+use fparkan_rsli::{decode as decode_rsli, ReadProfile};
+use fparkan_texm::decode_texm;
+use fparkan_terrain_format::{decode_land_map, decode_land_msh};
use std::collections::{BTreeMap, BTreeSet};
use std::fmt;
use std::fs;
@@ -10,6 +19,8 @@ use std::io::Write;
use std::path::{Path, PathBuf};
use std::sync::Arc;
+/// NRes type id matched for `texm` entries: the ASCII tag `Texm` read as a
+/// little-endian `u32` (numerically `0x6d78_6554`).
+const TEXM_KIND: u32 = u32::from_le_bytes(*b"Texm");
+
/// Corpus kind.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CorpusKind {
@@ -336,7 +347,6 @@ fn inspect_report_file(
}
};
if bytes.starts_with(b"NRes") {
- variant = "nres".to_string();
bump(metrics, "nres_files", 1);
if let Err(message) = inspect_nres_metrics(bytes, metrics) {
return CorpusFileRecord {
@@ -346,9 +356,52 @@ fn inspect_report_file(
message: Some(message),
};
}
+ if variant == "land_msh" && let Err(message) = inspect_land_metrics(&bytes, false) {
+ return CorpusFileRecord {
+ path: entry.path.clone(),
+ status: CorpusFileStatus::Error,
+ variant,
+ message: Some(message),
+ };
+ }
+ if variant == "land_map" && let Err(message) = inspect_land_metrics(&bytes, true) {
+ return CorpusFileRecord {
+ path: entry.path.clone(),
+ status: CorpusFileStatus::Error,
+ variant,
+ message: Some(message),
+ };
+ }
} else if bytes.starts_with(b"NL") {
variant = "rsli".to_string();
bump(metrics, "rsli_files", 1);
+ if let Err(message) = inspect_rsli_metrics(&bytes) {
+ return CorpusFileRecord {
+ path: entry.path.clone(),
+ status: CorpusFileStatus::Error,
+ variant,
+ message: Some(message),
+ };
+ }
+ } else if lower.ends_with("data.tma") {
+ if let Err(message) = inspect_tma_metrics(&bytes) {
+ return CorpusFileRecord {
+ path: entry.path.clone(),
+ status: CorpusFileStatus::Error,
+ variant: "tma".to_string(),
+ message: Some(message),
+ };
+ }
+ } else if has_extension(lower, "dat") && (lower.starts_with("units/") || lower.contains("/units/")) {
+ variant = "unit_dat".to_string();
+ if let Err(message) = inspect_unit_dat_metrics(&bytes) {
+ return CorpusFileRecord {
+ path: entry.path.clone(),
+ status: CorpusFileStatus::Error,
+ variant,
+ message: Some(message),
+ };
+ }
}
CorpusFileRecord {
path: entry.path.clone(),
@@ -380,25 +433,30 @@ fn inspect_path_metrics(lower: &str, metrics: &mut BTreeMap<String, u64>) -> Str
}
fn inspect_nres_metrics(bytes: Vec<u8>, metrics: &mut BTreeMap<String, u64>) -> Result<(), String> {
- let entries = inspect_nres_entries(bytes)?;
- bump(metrics, "nres_entries", entries.len() as u64);
- for entry in entries {
+ let document = inspect_nres_document(&bytes)?;
+ bump(metrics, "nres_entries", document.entries().len() as u64);
+ for entry in document.entries() {
let name = String::from_utf8_lossy(entry.name_bytes()).to_ascii_lowercase();
if has_extension(&name, "msh") {
bump(metrics, "msh_entries", 1);
+ validate_nres_msh_payload(&document, entry)?;
}
match entry.meta().type_id {
- 0x3054_414D => {
+ MAT0_KIND => {
bump(metrics, "mat0_entries", 1);
+ validate_nres_mat0_payload(&document, entry)?;
}
- 0x6D78_6554 => {
+ TEXM_KIND => {
bump(metrics, "texm_entries", 1);
+ validate_nres_texm_payload(&document, entry)?;
}
- 0x4449_5846 => {
+ FXID_KIND => {
bump(metrics, "fxid_entries", 1);
+ validate_nres_fxid_payload(&document, entry)?;
}
- 0x5241_4557 => {
+ WEAR_KIND => {
bump(metrics, "wear_entries", 1);
+ validate_nres_wear_payload(&document, entry)?;
}
_ => {}
}
@@ -406,6 +464,94 @@ fn inspect_nres_metrics(bytes: Vec<u8>, metrics: &mut BTreeMap<String, u64>) ->
Ok(())
}
+/// Decodes the payload of `entry` as a nested NRes archive and checks it as an MSH model.
+///
+/// The payload is copied into a fresh shared buffer, parsed with the `Compatible` read
+/// profile, handed to `decode_msh`, and the decoded model is handed to `validate_msh`.
+/// The first failing step ends the check and its error is returned in string form.
+fn validate_nres_msh_payload(
+    document: &NresDocument,
+    entry: &fparkan_nres::NresEntry,
+) -> Result<(), String> {
+    let raw = match document.payload(entry.id()) {
+        Ok(raw) => raw,
+        Err(failure) => return Err(failure.to_string()),
+    };
+    let archive = match fparkan_nres::decode(
+        Arc::from(raw.to_vec().into_boxed_slice()),
+        fparkan_nres::ReadProfile::Compatible,
+    ) {
+        Ok(archive) => archive,
+        Err(failure) => return Err(failure.to_string()),
+    };
+    let mesh = match decode_msh(&archive) {
+        Ok(mesh) => mesh,
+        Err(failure) => return Err(failure.to_string()),
+    };
+    match validate_msh(&mesh) {
+        Ok(_) => Ok(()),
+        Err(failure) => Err(failure.to_string()),
+    }
+}
+
+/// Runs the payload of `entry` through `decode_mat0`, passing the entry's `attr2`
+/// metadata value as the second argument.
+///
+/// The decoded value is discarded; only success or failure matters. Errors from the
+/// payload lookup or the decoder are returned in string form.
+fn validate_nres_mat0_payload(
+    document: &NresDocument,
+    entry: &fparkan_nres::NresEntry,
+) -> Result<(), String> {
+    let raw = document
+        .payload(entry.id())
+        .map_err(|failure| failure.to_string())?;
+    match decode_mat0(raw, entry.meta().attr2) {
+        Ok(_) => Ok(()),
+        Err(failure) => Err(failure.to_string()),
+    }
+}
+
+/// Runs the payload of `entry` through `decode_wear` and discards the decoded value.
+///
+/// Errors from the payload lookup or the decoder are returned in string form.
+fn validate_nres_wear_payload(
+    document: &NresDocument,
+    entry: &fparkan_nres::NresEntry,
+) -> Result<(), String> {
+    let raw = match document.payload(entry.id()) {
+        Ok(raw) => raw,
+        Err(failure) => return Err(failure.to_string()),
+    };
+    if let Err(failure) = decode_wear(raw) {
+        return Err(failure.to_string());
+    }
+    Ok(())
+}
+
+/// Copies the payload of `entry` into a shared buffer and runs it through `decode_texm`.
+///
+/// The decoded value is discarded. Errors from the payload lookup or the decoder are
+/// returned in string form.
+fn validate_nres_texm_payload(
+    document: &NresDocument,
+    entry: &fparkan_nres::NresEntry,
+) -> Result<(), String> {
+    document
+        .payload(entry.id())
+        .map_err(|failure| failure.to_string())
+        .and_then(|raw| {
+            decode_texm(Arc::from(raw.to_vec().into_boxed_slice()))
+                .map(drop)
+                .map_err(|failure| failure.to_string())
+        })
+}
+
+/// Copies the payload of `entry` into a shared buffer and runs it through `decode_fxid`.
+///
+/// The decoded value is discarded. Errors from the payload lookup or the decoder are
+/// returned in string form.
+fn validate_nres_fxid_payload(
+    document: &NresDocument,
+    entry: &fparkan_nres::NresEntry,
+) -> Result<(), String> {
+    let raw = match document.payload(entry.id()) {
+        Ok(raw) => raw,
+        Err(failure) => return Err(failure.to_string()),
+    };
+    if let Err(failure) = decode_fxid(Arc::from(raw.to_vec().into_boxed_slice())) {
+        return Err(failure.to_string());
+    }
+    Ok(())
+}
+
+/// Copies `bytes` into a shared buffer and parses it with `decode_rsli` under the
+/// `Compatible` read profile.
+///
+/// The parsed value is discarded; a decoder error is returned in string form.
+fn inspect_rsli_metrics(bytes: &[u8]) -> Result<(), String> {
+    match decode_rsli(
+        Arc::from(bytes.to_vec().into_boxed_slice()),
+        ReadProfile::Compatible,
+    ) {
+        Ok(_) => Ok(()),
+        Err(failure) => Err(failure.to_string()),
+    }
+}
+
+/// Copies `bytes` into a shared buffer and parses it with `decode_tma` under the
+/// `Strict` profile.
+///
+/// The parsed value is discarded; a decoder error is returned in string form.
+fn inspect_tma_metrics(bytes: &[u8]) -> Result<(), String> {
+    decode_tma(
+        Arc::from(bytes.to_vec().into_boxed_slice()),
+        TmaProfile::Strict,
+    )
+    .map(drop)
+    .map_err(|failure| failure.to_string())
+}
+
+/// Accepts `bytes` when either `decode_unit_dat` or `decode_unit_dat_binding` succeeds.
+///
+/// The binding decoder is only tried when the unit decoder fails. When both fail, the
+/// individual decoder errors are dropped and a fixed message is returned instead.
+fn inspect_unit_dat_metrics(bytes: &[u8]) -> Result<(), String> {
+    let accepted = decode_unit_dat(bytes).is_ok() || decode_unit_dat_binding(bytes).is_ok();
+    if accepted {
+        Ok(())
+    } else {
+        Err("failed to parse unit.dat payload as unit or binding format".to_string())
+    }
+}
+
+/// Parses `bytes` as an NRes document and runs one of the land decoders over it.
+///
+/// `is_map` selects `decode_land_map`; otherwise `decode_land_msh` is used. The decoded
+/// value is discarded, and any parse or decode error is returned in string form.
+fn inspect_land_metrics(bytes: &[u8], is_map: bool) -> Result<(), String> {
+    let document = inspect_nres_document(bytes)?;
+    let problem = if is_map {
+        decode_land_map(&document)
+            .err()
+            .map(|failure| failure.to_string())
+    } else {
+        decode_land_msh(&document)
+            .err()
+            .map(|failure| failure.to_string())
+    };
+    match problem {
+        Some(message) => Err(message),
+        None => Ok(()),
+    }
+}
+
+/// Parses `bytes` as an NRes document using the `Compatible` read profile.
+///
+/// The input is copied once into a shared buffer that is handed to the decoder. A decoder
+/// error is returned in string form.
+fn inspect_nres_document(bytes: &[u8]) -> Result<NresDocument, String> {
+    // `Arc::from(&[u8])` allocates the shared buffer and copies into it directly, instead
+    // of first building a `Vec`/`Box<[u8]>` that is then copied a second time.
+    fparkan_nres::decode(Arc::from(bytes), fparkan_nres::ReadProfile::Compatible)
+        .map_err(|err| err.to_string())
+}
+
fn bump(metrics: &mut BTreeMap<String, u64>, key: &str, delta: u64) {
if let Some(value) = metrics.get_mut(key) {
*value = value.saturating_add(delta);
@@ -418,15 +564,6 @@ fn has_extension(path: &str, expected: &str) -> bool {
.is_some_and(|extension| extension.eq_ignore_ascii_case(expected))
}
-fn inspect_nres_entries(bytes: Vec<u8>) -> Result<Vec<fparkan_nres::NresEntry>, String> {
- let document = fparkan_nres::decode(
- Arc::from(bytes.into_boxed_slice()),
- fparkan_nres::ReadProfile::Compatible,
- )
- .map_err(|err| err.to_string())?;
- Ok(document.entries().to_vec())
-}
-
/// Computes stable manifest fingerprint.
#[must_use]
pub fn fingerprint(manifest: &CorpusManifest) -> Sha256Digest {
@@ -699,6 +836,116 @@ mod tests {
}
#[test]
+    fn report_land_map_paths_use_production_land_parser() {
+        // A file built from an empty entry list is stored under a `land.map` path; the
+        // report is expected to flag it as an error under the `land_map` variant.
+        let root = temp_dir("report-land-map");
+        // `fs::write` does not create missing parent directories, and `create_dir_all`
+        // succeeds when they already exist, so this holds whatever `temp_dir` sets up.
+        fs::create_dir_all(root.join("WORLD/MAP")).expect("land map dir");
+        fs::write(root.join("WORLD/MAP/land.map"), build_nres(&[])).expect("land map");
+        let manifest = CorpusManifest {
+            kind: CorpusKind::Unknown,
+            files: vec![ManifestEntry {
+                path: "WORLD/MAP/land.map".to_string(),
+                size: 16,
+                hash: sha256(b"land.map"),
+            }],
+            casefold_collisions: Vec::new(),
+        };
+
+        let report = report(&root, &manifest).expect("report");
+
+        assert_eq!(report.failures, 1);
+        assert_eq!(report.records[0].status, CorpusFileStatus::Error);
+        assert_eq!(report.records[0].variant, "land_map");
+        let _ = fs::remove_dir_all(root);
+    }
+
+    #[test]
+    fn report_land_msh_paths_use_production_land_parser() {
+        // A file built from an empty entry list is stored under a `land.msh` path; the
+        // report is expected to flag it as an error under the `land_msh` variant.
+        let root = temp_dir("report-land-msh");
+        // `fs::write` does not create missing parent directories, and `create_dir_all`
+        // succeeds when they already exist, so this holds whatever `temp_dir` sets up.
+        fs::create_dir_all(root.join("WORLD/MAP")).expect("land msh dir");
+        fs::write(root.join("WORLD/MAP/land.msh"), build_nres(&[])).expect("land msh");
+        let manifest = CorpusManifest {
+            kind: CorpusKind::Unknown,
+            files: vec![ManifestEntry {
+                path: "WORLD/MAP/land.msh".to_string(),
+                size: 16,
+                hash: sha256(b"land.msh"),
+            }],
+            casefold_collisions: Vec::new(),
+        };
+
+        let report = report(&root, &manifest).expect("report");
+
+        assert_eq!(report.failures, 1);
+        assert_eq!(report.records[0].status, CorpusFileStatus::Error);
+        assert_eq!(report.records[0].variant, "land_msh");
+        let _ = fs::remove_dir_all(root);
+    }
+
+    #[test]
+    fn report_tma_paths_use_production_tma_parser() {
+        // Bytes that are not a valid mission file are stored under a `data.tma` path; the
+        // report is expected to flag them as an error under the `tma` variant.
+        let root = temp_dir("report-tma");
+        // `fs::write` does not create missing parent directories, and `create_dir_all`
+        // succeeds when they already exist, so this holds whatever `temp_dir` sets up.
+        fs::create_dir_all(root.join("MISSIONS/test")).expect("tma dir");
+        fs::write(root.join("MISSIONS/test/data.tma"), b"malformed tma").expect("tma");
+        let manifest = CorpusManifest {
+            kind: CorpusKind::Unknown,
+            files: vec![ManifestEntry {
+                path: "MISSIONS/test/data.tma".to_string(),
+                // `b"malformed tma"` is 13 bytes long; the manifest size must match the file.
+                size: 13,
+                hash: sha256(b"malformed tma"),
+            }],
+            casefold_collisions: Vec::new(),
+        };
+
+        let report = report(&root, &manifest).expect("report");
+
+        assert_eq!(report.failures, 1);
+        assert_eq!(report.records[0].status, CorpusFileStatus::Error);
+        assert_eq!(report.records[0].variant, "tma");
+        let _ = fs::remove_dir_all(root);
+    }
+
+    #[test]
+    fn report_unit_dat_paths_use_production_unit_parser() {
+        // 120 zero bytes are stored under `units/unit.dat`; the report is expected to
+        // accept them and label the record with the `unit_dat` variant.
+        let root = temp_dir("report-unit");
+        // `fs::write` does not create missing parent directories, and `create_dir_all`
+        // succeeds when they already exist, so this holds whatever `temp_dir` sets up.
+        fs::create_dir_all(root.join("units")).expect("unit dir");
+        fs::write(root.join("units/unit.dat"), vec![0u8; 120]).expect("unit");
+        let manifest = CorpusManifest {
+            kind: CorpusKind::Unknown,
+            files: vec![ManifestEntry {
+                path: "units/unit.dat".to_string(),
+                size: 120,
+                hash: sha256(&[0u8; 120]),
+            }],
+            casefold_collisions: Vec::new(),
+        };
+
+        let report = report(&root, &manifest).expect("report");
+
+        assert_eq!(report.failures, 0);
+        assert_eq!(report.records[0].status, CorpusFileStatus::Ok);
+        assert_eq!(report.records[0].variant, "unit_dat");
+        let _ = fs::remove_dir_all(root);
+    }
+
+    #[test]
+    fn report_rsli_paths_use_production_rsli_parser() {
+        // A file that starts with `NL` but is otherwise not a valid library is stored at
+        // the corpus root; the report is expected to flag it as an `rsli` error.
+        let relative = "patch.nl";
+        let payload = b"NL malformed";
+        let root = temp_dir("report-rsli");
+        fs::write(root.join(relative), payload).expect("rsli");
+
+        let entry = ManifestEntry {
+            path: relative.to_string(),
+            size: 12,
+            hash: sha256(payload),
+        };
+        let manifest = CorpusManifest {
+            kind: CorpusKind::Unknown,
+            files: vec![entry],
+            casefold_collisions: Vec::new(),
+        };
+
+        let report = report(&root, &manifest).expect("report");
+
+        assert_eq!(report.failures, 1);
+        let first = &report.records[0];
+        assert_eq!(first.status, CorpusFileStatus::Error);
+        assert_eq!(first.variant, "rsli");
+        let _ = fs::remove_dir_all(root);
+    }
+
+ #[test]
fn deterministic_traversal_is_creation_order_independent() {
let first = temp_dir("order-first");
let second = temp_dir("order-second");