mirror of
https://github.com/google-deepmind/alphafold3.git
synced 2025-10-20 13:23:47 +08:00
Improve validation of CIF files.
* Make sure the data name in `data_<name>` is non-empty.
* Check for duplicate key names.
* Check that multi-line tokens are closed at the end of the file.
* Also fix two broken mmCIF files that this check uncovered.

PiperOrigin-RevId: 802572236
Change-Id: Ie7a3a5ec816ec5b97158508cc1b12064cf0e70a8
This commit is contained in:
committed by
Copybara-Service
parent
4208665547
commit
b467f92160
@ -134,6 +134,9 @@ absl::StatusOr<std::vector<absl::string_view>> TokenizeInternal(
|
||||
line_num++;
|
||||
if (!multiline.empty() && multiline[0] == ';') {
|
||||
break;
|
||||
} else if (line_num == lines.size()) {
|
||||
return absl::InvalidArgumentError(
|
||||
"Last multiline token is not terminated by a semicolon.");
|
||||
}
|
||||
multiline_tokens.push_back(multiline);
|
||||
}
|
||||
@ -374,6 +377,10 @@ absl::StatusOr<CifDict> CifDict::FromString(absl::string_view cif_string) {
|
||||
return absl::InvalidArgumentError(
|
||||
"The CIF file does not start with the data_ field.");
|
||||
}
|
||||
if (first_token.empty()) {
|
||||
return absl::InvalidArgumentError(
|
||||
"The CIF file does not contain a data block name.");
|
||||
}
|
||||
cif["data_"].emplace_back(first_token);
|
||||
|
||||
// Counters for CIF loop_ regions.
|
||||
@ -413,7 +420,12 @@ absl::StatusOr<CifDict> CifDict::FromString(absl::string_view cif_string) {
|
||||
loop_flag = false;
|
||||
} else {
|
||||
// We are in the keys (column names) section of the loop.
|
||||
auto& columns = cif[token];
|
||||
auto [it, inserted] = cif.try_emplace(token);
|
||||
if (!inserted) {
|
||||
return absl::InvalidArgumentError(
|
||||
absl::StrCat("Duplicate loop key: '", token, "'"));
|
||||
}
|
||||
auto& columns = it->second;
|
||||
columns.clear();
|
||||
|
||||
// Heuristic: _atom_site is typically the largest table in an mmCIF
|
||||
@ -448,7 +460,12 @@ absl::StatusOr<CifDict> CifDict::FromString(absl::string_view cif_string) {
|
||||
absl::StrCat("Key '", key, "' does not start with an underscore."));
|
||||
}
|
||||
} else {
|
||||
cif[key].emplace_back(token);
|
||||
auto [it, inserted] = cif.try_emplace(key);
|
||||
if (!inserted) {
|
||||
return absl::InvalidArgumentError(
|
||||
absl::StrCat("Duplicate key: '", key, "'"));
|
||||
}
|
||||
(it->second).emplace_back(token);
|
||||
key = "";
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user