mirror of
https://github.com/google-deepmind/alphafold3.git
synced 2025-10-20 13:23:47 +08:00
Improve validation of CIF files.
* Make sure the data name in `data_<name>` is non-empty.
* Check for duplicate key names.
* Check that multi-line tokens are closed at the end of the file.
* Also fix two broken mmCIF files that this check uncovered.

PiperOrigin-RevId: 802572236
Change-Id: Ie7a3a5ec816ec5b97158508cc1b12064cf0e70a8
This commit is contained in:
committed by
Copybara-Service
parent
4208665547
commit
b467f92160
@ -134,6 +134,9 @@ absl::StatusOr<std::vector<absl::string_view>> TokenizeInternal(
|
|||||||
line_num++;
|
line_num++;
|
||||||
if (!multiline.empty() && multiline[0] == ';') {
|
if (!multiline.empty() && multiline[0] == ';') {
|
||||||
break;
|
break;
|
||||||
|
} else if (line_num == lines.size()) {
|
||||||
|
return absl::InvalidArgumentError(
|
||||||
|
"Last multiline token is not terminated by a semicolon.");
|
||||||
}
|
}
|
||||||
multiline_tokens.push_back(multiline);
|
multiline_tokens.push_back(multiline);
|
||||||
}
|
}
|
||||||
@ -374,6 +377,10 @@ absl::StatusOr<CifDict> CifDict::FromString(absl::string_view cif_string) {
|
|||||||
return absl::InvalidArgumentError(
|
return absl::InvalidArgumentError(
|
||||||
"The CIF file does not start with the data_ field.");
|
"The CIF file does not start with the data_ field.");
|
||||||
}
|
}
|
||||||
|
if (first_token.empty()) {
|
||||||
|
return absl::InvalidArgumentError(
|
||||||
|
"The CIF file does not contain a data block name.");
|
||||||
|
}
|
||||||
cif["data_"].emplace_back(first_token);
|
cif["data_"].emplace_back(first_token);
|
||||||
|
|
||||||
// Counters for CIF loop_ regions.
|
// Counters for CIF loop_ regions.
|
||||||
@ -413,7 +420,12 @@ absl::StatusOr<CifDict> CifDict::FromString(absl::string_view cif_string) {
|
|||||||
loop_flag = false;
|
loop_flag = false;
|
||||||
} else {
|
} else {
|
||||||
// We are in the keys (column names) section of the loop.
|
// We are in the keys (column names) section of the loop.
|
||||||
auto& columns = cif[token];
|
auto [it, inserted] = cif.try_emplace(token);
|
||||||
|
if (!inserted) {
|
||||||
|
return absl::InvalidArgumentError(
|
||||||
|
absl::StrCat("Duplicate loop key: '", token, "'"));
|
||||||
|
}
|
||||||
|
auto& columns = it->second;
|
||||||
columns.clear();
|
columns.clear();
|
||||||
|
|
||||||
// Heuristic: _atom_site is typically the largest table in an mmCIF
|
// Heuristic: _atom_site is typically the largest table in an mmCIF
|
||||||
@ -448,7 +460,12 @@ absl::StatusOr<CifDict> CifDict::FromString(absl::string_view cif_string) {
|
|||||||
absl::StrCat("Key '", key, "' does not start with an underscore."));
|
absl::StrCat("Key '", key, "' does not start with an underscore."));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
cif[key].emplace_back(token);
|
auto [it, inserted] = cif.try_emplace(key);
|
||||||
|
if (!inserted) {
|
||||||
|
return absl::InvalidArgumentError(
|
||||||
|
absl::StrCat("Duplicate key: '", key, "'"));
|
||||||
|
}
|
||||||
|
(it->second).emplace_back(token);
|
||||||
key = "";
|
key = "";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user