This dataset is used to benchmark the task of Detection and Localization of Instruction Errors, presented in our paper. You will find the dataset explanation here, as well as the Habitat integration tools to use it.
{"episodes": [
{
"episode_id": "integer number representing an episode id",
"trajectory_id": "integer number representing the gt trajectory",
"scene_id": "string representing the scene",
"start_position": "list of floats representing the start position",
"start_rotation": "list of floats representing the start rotation",
"goals": "same format as in standard VLN-CE",
"instruction": {
"instruction_text": "string representing the instruction",
"instruction_tokens": "list of integers representing the instruction tokens (using the BERT tokenizer)"
},
"reference_path" : "same format as in standard VLN-CE",
"error_information" : {
"episode_contains_error": "boolean",
"error_type": "integer representing the error type",
"token_swapped": [
{
"old_word": "string representing the old word",
"new_word": "string representing the new word",
"token_id" : ["list of BERT token ids of this word"],
"token_id_position": ["list of integers representing the indices of the swapped words in the new instruction"]
}]
},
"old_episode_id": "integer number representing the episode id from which this episode was generated"
},
{}, ...],
"instruction_vocab": [
// same as BEVBert
]}
See readme in the repo.