{
"model": {
"type": "WordPiece",
"vocab": {
"[CLS]": 0, "[SEP]": 1, "[UNK]": 2, "[PAD]": 3, "[MASK]": 4,
"hello": 5, "world": 6, "test": 7, "##ing": 8,
"日": 9, "本": 10, "語": 11, "テスト": 12
},
"unk_token": "[UNK]",
"continuing_subword_prefix": "##"
},
"added_tokens": [
{"id": 0, "content": "[CLS]", "special": true},
{"id": 1, "content": "[SEP]", "special": true},
{"id": 2, "content": "[UNK]", "special": true},
{"id": 3, "content": "[PAD]", "special": true},
{"id": 4, "content": "[MASK]", "special": true}
],
"pre_tokenizer": {
"type": "BertPreTokenizer"
},
"decoder": {
"type": "WordPiece",
"prefix": "##",
"cleanup": true
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{"SpecialToken": {"id": "[CLS]", "type_id": 0}},
{"Sequence": {"id": "A", "type_id": 0}},
{"SpecialToken": {"id": "[SEP]", "type_id": 0}}
],
"pair": [
{"SpecialToken": {"id": "[CLS]", "type_id": 0}},
{"Sequence": {"id": "A", "type_id": 0}},
{"SpecialToken": {"id": "[SEP]", "type_id": 0}},
{"Sequence": {"id": "B", "type_id": 1}},
{"SpecialToken": {"id": "[SEP]", "type_id": 1}}
],
"special_tokens": {
"[CLS]": {"id": "[CLS]", "ids": [0], "tokens": ["[CLS]"]},
"[SEP]": {"id": "[SEP]", "ids": [1], "tokens": ["[SEP]"]}
}
}
}