{
"model": {
"type": "BPE",
"vocab": {
"<s>": 0, "</s>": 1, "<unk>": 2,
"h": 3, "e": 4, "l": 5, "o": 6,
"\u0120": 7, "w": 8, "r": 9, "d": 10,
"t": 11, "s": 12,
"he": 13, "ll": 14, "\u0120w": 15, "or": 16, "ld": 17,
"hell": 18, "hello": 19, "orld": 20, "\u0120world": 21
},
"merges": [
"h e",
"l l",
"\u0120 w",
"o r",
"l d",
"he ll",
"hell o",
"or ld",
"\u0120w orld"
]
},
"added_tokens": [
{"id": 0, "content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true},
{"id": 1, "content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true},
{"id": 2, "content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true}
],
"pre_tokenizer": {"type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true},
"decoder": {"type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{"SpecialToken": {"id": "<s>", "type_id": 0}},
{"Sequence": {"id": "A", "type_id": 0}},
{"SpecialToken": {"id": "</s>", "type_id": 0}}
],
"pair": [
{"SpecialToken": {"id": "<s>", "type_id": 0}},
{"Sequence": {"id": "A", "type_id": 0}},
{"SpecialToken": {"id": "</s>", "type_id": 0}},
{"Sequence": {"id": "B", "type_id": 1}},
{"SpecialToken": {"id": "</s>", "type_id": 1}}
],
"special_tokens": {
"<s>": {"id": "<s>", "ids": [0], "tokens": ["<s>"]},
"</s>": {"id": "</s>", "ids": [1], "tokens": ["</s>"]}
}
}
}