Update README.md
README.md (changed)
@@ -107,6 +107,24 @@ class BertCRF(BertPreTrainedModel):

        return loss, tags
```
+
+```python
+with io.open('./multilingual-pos-tagger-language-detection-indian-context-muril/label_encoder.pkl', 'rb') as f:
+    le = cloudpickle.load(f, encoding="latin-1")  # fitted label encoder mapping tag ids <-> tag strings
+
+model = BertCRF.from_pretrained('./multilingual-pos-tagger-language-detection-indian-context-muril/', num_labels=210)  # BertCRF as defined above
+tokenizer = BertTokenizerFast.from_pretrained('./data/muril-base-cased/')
+
+corpus = 'maru naam swagat che'
+inputs = tokenizer(corpus, max_length=512, padding=True, truncation=True, return_tensors='pt',
+                   return_offsets_mapping=True)
+offset_mapping = inputs.pop("offset_mapping").cpu().numpy().tolist()  # used to map sub-word predictions back to words
+
+outputs = model(**inputs)  # returns (loss, tags) as in the class above; tags are the predicted label ids
+print(decode(outputs[1].numpy().tolist(), inputs['input_ids'].numpy().tolist(), offset_mapping, list(le.inverse_transform(list(range(209))))))
+
+## expected output: [{'words': ['maru', 'naam', 'swagat', 'che'], 'labels': ['gu_rom-PRP', 'gu_rom-NN', 'gu_rom-NNP', 'gu_rom-VAUX']}]
+```
Some sample output from the model

This model uses a different kind of labelling system: it can not only detect the language of each word but also the POS tag of the respective language.
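
The labels in the sample output combine a language code and a POS tag (for example, 'gu_rom-NN' appears to be romanised Gujarati plus a common-noun tag). Below is a minimal sketch, not part of the README diff, of how these combined tags could be split apart; the tag format is assumed from the sample above. Note that the usage snippet also relies on `io`, `cloudpickle`, and `transformers.BertTokenizerFast` being imported, and on a `decode` helper that is not shown in this hunk.

```python
# Minimal sketch (assumed "<language>-<POS>" tag format from the sample output).
prediction = [{'words': ['maru', 'naam', 'swagat', 'che'],
               'labels': ['gu_rom-PRP', 'gu_rom-NN', 'gu_rom-NNP', 'gu_rom-VAUX']}]

for sentence in prediction:
    for word, tag in zip(sentence['words'], sentence['labels']):
        language, pos = tag.rsplit('-', 1)   # e.g. 'gu_rom-NN' -> ('gu_rom', 'NN')
        print(f"{word}: language={language}, POS={pos}")
```

Splitting on the last hyphen keeps multi-part language codes such as 'gu_rom' intact while isolating the POS tag.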