# Example: load the WALS RoBERTa tokenizer, the Set 1 (consonant inventories)
# arrays, and a RoBERTa sequence-classification model.
import json

import numpy as np
from transformers import (
    RobertaForSequenceClassification,
    RobertaTokenizer,
    Trainer,
    TrainingArguments,
)

# NOTE(review): the slow `RobertaTokenizer` is normally loaded from a
# directory/repo containing vocab.json + merges.txt; a single tokenizer
# .json file is usually what `RobertaTokenizerFast` consumes -- confirm that
# this path loads as intended.
tokenizer = RobertaTokenizer.from_pretrained("./tokenizers/roberta_wals_tokenizer.json")

# Load set 1 (Consonant inventories)
consonant_data = np.load("./data/set_01_consonants/wals_code_vectors.npy")
labels = np.load("./data/set_01_consonants/labels.npy")

model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=36,  # 36 feature sets
)

# The first axis of the loaded array is reported as the number of samples.
print(f"Loaded {consonant_data.shape[0]} language samples for Set 1")

# Here is a minimal example using Hugging Face's Trainer API:
# Example (repeated): load the WALS RoBERTa tokenizer, the Set 1 (consonant
# inventories) arrays, and a RoBERTa sequence-classification model.
import json

import numpy as np
from transformers import (
    RobertaForSequenceClassification,
    RobertaTokenizer,
    Trainer,
    TrainingArguments,
)

# NOTE(review): the slow `RobertaTokenizer` is normally loaded from a
# directory/repo containing vocab.json + merges.txt; a single tokenizer
# .json file is usually what `RobertaTokenizerFast` consumes -- confirm that
# this path loads as intended.
tokenizer = RobertaTokenizer.from_pretrained("./tokenizers/roberta_wals_tokenizer.json")

# Load set 1 (Consonant inventories)
consonant_data = np.load("./data/set_01_consonants/wals_code_vectors.npy")
labels = np.load("./data/set_01_consonants/labels.npy")

model = RobertaForSequenceClassification.from_pretrained(
    "roberta-base",
    num_labels=36,  # 36 feature sets
)

# WALS Roberta Sets 1-36.zip
print(f"Loaded {consonant_data.shape[0]} language samples for Set 1") Here is a minimal example using Hugging Face's Trainer API: import numpy as np import json from transformers