readme
Browse files- README.md +2 -30
- events.out.tfevents.1639128562.t1v-n-358ff5d1-w-0.686981.3.v2 +3 -0
- events.out.tfevents.1639128677.t1v-n-358ff5d1-w-0.688351.3.v2 +3 -0
- events.out.tfevents.1639128772.t1v-n-358ff5d1-w-0.689734.3.v2 +3 -0
- events.out.tfevents.1639128961.t1v-n-358ff5d1-w-0.691429.3.v2 +3 -0
- events.out.tfevents.1639129025.t1v-n-358ff5d1-w-0.692738.3.v2 +3 -0
- events.out.tfevents.1639129541.t1v-n-358ff5d1-w-0.694454.3.v2 +3 -0
- run.sh +3 -3
- run_t5_mlm_flax.py +1 -1
- run_t5_mlm_flax_streaming.py +1 -5
README.md
CHANGED
|
@@ -8,35 +8,7 @@ datasets:
|
|
| 8 |
---
|
| 9 |
# 🇳🇴 Norwegian T5 Base model Trained on the NCC🇳🇴
|
| 10 |
|
| 11 |
-
This is a Norwegian T5-base model trained on the Norwegian Colossal Corpus (NCC) on a TPU v3-8.
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
The following settings were used in training:
|
| 16 |
-
```bash
|
| 17 |
-
./run_t5_mlm_flax_streaming.py \
|
| 18 |
-
--output_dir="./" \
|
| 19 |
-
--model_type="t5" \
|
| 20 |
-
--config_name="./" \
|
| 21 |
-
--tokenizer_name="./" \
|
| 22 |
-
--dataset_name="pere/norwegian_colossal_corpus_v2_short100k" \
|
| 23 |
-
--max_seq_length="512" \
|
| 24 |
-
--weight_decay="0.01" \
|
| 25 |
-
--per_device_train_batch_size="32" \
|
| 26 |
-
--per_device_eval_batch_size="32" \
|
| 27 |
-
--learning_rate="8e-3" \
|
| 28 |
-
--warmup_steps="5000" \
|
| 29 |
-
--overwrite_output_dir \
|
| 30 |
-
--cache_dir /mnt/disks/flaxdisk/cache/ \
|
| 31 |
-
--num_train_epochs="5" \
|
| 32 |
-
--adam_beta1="0.9" \
|
| 33 |
-
--adam_beta2="0.98" \
|
| 34 |
-
--logging_steps="500" \
|
| 35 |
-
--num_train_steps="1000000" \
|
| 36 |
-
--num_eval_samples="5000" \
|
| 37 |
-
--save_steps="5000" \
|
| 38 |
-
--eval_steps="5000" \
|
| 39 |
-
--preprocessing_num_workers 96 \
|
| 40 |
-
--adafactor \
|
| 41 |
-
--push_to_hub
|
| 42 |
```
|
|
|
|
| 8 |
---
|
| 9 |
# 🇳🇴 Norwegian T5 Base model Trained on the NCC🇳🇴
|
| 10 |
|
| 11 |
+
This is a Norwegian T5-base model trained on the Norwegian Colossal Corpus (NCC) on a TPU v3-8.
|
| 12 |
|
| 13 |
+
This model is currently training. It will finish in January 2022. Please do not use it yet.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
```
|
events.out.tfevents.1639128562.t1v-n-358ff5d1-w-0.686981.3.v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5dfce941b39945a6d6d07c566fd3bc9b6fac8b6004009113d225a917c03538e7
|
| 3 |
+
size 40
|
events.out.tfevents.1639128677.t1v-n-358ff5d1-w-0.688351.3.v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8932ca5995a80bb77d76b41daf7a8a4d63d987473564afc7e00a9d10158be496
|
| 3 |
+
size 40
|
events.out.tfevents.1639128772.t1v-n-358ff5d1-w-0.689734.3.v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92f6bb44edb94beb130d22c68fed5473bfea890e3f0770e698d0786b3ae1bb84
|
| 3 |
+
size 40
|
events.out.tfevents.1639128961.t1v-n-358ff5d1-w-0.691429.3.v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce6c72f09124acb4634a23ea7eb63aec00eba13595fe0887a7ef8116c5361376
|
| 3 |
+
size 40
|
events.out.tfevents.1639129025.t1v-n-358ff5d1-w-0.692738.3.v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46c8eace8366230b32c94f9c7986a751c6faeb2da07e46805c33cf50072349ba
|
| 3 |
+
size 40
|
events.out.tfevents.1639129541.t1v-n-358ff5d1-w-0.694454.3.v2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbabad7dcae1ed7eeaf7462fd304baeae84d3fe85bf4eb6371738109d9654c7a
|
| 3 |
+
size 40
|
run.sh
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
./run_t5_mlm_flax_streaming.py \
|
| 2 |
-
--output_dir="./" \
|
| 3 |
--model_type="t5" \
|
| 4 |
-
--config_name="./" \
|
| 5 |
-
--tokenizer_name="./" \
|
| 6 |
--dataset_name="NbAiLab/nbailab_extended" \
|
| 7 |
--max_seq_length="512" \
|
| 8 |
--weight_decay="0.01" \
|
|
|
|
| 1 |
./run_t5_mlm_flax_streaming.py \
|
| 2 |
+
--output_dir="." \
|
| 3 |
--model_type="t5" \
|
| 4 |
+
--config_name="./config.json" \
|
| 5 |
+
--tokenizer_name="." \
|
| 6 |
--dataset_name="NbAiLab/nbailab_extended" \
|
| 7 |
--max_seq_length="512" \
|
| 8 |
--weight_decay="0.01" \
|
run_t5_mlm_flax.py
CHANGED
|
@@ -599,7 +599,7 @@ if __name__ == "__main__":
|
|
| 599 |
dropout_rngs = jax.random.split(rng, jax.local_device_count())
|
| 600 |
|
| 601 |
if model_args.model_name_or_path:
|
| 602 |
-
model = FlaxT5ForConditionalGeneration.
|
| 603 |
model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
|
| 604 |
)
|
| 605 |
else:
|
|
|
|
| 599 |
dropout_rngs = jax.random.split(rng, jax.local_device_count())
|
| 600 |
|
| 601 |
if model_args.model_name_or_path:
|
| 602 |
+
model = FlaxT5ForConditionalGeneration.from_pretrained(
|
| 603 |
model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
|
| 604 |
)
|
| 605 |
else:
|
run_t5_mlm_flax_streaming.py
CHANGED
|
@@ -554,17 +554,13 @@ if __name__ == "__main__":
|
|
| 554 |
rng = jax.random.PRNGKey(training_args.seed)
|
| 555 |
dropout_rngs = jax.random.split(rng, jax.local_device_count())
|
| 556 |
|
| 557 |
-
#Pere changed 13 august
|
| 558 |
-
#model = FlaxT5ForConditionalGeneration(config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype))
|
| 559 |
|
| 560 |
if model_args.model_name_or_path:
|
| 561 |
model = FlaxT5ForConditionalGeneration.from_pretrained(
|
| 562 |
model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
|
| 563 |
)
|
| 564 |
else:
|
| 565 |
-
model = FlaxT5ForConditionalGeneration.
|
| 566 |
-
config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
|
| 567 |
-
)
|
| 568 |
|
| 569 |
|
| 570 |
# Data collator
|
|
|
|
| 554 |
rng = jax.random.PRNGKey(training_args.seed)
|
| 555 |
dropout_rngs = jax.random.split(rng, jax.local_device_count())
|
| 556 |
|
|
|
|
|
|
|
| 557 |
|
| 558 |
if model_args.model_name_or_path:
|
| 559 |
model = FlaxT5ForConditionalGeneration.from_pretrained(
|
| 560 |
model_args.model_name_or_path, config=config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype)
|
| 561 |
)
|
| 562 |
else:
|
| 563 |
+
model = FlaxT5ForConditionalGeneration(config, seed=training_args.seed, dtype=getattr(jnp, model_args.dtype))
|
|
|
|
|
|
|
| 564 |
|
| 565 |
|
| 566 |
# Data collator
|