mainakjsr
committed on
Commit
·
fc4b224
1
Parent(s):
8e08b29
new tune hindi text
Browse files
- config.json +1 -1
- generation_config.json +1 -1
- model.safetensors +1 -1
- output_tune.log +68 -0
- special_tokens_map.json +24 -0
- test_output.txt +56 -0
- tokenizer.json +0 -0
- tokenizer_config.json +30 -0
- vocab.json +0 -0
config.json
CHANGED
|
@@ -32,7 +32,7 @@
|
|
| 32 |
}
|
| 33 |
},
|
| 34 |
"torch_dtype": "float32",
|
| 35 |
-
"transformers_version": "4.
|
| 36 |
"use_cache": true,
|
| 37 |
"vocab_size": 50257
|
| 38 |
}
|
|
|
|
| 32 |
}
|
| 33 |
},
|
| 34 |
"torch_dtype": "float32",
|
| 35 |
+
"transformers_version": "4.55.4",
|
| 36 |
"use_cache": true,
|
| 37 |
"vocab_size": 50257
|
| 38 |
}
|
generation_config.json
CHANGED
|
@@ -2,5 +2,5 @@
|
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 50256,
|
| 4 |
"eos_token_id": 50256,
|
| 5 |
-
"transformers_version": "4.
|
| 6 |
}
|
|
|
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 50256,
|
| 4 |
"eos_token_id": 50256,
|
| 5 |
+
"transformers_version": "4.55.4"
|
| 6 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 497774208
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05421fc15f3404fbca310ba45a596bd1228801d441147cd4db74eacce823654e
|
| 3 |
size 497774208
|
output_tune.log
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
( gpt2_env1) mainakjsr@mainakjsr-Lenovo-Y50-70-Touch:~/py_projects/shayari1$ python3 gpt2_tunning.py
|
| 2 |
+
Starting fine-tuning...
|
| 3 |
+
0%| | 0/600 [00:00<?, ?it/s]`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
|
| 4 |
+
{'loss': 0.0181, 'grad_norm': 2.293494462966919, 'learning_rate': 4.9250000000000004e-05, 'epoch': 0.1}
|
| 5 |
+
{'loss': 0.0115, 'grad_norm': 0.5815603137016296, 'learning_rate': 4.8416666666666673e-05, 'epoch': 0.2}
|
| 6 |
+
{'loss': 0.029, 'grad_norm': 2.7110595703125, 'learning_rate': 4.7583333333333336e-05, 'epoch': 0.3}
|
| 7 |
+
{'loss': 0.0209, 'grad_norm': 0.7257663607597351, 'learning_rate': 4.6750000000000005e-05, 'epoch': 0.4}
|
| 8 |
+
{'loss': 0.0076, 'grad_norm': 0.20155230164527893, 'learning_rate': 4.591666666666667e-05, 'epoch': 0.5}
|
| 9 |
+
{'loss': 0.0258, 'grad_norm': 2.0846869945526123, 'learning_rate': 4.5083333333333336e-05, 'epoch': 0.6}
|
| 10 |
+
{'loss': 0.0299, 'grad_norm': 0.3581019341945648, 'learning_rate': 4.4250000000000005e-05, 'epoch': 0.7}
|
| 11 |
+
{'loss': 0.0174, 'grad_norm': 1.6799453496932983, 'learning_rate': 4.341666666666667e-05, 'epoch': 0.8}
|
| 12 |
+
{'loss': 0.0248, 'grad_norm': 1.8864364624023438, 'learning_rate': 4.2583333333333336e-05, 'epoch': 0.9}
|
| 13 |
+
{'loss': 0.0199, 'grad_norm': 0.7106167674064636, 'learning_rate': 4.175e-05, 'epoch': 1.0}
|
| 14 |
+
{'loss': 0.0129, 'grad_norm': 1.9354057312011719, 'learning_rate': 4.091666666666667e-05, 'epoch': 1.1}
|
| 15 |
+
{'loss': 0.0189, 'grad_norm': 2.191124439239502, 'learning_rate': 4.0083333333333336e-05, 'epoch': 1.2}
|
| 16 |
+
{'loss': 0.0356, 'grad_norm': 0.24308550357818604, 'learning_rate': 3.9250000000000005e-05, 'epoch': 1.3}
|
| 17 |
+
{'loss': 0.0121, 'grad_norm': 0.4473109543323517, 'learning_rate': 3.841666666666667e-05, 'epoch': 1.4}
|
| 18 |
+
{'loss': 0.0257, 'grad_norm': 3.0310373306274414, 'learning_rate': 3.7583333333333337e-05, 'epoch': 1.5}
|
| 19 |
+
{'loss': 0.0243, 'grad_norm': 0.40297117829322815, 'learning_rate': 3.675e-05, 'epoch': 1.6}
|
| 20 |
+
{'loss': 0.0199, 'grad_norm': 0.20001010596752167, 'learning_rate': 3.591666666666667e-05, 'epoch': 1.7}
|
| 21 |
+
{'loss': 0.0092, 'grad_norm': 1.5069109201431274, 'learning_rate': 3.508333333333334e-05, 'epoch': 1.8}
|
| 22 |
+
{'loss': 0.0302, 'grad_norm': 1.6387602090835571, 'learning_rate': 3.4250000000000006e-05, 'epoch': 1.9}
|
| 23 |
+
{'loss': 0.0193, 'grad_norm': 2.052999258041382, 'learning_rate': 3.341666666666667e-05, 'epoch': 2.0}
|
| 24 |
+
{'loss': 0.0202, 'grad_norm': 0.5691620111465454, 'learning_rate': 3.258333333333333e-05, 'epoch': 2.1}
|
| 25 |
+
{'loss': 0.0237, 'grad_norm': 0.829046368598938, 'learning_rate': 3.175e-05, 'epoch': 2.2}
|
| 26 |
+
{'loss': 0.0206, 'grad_norm': 0.6925863027572632, 'learning_rate': 3.091666666666667e-05, 'epoch': 2.3}
|
| 27 |
+
{'loss': 0.0159, 'grad_norm': 1.1066631078720093, 'learning_rate': 3.0083333333333337e-05, 'epoch': 2.4}
|
| 28 |
+
{'loss': 0.0221, 'grad_norm': 0.5353291630744934, 'learning_rate': 2.925e-05, 'epoch': 2.5}
|
| 29 |
+
{'loss': 0.0114, 'grad_norm': 0.7897487282752991, 'learning_rate': 2.841666666666667e-05, 'epoch': 2.6}
|
| 30 |
+
{'loss': 0.0179, 'grad_norm': 2.496028184890747, 'learning_rate': 2.7583333333333334e-05, 'epoch': 2.7}
|
| 31 |
+
{'loss': 0.0155, 'grad_norm': 2.1950201988220215, 'learning_rate': 2.6750000000000003e-05, 'epoch': 2.8}
|
| 32 |
+
{'loss': 0.0204, 'grad_norm': 1.5243595838546753, 'learning_rate': 2.5916666666666665e-05, 'epoch': 2.9}
|
| 33 |
+
{'loss': 0.0195, 'grad_norm': 0.7217574119567871, 'learning_rate': 2.5083333333333338e-05, 'epoch': 3.0}
|
| 34 |
+
{'loss': 0.0176, 'grad_norm': 1.0712541341781616, 'learning_rate': 2.425e-05, 'epoch': 3.1}
|
| 35 |
+
{'loss': 0.0159, 'grad_norm': 0.7634212970733643, 'learning_rate': 2.341666666666667e-05, 'epoch': 3.2}
|
| 36 |
+
{'loss': 0.0206, 'grad_norm': 1.5768340826034546, 'learning_rate': 2.2583333333333335e-05, 'epoch': 3.3}
|
| 37 |
+
{'loss': 0.0195, 'grad_norm': 0.3796348571777344, 'learning_rate': 2.175e-05, 'epoch': 3.4}
|
| 38 |
+
{'loss': 0.0226, 'grad_norm': 0.49591338634490967, 'learning_rate': 2.091666666666667e-05, 'epoch': 3.5}
|
| 39 |
+
{'loss': 0.0285, 'grad_norm': 0.23481231927871704, 'learning_rate': 2.0083333333333335e-05, 'epoch': 3.6}
|
| 40 |
+
{'loss': 0.0156, 'grad_norm': 0.7325056791305542, 'learning_rate': 1.925e-05, 'epoch': 3.7}
|
| 41 |
+
{'loss': 0.028, 'grad_norm': 0.716157078742981, 'learning_rate': 1.841666666666667e-05, 'epoch': 3.8}
|
| 42 |
+
{'loss': 0.0103, 'grad_norm': 1.5975515842437744, 'learning_rate': 1.7583333333333335e-05, 'epoch': 3.9}
|
| 43 |
+
{'loss': 0.0234, 'grad_norm': 0.20278723537921906, 'learning_rate': 1.675e-05, 'epoch': 4.0}
|
| 44 |
+
{'loss': 0.0172, 'grad_norm': 0.3673444986343384, 'learning_rate': 1.591666666666667e-05, 'epoch': 4.1}
|
| 45 |
+
{'loss': 0.016, 'grad_norm': 0.6794467568397522, 'learning_rate': 1.5083333333333335e-05, 'epoch': 4.2}
|
| 46 |
+
{'loss': 0.0129, 'grad_norm': 1.4637274742126465, 'learning_rate': 1.4249999999999999e-05, 'epoch': 4.3}
|
| 47 |
+
{'loss': 0.0214, 'grad_norm': 1.019705057144165, 'learning_rate': 1.3416666666666666e-05, 'epoch': 4.4}
|
| 48 |
+
{'loss': 0.0144, 'grad_norm': 0.9328732490539551, 'learning_rate': 1.2583333333333334e-05, 'epoch': 4.5}
|
| 49 |
+
{'loss': 0.0313, 'grad_norm': 2.414787530899048, 'learning_rate': 1.175e-05, 'epoch': 4.6}
|
| 50 |
+
{'loss': 0.0321, 'grad_norm': 0.7159636616706848, 'learning_rate': 1.0916666666666667e-05, 'epoch': 4.7}
|
| 51 |
+
{'loss': 0.0277, 'grad_norm': 0.6355525255203247, 'learning_rate': 1.0083333333333334e-05, 'epoch': 4.8}
|
| 52 |
+
{'loss': 0.0206, 'grad_norm': 1.8428443670272827, 'learning_rate': 9.25e-06, 'epoch': 4.9}
|
| 53 |
+
{'loss': 0.0201, 'grad_norm': 0.44408753514289856, 'learning_rate': 8.416666666666667e-06, 'epoch': 5.0}
|
| 54 |
+
{'loss': 0.0204, 'grad_norm': 2.702193021774292, 'learning_rate': 7.583333333333334e-06, 'epoch': 5.1}
|
| 55 |
+
{'loss': 0.0327, 'grad_norm': 2.130901336669922, 'learning_rate': 6.750000000000001e-06, 'epoch': 5.2}
|
| 56 |
+
{'loss': 0.0236, 'grad_norm': 2.583890914916992, 'learning_rate': 5.916666666666667e-06, 'epoch': 5.3}
|
| 57 |
+
{'loss': 0.0417, 'grad_norm': 2.275362253189087, 'learning_rate': 5.0833333333333335e-06, 'epoch': 5.4}
|
| 58 |
+
{'loss': 0.047, 'grad_norm': 3.3780415058135986, 'learning_rate': 4.250000000000001e-06, 'epoch': 5.5}
|
| 59 |
+
{'loss': 0.0624, 'grad_norm': 4.144228458404541, 'learning_rate': 3.4166666666666664e-06, 'epoch': 5.6}
|
| 60 |
+
{'loss': 0.0702, 'grad_norm': 1.9479155540466309, 'learning_rate': 2.5833333333333333e-06, 'epoch': 5.7}
|
| 61 |
+
{'loss': 0.078, 'grad_norm': 3.936119556427002, 'learning_rate': 1.7500000000000002e-06, 'epoch': 5.8}
|
| 62 |
+
{'loss': 0.0955, 'grad_norm': 5.419748783111572, 'learning_rate': 9.166666666666667e-07, 'epoch': 5.9}
|
| 63 |
+
{'loss': 0.1158, 'grad_norm': 4.048203945159912, 'learning_rate': 8.333333333333334e-08, 'epoch': 6.0}
|
| 64 |
+
{'train_runtime': 203.9476, 'train_samples_per_second': 2.942, 'train_steps_per_second': 2.942, 'train_loss': 0.02671192432443301, 'epoch': 6.0}
|
| 65 |
+
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 600/600 [03:23<00:00, 2.94it/s]
|
| 66 |
+
Saving model and tokenizer...
|
| 67 |
+
Fine-tuning complete!
|
| 68 |
+
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<|endoftext|>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": true,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "<|endoftext|>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": true,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "<|endoftext|>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<|endoftext|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": true,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
test_output.txt
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Loading gpt2_base model...
|
| 2 |
+
Loading fine-tuned model...
|
| 3 |
+
================================================================================
|
| 4 |
+
Prompt 1: 'Har aansoon apni ek dastaan kehta hai.'
|
| 5 |
+
================================================================================
|
| 6 |
+
|
| 7 |
+
--- Fine-Tuned Model Output ---
|
| 8 |
+
Aansoon khamoshi mein kahani sunate hain,
|
| 9 |
+
Dil ke jazbaat sabko chhupate hain.
|
| 10 |
+
Par kal phir muskaan bikhar jaayegi,
|
| 11 |
+
Zindagi phir rang sajayegi.
|
| 12 |
+
|
| 13 |
+
--- Base Model Output ---
|
| 14 |
+
Aansoon khamoshi mein kahani sunate hain,
|
| 15 |
+
Dil ke jazbaat sabko chhupate hain.
|
| 16 |
+
Par kal phir muskaan bikhar jaayegi,
|
| 17 |
+
Zindagi phir rang sajayegi.
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
================================================================================
|
| 22 |
+
Prompt 2: 'Jab bhi tum yaad aate ho.'
|
| 23 |
+
================================================================================
|
| 24 |
+
|
| 25 |
+
--- Fine-Tuned Model Output ---
|
| 26 |
+
Phoolon ka rang uda jaata hai,
|
| 27 |
+
Har khwaab ek nayi roshni laata hai.
|
| 28 |
+
Jahan umeed ka diya jalaye rakho,
|
| 29 |
+
Wahan zindagi naghme sunaye.
|
| 30 |
+
|
| 31 |
+
--- Base Model Output ---
|
| 32 |
+
Phoolon ka rang kabhi bekaar nahi jaata,
|
| 33 |
+
Har koshish adhoori lagta hai.
|
| 34 |
+
Magar waqt nayi tasveer banata,
|
| 35 |
+
Dil phir se phool ki tarah khilta hai.
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
================================================================================
|
| 40 |
+
Prompt 3: 'Zindagi ki raahon mein, dhup aur chaon dono milte hai.'
|
| 41 |
+
================================================================================
|
| 42 |
+
|
| 43 |
+
--- Fine-Tuned Model Output ---
|
| 44 |
+
Dhupo ka rang uda ho jaata hai,
|
| 45 |
+
Har sapna toote pal ban jaata hai.
|
| 46 |
+
Jo pal hansi mein kho jaaye,
|
| 47 |
+
Woh zindagi ka rang geeton mein dikhati hai.
|
| 48 |
+
|
| 49 |
+
--- Base Model Output ---
|
| 50 |
+
Dhupo ka rang uda ho jaata,
|
| 51 |
+
Har sapna sach ho to raah milti hai.
|
| 52 |
+
Magar hausla rakho, raah nayi banegi,
|
| 53 |
+
Phir se manzil roshan hogi.
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"50256": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": true,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
}
|
| 13 |
+
},
|
| 14 |
+
"bos_token": "<|endoftext|>",
|
| 15 |
+
"clean_up_tokenization_spaces": false,
|
| 16 |
+
"eos_token": "<|endoftext|>",
|
| 17 |
+
"errors": "replace",
|
| 18 |
+
"extra_special_tokens": {},
|
| 19 |
+
"max_length": 168,
|
| 20 |
+
"model_max_length": 1024,
|
| 21 |
+
"pad_to_multiple_of": null,
|
| 22 |
+
"pad_token": "<|endoftext|>",
|
| 23 |
+
"pad_token_type_id": 0,
|
| 24 |
+
"padding_side": "right",
|
| 25 |
+
"stride": 0,
|
| 26 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 27 |
+
"truncation_side": "right",
|
| 28 |
+
"truncation_strategy": "longest_first",
|
| 29 |
+
"unk_token": "<|endoftext|>"
|
| 30 |
+
}
|
vocab.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|