mainakjsr committed
Commit fc4b224 · 1 Parent(s): 8e08b29

new tune hindi text

config.json CHANGED
@@ -32,7 +32,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.54.1",
+  "transformers_version": "4.55.4",
   "use_cache": true,
   "vocab_size": 50257
 }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 50256,
   "eos_token_id": 50256,
-  "transformers_version": "4.54.1"
+  "transformers_version": "4.55.4"
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83276c7135dc7f802bd364631db2610bff770ba86843e03920953da6c410c3f3
+oid sha256:05421fc15f3404fbca310ba45a596bd1228801d441147cd4db74eacce823654e
 size 497774208
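The weights are stored through Git LFS, so the diff only touches the pointer: the sha256 oid changed (new weights) while the size stayed 497774208 bytes, as expected when the same architecture is re-tuned. A small sketch to check a downloaded file against the pointer, relying only on the git-lfs pointer spec above (the oid is the plain sha256 of the file contents):

# Recompute the git-lfs oid to verify a downloaded model.safetensors
# against the pointer recorded in this commit.
import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk_size), b""):
            digest.update(block)
    return digest.hexdigest()

print(lfs_oid("model.safetensors"))
# expected: 05421fc15f3404fbca310ba45a596bd1228801d441147cd4db74eacce823654e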
output_tune.log ADDED
@@ -0,0 +1,68 @@
+(gpt2_env1) mainakjsr@mainakjsr-Lenovo-Y50-70-Touch:~/py_projects/shayari1$ python3 gpt2_tunning.py
+Starting fine-tuning...
+  0%|          | 0/600 [00:00<?, ?it/s]`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
+{'loss': 0.0181, 'grad_norm': 2.293494462966919, 'learning_rate': 4.9250000000000004e-05, 'epoch': 0.1}
+{'loss': 0.0115, 'grad_norm': 0.5815603137016296, 'learning_rate': 4.8416666666666673e-05, 'epoch': 0.2}
+{'loss': 0.029, 'grad_norm': 2.7110595703125, 'learning_rate': 4.7583333333333336e-05, 'epoch': 0.3}
+{'loss': 0.0209, 'grad_norm': 0.7257663607597351, 'learning_rate': 4.6750000000000005e-05, 'epoch': 0.4}
+{'loss': 0.0076, 'grad_norm': 0.20155230164527893, 'learning_rate': 4.591666666666667e-05, 'epoch': 0.5}
+{'loss': 0.0258, 'grad_norm': 2.0846869945526123, 'learning_rate': 4.5083333333333336e-05, 'epoch': 0.6}
+{'loss': 0.0299, 'grad_norm': 0.3581019341945648, 'learning_rate': 4.4250000000000005e-05, 'epoch': 0.7}
+{'loss': 0.0174, 'grad_norm': 1.6799453496932983, 'learning_rate': 4.341666666666667e-05, 'epoch': 0.8}
+{'loss': 0.0248, 'grad_norm': 1.8864364624023438, 'learning_rate': 4.2583333333333336e-05, 'epoch': 0.9}
+{'loss': 0.0199, 'grad_norm': 0.7106167674064636, 'learning_rate': 4.175e-05, 'epoch': 1.0}
+{'loss': 0.0129, 'grad_norm': 1.9354057312011719, 'learning_rate': 4.091666666666667e-05, 'epoch': 1.1}
+{'loss': 0.0189, 'grad_norm': 2.191124439239502, 'learning_rate': 4.0083333333333336e-05, 'epoch': 1.2}
+{'loss': 0.0356, 'grad_norm': 0.24308550357818604, 'learning_rate': 3.9250000000000005e-05, 'epoch': 1.3}
+{'loss': 0.0121, 'grad_norm': 0.4473109543323517, 'learning_rate': 3.841666666666667e-05, 'epoch': 1.4}
+{'loss': 0.0257, 'grad_norm': 3.0310373306274414, 'learning_rate': 3.7583333333333337e-05, 'epoch': 1.5}
+{'loss': 0.0243, 'grad_norm': 0.40297117829322815, 'learning_rate': 3.675e-05, 'epoch': 1.6}
+{'loss': 0.0199, 'grad_norm': 0.20001010596752167, 'learning_rate': 3.591666666666667e-05, 'epoch': 1.7}
+{'loss': 0.0092, 'grad_norm': 1.5069109201431274, 'learning_rate': 3.508333333333334e-05, 'epoch': 1.8}
+{'loss': 0.0302, 'grad_norm': 1.6387602090835571, 'learning_rate': 3.4250000000000006e-05, 'epoch': 1.9}
+{'loss': 0.0193, 'grad_norm': 2.052999258041382, 'learning_rate': 3.341666666666667e-05, 'epoch': 2.0}
+{'loss': 0.0202, 'grad_norm': 0.5691620111465454, 'learning_rate': 3.258333333333333e-05, 'epoch': 2.1}
+{'loss': 0.0237, 'grad_norm': 0.829046368598938, 'learning_rate': 3.175e-05, 'epoch': 2.2}
+{'loss': 0.0206, 'grad_norm': 0.6925863027572632, 'learning_rate': 3.091666666666667e-05, 'epoch': 2.3}
+{'loss': 0.0159, 'grad_norm': 1.1066631078720093, 'learning_rate': 3.0083333333333337e-05, 'epoch': 2.4}
+{'loss': 0.0221, 'grad_norm': 0.5353291630744934, 'learning_rate': 2.925e-05, 'epoch': 2.5}
+{'loss': 0.0114, 'grad_norm': 0.7897487282752991, 'learning_rate': 2.841666666666667e-05, 'epoch': 2.6}
+{'loss': 0.0179, 'grad_norm': 2.496028184890747, 'learning_rate': 2.7583333333333334e-05, 'epoch': 2.7}
+{'loss': 0.0155, 'grad_norm': 2.1950201988220215, 'learning_rate': 2.6750000000000003e-05, 'epoch': 2.8}
+{'loss': 0.0204, 'grad_norm': 1.5243595838546753, 'learning_rate': 2.5916666666666665e-05, 'epoch': 2.9}
+{'loss': 0.0195, 'grad_norm': 0.7217574119567871, 'learning_rate': 2.5083333333333338e-05, 'epoch': 3.0}
+{'loss': 0.0176, 'grad_norm': 1.0712541341781616, 'learning_rate': 2.425e-05, 'epoch': 3.1}
+{'loss': 0.0159, 'grad_norm': 0.7634212970733643, 'learning_rate': 2.341666666666667e-05, 'epoch': 3.2}
+{'loss': 0.0206, 'grad_norm': 1.5768340826034546, 'learning_rate': 2.2583333333333335e-05, 'epoch': 3.3}
+{'loss': 0.0195, 'grad_norm': 0.3796348571777344, 'learning_rate': 2.175e-05, 'epoch': 3.4}
+{'loss': 0.0226, 'grad_norm': 0.49591338634490967, 'learning_rate': 2.091666666666667e-05, 'epoch': 3.5}
+{'loss': 0.0285, 'grad_norm': 0.23481231927871704, 'learning_rate': 2.0083333333333335e-05, 'epoch': 3.6}
+{'loss': 0.0156, 'grad_norm': 0.7325056791305542, 'learning_rate': 1.925e-05, 'epoch': 3.7}
+{'loss': 0.028, 'grad_norm': 0.716157078742981, 'learning_rate': 1.841666666666667e-05, 'epoch': 3.8}
+{'loss': 0.0103, 'grad_norm': 1.5975515842437744, 'learning_rate': 1.7583333333333335e-05, 'epoch': 3.9}
+{'loss': 0.0234, 'grad_norm': 0.20278723537921906, 'learning_rate': 1.675e-05, 'epoch': 4.0}
+{'loss': 0.0172, 'grad_norm': 0.3673444986343384, 'learning_rate': 1.591666666666667e-05, 'epoch': 4.1}
+{'loss': 0.016, 'grad_norm': 0.6794467568397522, 'learning_rate': 1.5083333333333335e-05, 'epoch': 4.2}
+{'loss': 0.0129, 'grad_norm': 1.4637274742126465, 'learning_rate': 1.4249999999999999e-05, 'epoch': 4.3}
+{'loss': 0.0214, 'grad_norm': 1.019705057144165, 'learning_rate': 1.3416666666666666e-05, 'epoch': 4.4}
+{'loss': 0.0144, 'grad_norm': 0.9328732490539551, 'learning_rate': 1.2583333333333334e-05, 'epoch': 4.5}
+{'loss': 0.0313, 'grad_norm': 2.414787530899048, 'learning_rate': 1.175e-05, 'epoch': 4.6}
+{'loss': 0.0321, 'grad_norm': 0.7159636616706848, 'learning_rate': 1.0916666666666667e-05, 'epoch': 4.7}
+{'loss': 0.0277, 'grad_norm': 0.6355525255203247, 'learning_rate': 1.0083333333333334e-05, 'epoch': 4.8}
+{'loss': 0.0206, 'grad_norm': 1.8428443670272827, 'learning_rate': 9.25e-06, 'epoch': 4.9}
+{'loss': 0.0201, 'grad_norm': 0.44408753514289856, 'learning_rate': 8.416666666666667e-06, 'epoch': 5.0}
+{'loss': 0.0204, 'grad_norm': 2.702193021774292, 'learning_rate': 7.583333333333334e-06, 'epoch': 5.1}
+{'loss': 0.0327, 'grad_norm': 2.130901336669922, 'learning_rate': 6.750000000000001e-06, 'epoch': 5.2}
+{'loss': 0.0236, 'grad_norm': 2.583890914916992, 'learning_rate': 5.916666666666667e-06, 'epoch': 5.3}
+{'loss': 0.0417, 'grad_norm': 2.275362253189087, 'learning_rate': 5.0833333333333335e-06, 'epoch': 5.4}
+{'loss': 0.047, 'grad_norm': 3.3780415058135986, 'learning_rate': 4.250000000000001e-06, 'epoch': 5.5}
+{'loss': 0.0624, 'grad_norm': 4.144228458404541, 'learning_rate': 3.4166666666666664e-06, 'epoch': 5.6}
+{'loss': 0.0702, 'grad_norm': 1.9479155540466309, 'learning_rate': 2.5833333333333333e-06, 'epoch': 5.7}
+{'loss': 0.078, 'grad_norm': 3.936119556427002, 'learning_rate': 1.7500000000000002e-06, 'epoch': 5.8}
+{'loss': 0.0955, 'grad_norm': 5.419748783111572, 'learning_rate': 9.166666666666667e-07, 'epoch': 5.9}
+{'loss': 0.1158, 'grad_norm': 4.048203945159912, 'learning_rate': 8.333333333333334e-08, 'epoch': 6.0}
+{'train_runtime': 203.9476, 'train_samples_per_second': 2.942, 'train_steps_per_second': 2.942, 'train_loss': 0.02671192432443301, 'epoch': 6.0}
+100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 600/600 [03:23<00:00, 2.94it/s]
+Saving model and tokenizer...
+Fine-tuning complete!
+
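The log shows 600 optimizer steps over 6 epochs with train_steps_per_second equal to train_samples_per_second, i.e. one sample per step, which implies batch size 1 over roughly 100 training examples, and a learning rate decaying linearly from just under 5e-5 to zero. Note also the loss drifting upward through the final epoch (0.02 to 0.12), a possible sign that 6 epochs overshoots on this dataset. The gpt2_tunning.py script itself is not part of this commit; the sketch below is only a hypothetical reconstruction consistent with those numbers, and the data file and output directory are assumed names.

# Hypothetical reconstruction of gpt2_tunning.py, matching the logged run:
# 6 epochs, batch size 1, lr 5e-5 with the Trainer's default linear decay,
# logging every 10 steps. "shayari.txt" and "gpt2_shayari" are assumptions.
from datasets import load_dataset
from transformers import (AutoModelForCausalLM, AutoTokenizer,
                          DataCollatorForLanguageModeling, Trainer,
                          TrainingArguments)

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 ships without a pad token
model = AutoModelForCausalLM.from_pretrained("gpt2")

dataset = load_dataset("text", data_files={"train": "shayari.txt"})  # assumed

def tokenize(batch):
    # max_length=168 echoes the value saved in tokenizer_config.json below
    return tokenizer(batch["text"], truncation=True, max_length=168)

train_data = dataset["train"].map(tokenize, batched=True, remove_columns=["text"])

args = TrainingArguments(
    output_dir="gpt2_shayari",      # assumed
    num_train_epochs=6,             # log ends at epoch 6.0
    per_device_train_batch_size=1,  # steps/s == samples/s in the log
    learning_rate=5e-5,             # first logged lr sits just below 5e-5
    logging_steps=10,               # one log line per 0.1 epoch
)

print("Starting fine-tuning...")
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_data,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
print("Saving model and tokenizer...")
trainer.save_model("gpt2_shayari")
tokenizer.save_pretrained("gpt2_shayari")
print("Fine-tuning complete!")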
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
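special_tokens_map.json makes the padding choice explicit: GPT-2 defines no dedicated pad token, so pad and unk both alias <|endoftext|>, token id 50256, the same id listed as bos/eos in generation_config.json. A quick check, reusing the hypothetical checkpoint directory from the sketches above:

# Confirm that padding reuses <|endoftext|> in the saved tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2_shayari")  # assumed dir
assert tokenizer.pad_token == tokenizer.eos_token == "<|endoftext|>"
print(tokenizer.pad_token_id)  # 50256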
test_output.txt ADDED
@@ -0,0 +1,56 @@
+Loading gpt2_base model...
+Loading fine-tuned model...
+================================================================================
+Prompt 1: 'Har aansoon apni ek dastaan kehta hai.'
+================================================================================
+
+--- Fine-Tuned Model Output ---
+Aansoon khamoshi mein kahani sunate hain,
+Dil ke jazbaat sabko chhupate hain.
+Par kal phir muskaan bikhar jaayegi,
+Zindagi phir rang sajayegi.
+
+--- Base Model Output ---
+Aansoon khamoshi mein kahani sunate hain,
+Dil ke jazbaat sabko chhupate hain.
+Par kal phir muskaan bikhar jaayegi,
+Zindagi phir rang sajayegi.
+
+
+
+================================================================================
+Prompt 2: 'Jab bhi tum yaad aate ho.'
+================================================================================
+
+--- Fine-Tuned Model Output ---
+Phoolon ka rang uda jaata hai,
+Har khwaab ek nayi roshni laata hai.
+Jahan umeed ka diya jalaye rakho,
+Wahan zindagi naghme sunaye.
+
+--- Base Model Output ---
+Phoolon ka rang kabhi bekaar nahi jaata,
+Har koshish adhoori lagta hai.
+Magar waqt nayi tasveer banata,
+Dil phir se phool ki tarah khilta hai.
+
+
+
+================================================================================
+Prompt 3: 'Zindagi ki raahon mein, dhup aur chaon dono milte hai.'
+================================================================================
+
+--- Fine-Tuned Model Output ---
+Dhupo ka rang uda ho jaata hai,
+Har sapna toote pal ban jaata hai.
+Jo pal hansi mein kho jaaye,
+Woh zindagi ka rang geeton mein dikhati hai.
+
+--- Base Model Output ---
+Dhupo ka rang uda ho jaata,
+Har sapna sach ho to raah milti hai.
+Magar hausla rakho, raah nayi banegi,
+Phir se manzil roshan hogi.
+
+
+
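test_output.txt compares base and fine-tuned generations on three Hinglish prompts; notably, the two models produce identical text for Prompt 1. The comparison script is not included in this commit, so the following is only a hypothetical sketch that would print this layout, with the checkpoint path and sampling settings as assumptions:

# Hypothetical comparison script reproducing the format of test_output.txt.
# Checkpoint dir and generation parameters are assumptions.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def load(path):
    tok = AutoTokenizer.from_pretrained(path)
    model = AutoModelForCausalLM.from_pretrained(path).eval()
    return tok, model

def generate(tok, model, prompt):
    inputs = tok(prompt, return_tensors="pt")
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=60, do_sample=True,
                             top_p=0.9, pad_token_id=tok.eos_token_id)
    # decode only the continuation, not the prompt
    return tok.decode(out[0, inputs["input_ids"].shape[1]:],
                      skip_special_tokens=True)

print("Loading gpt2_base model...")
base_tok, base_model = load("gpt2")
print("Loading fine-tuned model...")
ft_tok, ft_model = load("gpt2_shayari")  # assumed checkpoint dir

prompts = [
    "Har aansoon apni ek dastaan kehta hai.",
    "Jab bhi tum yaad aate ho.",
    "Zindagi ki raahon mein, dhup aur chaon dono milte hai.",
]
for i, prompt in enumerate(prompts, 1):
    print("=" * 80)
    print(f"Prompt {i}: '{prompt}'")
    print("=" * 80)
    print("\n--- Fine-Tuned Model Output ---")
    print(generate(ft_tok, ft_model, prompt))
    print("\n--- Base Model Output ---")
    print(generate(base_tok, base_model, prompt))
    print("\n")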
tokenizer.json ADDED
The diff for this file is too large to render.
 
tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "max_length": 168,
+  "model_max_length": 1024,
+  "pad_to_multiple_of": null,
+  "pad_token": "<|endoftext|>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
+  "stride": 0,
+  "tokenizer_class": "GPT2Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<|endoftext|>"
+}
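tokenizer_config.json persists call-time defaults alongside the tokenizer class: max_length 168 (matching the truncation length presumably used when tokenizing the training text), right-side padding and truncation, and model_max_length 1024 from the GPT-2 context window. A short sketch of what those saved settings imply on load, with the checkpoint directory again an assumed name:

# Padding/truncation behave per the saved config; the directory is assumed.
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2_shayari")
encoded = tokenizer("Har aansoon apni ek dastaan kehta hai.",
                    truncation=True, max_length=168, padding="max_length")
print(len(encoded["input_ids"]))  # 168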
vocab.json CHANGED
The diff for this file is too large to render.