akshaynayaks9845 committed on
Commit
ddbe231
·
verified ·
1 Parent(s): 22680c0

Upload rml_ai/config.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. rml_ai/config.py +64 -0
rml_ai/config.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration for RML System
3
+ """
4
+
5
+ import os
6
+ from typing import Optional
7
+ from dataclasses import dataclass
8
+
9
+
10
@dataclass
class RMLConfig:
    """Configuration for the RML system.

    Field values come from the constructor (or the defaults below) and are
    then overridden in ``__post_init__`` by any of these environment
    variables that are set:

    ============================  =========================
    Environment variable          Field
    ============================  =========================
    RML_ENCODER_MODEL             encoder_model
    RML_DECODER_MODEL             decoder_model
    RML_DEVICE                    device
    RML_DATASET_PATH              dataset_path
    RML_API_ENTRIES               max_entries
    RML_ENCODER_BATCH_SIZE        encoder_batch_size
    RML_ENCODER_MAX_LEN           encoder_max_length
    RML_DISABLE_WEB_SEARCH        disable_web_search
    RML_DISABLE_WORLD_KNOWLEDGE   disable_world_knowledge
    ============================  =========================

    Boolean variables are true only when set to exactly ``"1"``; any other
    value that is set yields ``False``.
    """

    # Model paths
    encoder_model: str = "intfloat/e5-base-v2"
    decoder_model: str = "microsoft/phi-1_5"

    # Device configuration
    device: str = "auto"

    # Dataset configuration
    dataset_path: str = "data/rml_data.jsonl"
    max_entries: int = 1000

    # Encoding configuration
    encoder_batch_size: int = 8
    encoder_max_length: int = 192

    # Feature flags
    disable_web_search: bool = True
    disable_world_knowledge: bool = True

    def __post_init__(self) -> None:
        """Apply environment-variable overrides on top of the field values.

        Raises:
            ValueError: If an integer-valued variable is set to a string
                that ``int()`` cannot parse.
        """
        self.encoder_model = os.getenv("RML_ENCODER_MODEL", self.encoder_model)
        self.decoder_model = os.getenv("RML_DECODER_MODEL", self.decoder_model)
        self.device = os.getenv("RML_DEVICE", self.device)
        self.dataset_path = os.getenv("RML_DATASET_PATH", self.dataset_path)
        # NOTE(review): the variable is RML_API_ENTRIES, not RML_MAX_ENTRIES;
        # kept as-is so existing deployments keep working -- confirm intent.
        self.max_entries = int(os.getenv("RML_API_ENTRIES", self.max_entries))
        self.encoder_batch_size = int(
            os.getenv("RML_ENCODER_BATCH_SIZE", self.encoder_batch_size)
        )
        self.encoder_max_length = int(
            os.getenv("RML_ENCODER_MAX_LEN", self.encoder_max_length)
        )
        # Flags fall back to the value already on the instance, so that
        # RMLConfig(disable_web_search=False) is honoured when the variable
        # is not set (previously it was silently forced back to True).
        self.disable_web_search = self._env_flag(
            "RML_DISABLE_WEB_SEARCH", self.disable_web_search
        )
        self.disable_world_knowledge = self._env_flag(
            "RML_DISABLE_WORLD_KNOWLEDGE", self.disable_world_knowledge
        )

    @staticmethod
    def _env_flag(name: str, current: bool) -> bool:
        """Return the boolean value of env var *name*, or *current* if unset."""
        raw = os.getenv(name)
        if raw is None:
            return current
        return raw == "1"

    def to_dict(self) -> dict:
        """Return the configuration as a plain ``dict`` keyed by field name."""
        return {
            "encoder_model": self.encoder_model,
            "decoder_model": self.decoder_model,
            "device": self.device,
            "dataset_path": self.dataset_path,
            "max_entries": self.max_entries,
            "encoder_batch_size": self.encoder_batch_size,
            "encoder_max_length": self.encoder_max_length,
            "disable_web_search": self.disable_web_search,
            "disable_world_knowledge": self.disable_world_knowledge,
        }

    def __str__(self) -> str:
        """Return a multi-line listing with one ``key: value`` line per field."""
        lines = [f" {key}: {value}\n" for key, value in self.to_dict().items()]
        return "RML Configuration:\n" + "".join(lines)