| { | |
| "_name_or_path": "/covalent/.cache/models/tmp5hs_y_ey2yjwq_ff", | |
| "activation_function": "gelu_new", | |
| "all_reduce_scores": { | |
| "0": "NON_PARTICIPATING", | |
| "1": "NON_PARTICIPATING", | |
| "10": "SUCCESS", | |
| "100": "NON_PARTICIPATING", | |
| "101": "NON_PARTICIPATING", | |
| "102": "NON_PARTICIPATING", | |
| "103": "NON_PARTICIPATING", | |
| "104": "NON_PARTICIPATING", | |
| "105": "NON_PARTICIPATING", | |
| "106": "NON_PARTICIPATING", | |
| "107": "NON_PARTICIPATING", | |
| "108": "NON_PARTICIPATING", | |
| "109": "NON_PARTICIPATING", | |
| "11": "NON_PARTICIPATING", | |
| "110": "SUCCESS", | |
| "111": "NON_PARTICIPATING", | |
| "112": "NON_PARTICIPATING", | |
| "113": "NON_PARTICIPATING", | |
| "114": "SUCCESS", | |
| "115": "SUCCESS", | |
| "116": "NON_PARTICIPATING", | |
| "117": "NON_PARTICIPATING", | |
| "118": "SUCCESS", | |
| "119": "NON_PARTICIPATING", | |
| "12": "SUCCESS", | |
| "120": "NON_PARTICIPATING", | |
| "121": "SUCCESS", | |
| "122": "NON_PARTICIPATING", | |
| "123": "NON_PARTICIPATING", | |
| "124": "SUCCESS", | |
| "125": "NON_PARTICIPATING", | |
| "126": "NON_PARTICIPATING", | |
| "127": "NON_PARTICIPATING", | |
| "128": "NON_PARTICIPATING", | |
| "129": "NON_PARTICIPATING", | |
| "13": "SUCCESS", | |
| "130": "NON_PARTICIPATING", | |
| "131": "NON_PARTICIPATING", | |
| "132": "NON_PARTICIPATING", | |
| "133": "NON_PARTICIPATING", | |
| "134": "SUCCESS", | |
| "135": "NON_PARTICIPATING", | |
| "136": "NON_PARTICIPATING", | |
| "137": "NON_PARTICIPATING", | |
| "138": "NON_PARTICIPATING", | |
| "139": "NON_PARTICIPATING", | |
| "14": "SUCCESS", | |
| "140": "SUCCESS", | |
| "141": "SUCCESS", | |
| "142": "SUCCESS", | |
| "143": "SUCCESS", | |
| "144": "NON_PARTICIPATING", | |
| "145": "NON_PARTICIPATING", | |
| "146": "NON_PARTICIPATING", | |
| "147": "SUCCESS", | |
| "148": "SUCCESS", | |
| "149": "NON_PARTICIPATING", | |
| "15": "NON_PARTICIPATING", | |
| "150": "NON_PARTICIPATING", | |
| "151": "NON_PARTICIPATING", | |
| "152": "NON_PARTICIPATING", | |
| "153": "NON_PARTICIPATING", | |
| "154": "SUCCESS", | |
| "155": "NON_PARTICIPATING", | |
| "156": "NON_PARTICIPATING", | |
| "157": "NON_PARTICIPATING", | |
| "158": "SUCCESS", | |
| "159": "SUCCESS", | |
| "16": "NON_PARTICIPATING", | |
| "160": "NON_PARTICIPATING", | |
| "161": "NON_PARTICIPATING", | |
| "162": "NON_PARTICIPATING", | |
| "163": "NON_PARTICIPATING", | |
| "164": "SUCCESS", | |
| "165": "NON_PARTICIPATING", | |
| "166": "NON_PARTICIPATING", | |
| "167": "SUCCESS", | |
| "168": "SUCCESS", | |
| "169": "NON_PARTICIPATING", | |
| "17": "NON_PARTICIPATING", | |
| "170": "NON_PARTICIPATING", | |
| "171": "NON_PARTICIPATING", | |
| "172": "NON_PARTICIPATING", | |
| "173": "NON_PARTICIPATING", | |
| "174": "NON_PARTICIPATING", | |
| "175": "NON_PARTICIPATING", | |
| "176": "NON_PARTICIPATING", | |
| "177": "NON_PARTICIPATING", | |
| "178": "NON_PARTICIPATING", | |
| "179": "SUCCESS", | |
| "18": "NON_PARTICIPATING", | |
| "180": "NON_PARTICIPATING", | |
| "181": "NON_PARTICIPATING", | |
| "182": "NON_PARTICIPATING", | |
| "183": "NON_PARTICIPATING", | |
| "184": "NON_PARTICIPATING", | |
| "185": "NON_PARTICIPATING", | |
| "186": "NON_PARTICIPATING", | |
| "187": "NON_PARTICIPATING", | |
| "188": "NON_PARTICIPATING", | |
| "189": "NON_PARTICIPATING", | |
| "19": "NON_PARTICIPATING", | |
| "190": "NON_PARTICIPATING", | |
| "191": "NON_PARTICIPATING", | |
| "192": "SUCCESS", | |
| "193": "NON_PARTICIPATING", | |
| "194": "NON_PARTICIPATING", | |
| "195": "SUCCESS", | |
| "196": "NON_PARTICIPATING", | |
| "197": "NON_PARTICIPATING", | |
| "198": "NON_PARTICIPATING", | |
| "199": "NON_PARTICIPATING", | |
| "2": "SUCCESS", | |
| "20": "NON_PARTICIPATING", | |
| "200": "NON_PARTICIPATING", | |
| "201": "SUCCESS", | |
| "202": "NON_PARTICIPATING", | |
| "203": "NON_PARTICIPATING", | |
| "204": "NON_PARTICIPATING", | |
| "205": "NON_PARTICIPATING", | |
| "206": "NON_PARTICIPATING", | |
| "207": "NON_PARTICIPATING", | |
| "208": "SUCCESS", | |
| "209": "NON_PARTICIPATING", | |
| "21": "NON_PARTICIPATING", | |
| "210": "NON_PARTICIPATING", | |
| "211": "SUCCESS", | |
| "212": "NON_PARTICIPATING", | |
| "213": "NON_PARTICIPATING", | |
| "214": "NON_PARTICIPATING", | |
| "215": "NON_PARTICIPATING", | |
| "216": "NON_PARTICIPATING", | |
| "217": "NON_PARTICIPATING", | |
| "218": "NON_PARTICIPATING", | |
| "219": "NON_PARTICIPATING", | |
| "22": "NON_PARTICIPATING", | |
| "220": "NON_PARTICIPATING", | |
| "221": "NON_PARTICIPATING", | |
| "222": "NON_PARTICIPATING", | |
| "223": "NON_PARTICIPATING", | |
| "224": "SUCCESS", | |
| "225": "NON_PARTICIPATING", | |
| "226": "NON_PARTICIPATING", | |
| "227": "NON_PARTICIPATING", | |
| "228": "SUCCESS", | |
| "229": "SUCCESS", | |
| "23": "NON_PARTICIPATING", | |
| "230": "SUCCESS", | |
| "231": "NON_PARTICIPATING", | |
| "232": "NON_PARTICIPATING", | |
| "233": "NON_PARTICIPATING", | |
| "234": "SUCCESS", | |
| "235": "NON_PARTICIPATING", | |
| "236": "SUCCESS", | |
| "237": "SUCCESS", | |
| "238": "NON_PARTICIPATING", | |
| "239": "NON_PARTICIPATING", | |
| "24": "NON_PARTICIPATING", | |
| "240": "SUCCESS", | |
| "241": "SUCCESS", | |
| "242": "NON_PARTICIPATING", | |
| "243": "NON_PARTICIPATING", | |
| "244": "SUCCESS", | |
| "245": "SUCCESS", | |
| "246": "NON_PARTICIPATING", | |
| "247": "NON_PARTICIPATING", | |
| "248": "NON_PARTICIPATING", | |
| "249": "NON_PARTICIPATING", | |
| "25": "SUCCESS", | |
| "250": "NON_PARTICIPATING", | |
| "251": "NON_PARTICIPATING", | |
| "252": "SUCCESS", | |
| "253": "NON_PARTICIPATING", | |
| "254": "NON_PARTICIPATING", | |
| "255": "NON_PARTICIPATING", | |
| "26": "NON_PARTICIPATING", | |
| "27": "SUCCESS", | |
| "28": "NON_PARTICIPATING", | |
| "29": "NON_PARTICIPATING", | |
| "3": "NON_PARTICIPATING", | |
| "30": "NON_PARTICIPATING", | |
| "31": "NON_PARTICIPATING", | |
| "32": "NON_PARTICIPATING", | |
| "33": "NON_PARTICIPATING", | |
| "34": "NON_PARTICIPATING", | |
| "35": "NON_PARTICIPATING", | |
| "36": "NON_PARTICIPATING", | |
| "37": "SUCCESS", | |
| "38": "NON_PARTICIPATING", | |
| "39": "NON_PARTICIPATING", | |
| "4": "NON_PARTICIPATING", | |
| "40": "NON_PARTICIPATING", | |
| "41": "NON_PARTICIPATING", | |
| "42": "NON_PARTICIPATING", | |
| "43": "NON_PARTICIPATING", | |
| "44": "NON_PARTICIPATING", | |
| "45": "NON_PARTICIPATING", | |
| "46": "NON_PARTICIPATING", | |
| "47": "NON_PARTICIPATING", | |
| "48": "NON_PARTICIPATING", | |
| "49": "NON_PARTICIPATING", | |
| "5": "NON_PARTICIPATING", | |
| "50": "NON_PARTICIPATING", | |
| "51": "NON_PARTICIPATING", | |
| "52": "NON_PARTICIPATING", | |
| "53": "NON_PARTICIPATING", | |
| "54": "SUCCESS", | |
| "55": "NON_PARTICIPATING", | |
| "56": "NON_PARTICIPATING", | |
| "57": "NON_PARTICIPATING", | |
| "58": "NON_PARTICIPATING", | |
| "59": "NON_PARTICIPATING", | |
| "6": "NON_PARTICIPATING", | |
| "60": "NON_PARTICIPATING", | |
| "61": "NON_PARTICIPATING", | |
| "62": "NON_PARTICIPATING", | |
| "63": "SUCCESS", | |
| "64": "NON_PARTICIPATING", | |
| "65": "SUCCESS", | |
| "66": "SUCCESS", | |
| "67": "SUCCESS", | |
| "68": "NON_PARTICIPATING", | |
| "69": "NON_PARTICIPATING", | |
| "7": "NON_PARTICIPATING", | |
| "70": "NON_PARTICIPATING", | |
| "71": "NON_PARTICIPATING", | |
| "72": "NON_PARTICIPATING", | |
| "73": "NON_PARTICIPATING", | |
| "74": "SUCCESS", | |
| "75": "SUCCESS", | |
| "76": "NON_PARTICIPATING", | |
| "77": "SUCCESS", | |
| "78": "SUCCESS", | |
| "79": "NON_PARTICIPATING", | |
| "8": "NON_PARTICIPATING", | |
| "80": "SUCCESS", | |
| "81": "NON_PARTICIPATING", | |
| "82": "SUCCESS", | |
| "83": "NON_PARTICIPATING", | |
| "84": "SUCCESS", | |
| "85": "NON_PARTICIPATING", | |
| "86": "SUCCESS", | |
| "87": "NON_PARTICIPATING", | |
| "88": "SUCCESS", | |
| "89": "NON_PARTICIPATING", | |
| "9": "SUCCESS", | |
| "90": "NON_PARTICIPATING", | |
| "91": "NON_PARTICIPATING", | |
| "92": "NON_PARTICIPATING", | |
| "93": "SUCCESS", | |
| "94": "NON_PARTICIPATING", | |
| "95": "SUCCESS", | |
| "96": "SUCCESS", | |
| "97": "NON_PARTICIPATING", | |
| "98": "SUCCESS", | |
| "99": "NON_PARTICIPATING" | |
| }, | |
| "architectures": [ | |
| "GPTOptim" | |
| ], | |
| "attn_pdrop": 0.1, | |
| "auto_map": { | |
| "AutoConfig": "configuration_gpt_optimized.GPTOptimConfig", | |
| "AutoModelForCausalLM": "distributed/optimized-gpt2-500m--modeling_gpt_optimized.GPTOptim" | |
| }, | |
| "block_size": 1024, | |
| "bos_token_id": 50256, | |
| "embd_pdrop": 0.1, | |
| "eos_token_id": 50256, | |
| "initializer_range": 0.02, | |
| "layer_norm_epsilon": 1e-05, | |
| "model_type": "gpt_optimized", | |
| "n_embd": 1280, | |
| "n_head": 32, | |
| "n_inner": null, | |
| "n_layer": 48, | |
| "n_positions": 1024, | |
| "quantization_config": { | |
| "_load_in_4bit": false, | |
| "_load_in_8bit": true, | |
| "bnb_4bit_compute_dtype": "bfloat16", | |
| "bnb_4bit_quant_storage": "uint8", | |
| "bnb_4bit_quant_type": "fp4", | |
| "bnb_4bit_use_double_quant": false, | |
| "llm_int8_enable_fp32_cpu_offload": false, | |
| "llm_int8_has_fp16_weight": false, | |
| "llm_int8_skip_modules": [ | |
| "lm_head" | |
| ], | |
| "llm_int8_threshold": 6.0, | |
| "load_in_4bit": false, | |
| "load_in_8bit": true, | |
| "quant_method": "bitsandbytes" | |
| }, | |
| "reorder_and_upcast_attn": false, | |
| "resid_pdrop": 0.1, | |
| "scale_attn_by_inverse_layer_idx": false, | |
| "scale_attn_weights": true, | |
| "summary_activation": null, | |
| "summary_first_dropout": 0.1, | |
| "summary_proj_to_labels": true, | |
| "summary_type": "cls_index", | |
| "summary_use_proj": true, | |
| "torch_dtype": "float16", | |
| "transformers_version": "4.46.2", | |
| "use_cache": true, | |
| "vocab_size": 50257, | |
| "api_key": null | |
| } |