## 4. validate_model.py

```python
#!/usr/bin/env python3
"""
Validation script for DeepSeek-V3.1-4bit model
"""

import argparse
import logging
import json
from pathlib import Path

from transformers import AutoTokenizer, AutoConfig
import mlx.core as mx

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


def validate_config(model_path):
    """Validate model configuration"""
    logger.info("📋 Validating model configuration...")

    try:
        config = AutoConfig.from_pretrained(model_path)

        # Check required config fields
        required_fields = [
            'vocab_size', 'hidden_size', 'num_hidden_layers',
            'num_attention_heads', 'max_position_embeddings'
        ]

        valid = True
        for field in required_fields:
            if not hasattr(config, field):
                logger.error(f"❌ Missing config field: {field}")
                valid = False
            else:
                logger.info(f"✅ {field}: {getattr(config, field)}")

        # DeepSeek-specific checks
        if hasattr(config, 'rope_theta'):
            logger.info(f"✅ rope_theta: {config.rope_theta}")

        return valid

    except Exception as e:
        logger.error(f"❌ Config validation failed: {e}")
        return False


def validate_tokenizer(model_path):
    """Validate tokenizer"""
    logger.info("\n🔤 Validating tokenizer...")

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)

        # Test basic tokenization across scripts
        test_texts = [
            "Hello world",
            "The quick brown fox",
            "こんにちは世界",  # Japanese
            "机器学习",        # Chinese
        ]

        for text in test_texts:
            tokens = tokenizer.encode(text)
            decoded = tokenizer.decode(tokens)
            logger.info(f"✅ '{text}' → {len(tokens)} tokens → '{decoded}'")

        # Check special tokens
        special_tokens = ['pad_token', 'eos_token', 'bos_token']
        for token_name in special_tokens:
            token = getattr(tokenizer, token_name, None)
            if token:
                logger.info(f"✅ {token_name}: {token}")
            else:
                logger.warning(f"⚠️ {token_name}: Not set")

        return True

    except Exception as e:
        logger.error(f"❌ Tokenizer validation failed: {e}")
        return False


def validate_weights(model_path):
    """Validate model weights"""
    logger.info("\n⚖️ Validating model weights...")

    weight_files = []
    for pattern in ["*.npz", "*.safetensors", "*.gguf"]:
        weight_files.extend(Path(model_path).glob(pattern))

    if not weight_files:
        logger.error("❌ No weight files found")
        return False

    valid = True
    for weight_file in weight_files:
        try:
            if weight_file.suffix == '.npz':
                # Try to load the NPZ file with MLX
                weights = mx.load(str(weight_file))
                logger.info(f"✅ {weight_file.name}: Loaded {len(weights)} arrays")

                # Check for some expected layer keys
                key_layers = ['layers.0', 'model.embed_tokens', 'lm_head']
                found_keys = 0
                for key in weights.keys():
                    if any(layer in key for layer in key_layers):
                        found_keys += 1
                        logger.info(f"  ✅ Found: {key} (shape: {weights[key].shape})")

                if found_keys == 0:
                    logger.warning(f"⚠️ No expected layer keys found in {weight_file.name}")
            else:
                logger.info(f"⚠️ {weight_file.name}: Format not automatically validated")

        except Exception as e:
            logger.error(f"❌ Failed to validate {weight_file.name}: {e}")
            valid = False

    return valid


def main():
    parser = argparse.ArgumentParser(description="Validate DeepSeek-V3.1-4bit model")
    parser.add_argument("--model-path", type=str, default="./deepseek_v3_4bit",
                        help="Path to the downloaded model")
    parser.add_argument("--full", action="store_true",
                        help="Run full validation including weight loading")

    args = parser.parse_args()
    model_path = Path(args.model_path)

    if not model_path.exists():
        logger.error(f"Model path does not exist: {model_path}")
        return 1

    logger.info("=" * 60)
    logger.info("✅ DeepSeek-V3.1-4bit Model Validation")
    logger.info("=" * 60)

    # Run validations
    config_valid = validate_config(args.model_path)
    tokenizer_valid = validate_tokenizer(args.model_path)

    weight_valid = True
    if args.full:
        weight_valid = validate_weights(args.model_path)

    # Summary
    logger.info("\n" + "=" * 60)
    logger.info("📊 Validation Summary:")
    logger.info(f"  Configuration: {'✅ PASS' if config_valid else '❌ FAIL'}")
    logger.info(f"  Tokenizer: {'✅ PASS' if tokenizer_valid else '❌ FAIL'}")
    if args.full:
        logger.info(f"  Weights: {'✅ PASS' if weight_valid else '❌ FAIL'}")

    if all([config_valid, tokenizer_valid, (not args.full or weight_valid)]):
        logger.info("🎉 Model validation PASSED!")
        return 0
    else:
        logger.error("❌ Model validation FAILED!")
        return 1


if __name__ == "__main__":
    exit(main())
```
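A quick way to exercise the script from the command line. The flags and the `./deepseek_v3_4bit` path below come straight from the script's argparse defaults; adjust the path if your model lives elsewhere.

```bash
# Config + tokenizer checks only
python validate_model.py --model-path ./deepseek_v3_4bit

# Full validation, including loading .npz weight files with MLX
python validate_model.py --model-path ./deepseek_v3_4bit --full
```

The script exits with status 0 on a full pass and 1 otherwise, so it can be dropped into a shell pipeline or CI step as a gate before running inference.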