DeepSeek-V3.1-4bit (MLX Optimized)
A 4-bit quantized version of DeepSeek-V3.1 optimized for Apple Silicon via MLX, providing efficient inference on Mac devices.
Model Details
- Architecture: DeepSeek-V3.1 Transformer
- Parameters: ~671B total (Mixture-of-Experts, ~37B activated per token), 4-bit quantized; see the size sketch below
- Context Length: 128K tokens
- Quantization: 4-bit (optimized for MLX)
- Framework: MLX (Apple Silicon optimized)
- Languages: Multilingual
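As a rough sanity check on the 4-bit footprint (a back-of-the-envelope sketch; real MLX quantization adds a few percent of group-scale overhead on top of this):
# size_check.py - back-of-the-envelope footprint of the 4-bit weights
total_params = 671e9     # total MoE parameters; all experts are stored on disk
bytes_per_param = 0.5    # 4 bits per weight, ignoring group-scale overhead
print(f"~{total_params * bytes_per_param / 1024**3:.0f} GiB")  # ≈ 312 GiB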
Usage
Installation
pip install mlx mlx-lm huggingface-hub transformers
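For simple text generation, the mlx-lm package provides a high-level load/generate API; a minimal sketch (exact keyword arguments can shift between mlx-lm releases, and the machine needs enough unified memory to hold the ~340 GB of weights):
# quick_generate.py - minimal inference sketch with mlx-lm
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/DeepSeek-V3.1-4bit")

messages = [{"role": "user", "content": "Explain 4-bit quantization in one paragraph."}]
# Use the chat template bundled with the tokenizer, if present.
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

text = generate(model, tokenizer, prompt=prompt, max_tokens=256, verbose=True)
print(text)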
Download and Workflow Scripts
Here's a complete workflow for downloading and working with this model:
# complete_deepseek_workflow.py
"""
Complete workflow for downloading and analyzing the DeepSeek-V3.1-4bit model.
Subcommands: download, analyze, check, validate, test.
"""
import argparse
import importlib.metadata
import logging
import os
import sys
import time
from pathlib import Path

import mlx.core as mx
from huggingface_hub import snapshot_download
from packaging.version import Version  # packaging ships as a transformers dependency
from transformers import AutoConfig, AutoTokenizer
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def download_model(args):
"""Download the model with specified options."""
repo_id = "mlx-community/DeepSeek-V3.1-4bit"
logger.info("=" * 70)
logger.info("🤗 MLX DeepSeek-V3.1-4bit Model Downloader")
logger.info("=" * 70)
    # Note: resume_download and local_dir_use_symlinks are deprecated in recent
    # huggingface_hub (downloads resume by default), so they are omitted here.
    download_kwargs = {
        "repo_id": repo_id,
        "revision": args.revision,
        "local_dir": args.output_dir,
        "force_download": args.force_download,
    }
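    # Optional speed-up: `pip install hf_transfer` and set HF_HUB_ENABLE_HF_TRANSFER=1
    # in the environment before running this script.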
if args.allow_patterns:
download_kwargs["allow_patterns"] = args.allow_patterns
if args.ignore_patterns:
download_kwargs["ignore_patterns"] = args.ignore_patterns
try:
logger.info(f"🚀 Starting download of {repo_id}")
logger.info(f"📁 Output directory: {args.output_dir}")
logger.info(f"🔖 Revision: {args.revision}")
logger.info(f"💾 Cache dir: {args.cache_dir}")
if args.cache_dir:
download_kwargs["cache_dir"] = args.cache_dir
# Download the model
model_path = snapshot_download(**download_kwargs)
logger.info(f"✅ Download completed successfully!")
logger.info(f"📦 Model saved to: {model_path}")
return model_path
except Exception as e:
logger.error(f"❌ Download failed: {e}")
raise
def analyze_model(model_path):
"""Comprehensive model analysis"""
logger.info("=" * 60)
logger.info("🔍 DeepSeek-V3.1-4bit Comprehensive Analysis")
logger.info("=" * 60)
# Load config
try:
config = AutoConfig.from_pretrained(model_path)
logger.info("📊 Model Configuration:")
logger.info(f" Architecture: {config.architectures[0] if config.architectures else 'N/A'}")
logger.info(f" Model type: {getattr(config, 'model_type', 'N/A')}")
logger.info(f" Vocab size: {getattr(config, 'vocab_size', 'N/A'):,}")
logger.info(f" Hidden size: {getattr(config, 'hidden_size', 'N/A')}")
logger.info(f" Num hidden layers: {getattr(config, 'num_hidden_layers', 'N/A')}")
logger.info(f" Num attention heads: {getattr(config, 'num_attention_heads', 'N/A')}")
logger.info(f" Max position embeddings: {getattr(config, 'max_position_embeddings', 'N/A')}")
logger.info(f" Context length: {getattr(config, 'max_position_embeddings', 'N/A')}")
# DeepSeek specific config
if hasattr(config, 'rope_theta'):
logger.info(f" RoPE theta: {config.rope_theta}")
if hasattr(config, 'rms_norm_eps'):
logger.info(f" RMS norm eps: {config.rms_norm_eps}")
except Exception as e:
logger.error(f"❌ Failed to load config: {e}")
return
# Load tokenizer
try:
tokenizer = AutoTokenizer.from_pretrained(model_path)
logger.info("\n🔤 Tokenizer Analysis:")
logger.info(f" Vocabulary size: {tokenizer.vocab_size:,}")
logger.info(f" Special tokens: {len(tokenizer.special_tokens_map)}")
logger.info(f" Padding token: {tokenizer.pad_token}")
logger.info(f" EOS token: {tokenizer.eos_token}")
logger.info(f" BOS token: {tokenizer.bos_token}")
# Check for special tokens
special_tokens = getattr(tokenizer, 'special_tokens_map', {})
for key, value in special_tokens.items():
logger.info(f" {key}: {value}")
except Exception as e:
logger.error(f"❌ Failed to load tokenizer: {e}")
return
# Test various prompts
test_prompts = [
"The capital of France is",
"Artificial intelligence is",
"The future of machine learning will",
"Once upon a time",
"import numpy as np",
"量子コンピューティングとは", # Japanese
"El aprendizaje automático es", # Spanish
"机器学习是", # Chinese
"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nHello!<|im_end|>\n<|im_start|>assistant",
"def fibonacci(n):",
"The quick brown fox jumps over the lazy dog"
]
logger.info("\n🧪 Tokenization Examples:")
for prompt in test_prompts:
try:
tokens = tokenizer.encode(prompt)
decoded = tokenizer.decode(tokens[:10]) + ("..." if len(tokens) > 10 else "")
logger.info(f" '{prompt[:30]}{'...' if len(prompt) > 30 else ''}'")
logger.info(f" → {len(tokens)} tokens: {tokens[:10]}{'...' if len(tokens) > 10 else ''}")
logger.info(f" → decoded: {decoded}")
except Exception as e:
logger.warning(f" Failed to tokenize: {prompt[:30]} - {e}")
# Check model files
model_dir = Path(model_path)
model_files = list(model_dir.glob("*.safetensors")) + list(model_dir.glob("*.npz")) + list(model_dir.glob("*.gguf"))
logger.info(f"\n📦 Model Files: {len(model_files)} weight files")
for file in model_files:
size_mb = file.stat().st_size / (1024 * 1024)
logger.info(f" {file.name} ({size_mb:.1f} MB)")
# Estimate memory requirements
    # Estimate memory requirements. DeepSeek-V3.1 is a ~671B-parameter MoE;
    # all experts are stored even though only ~37B parameters activate per token.
    total_params = 671_000_000_000
    param_size = 0.5  # bytes per parameter at 4-bit quantization
    total_memory_gb = (total_params * param_size) / (1024 ** 3)
    logger.info("\n💾 Memory Requirements (Estimated):")
    logger.info(f"  Model size (4-bit): ~{total_memory_gb:.0f} GB")
    logger.info(f"  Unified memory: ~{total_memory_gb:.0f} GB+ for weights, plus KV cache at long context")
return config, tokenizer
def check_system_requirements():
"""Check system requirements for running the model"""
logger.info("=" * 50)
logger.info("🖥️ System Requirements Check")
logger.info("=" * 50)
# Check Python version
python_version = sys.version.split()[0]
logger.info(f"✅ Python: {python_version}")
# Check required packages
    required_packages = [
        ("transformers", "4.35.0"),
        ("huggingface-hub", "0.16.0"),
        ("mlx", "0.0.6"),
        ("numpy", "1.21.0"),
    ]
    for package, min_version in required_packages:
        try:
            version = importlib.metadata.version(package)
            # Parse versions before comparing; raw string comparison misorders
            # e.g. "0.10.0" vs "0.9.0".
            status = "✅" if Version(version) >= Version(min_version) else "⚠️"
            logger.info(f"{status} {package}: {version} (>= {min_version})")
        except importlib.metadata.PackageNotFoundError:
            logger.error(f"❌ {package}: Not installed")
# Check optional packages
logger.info("\n📦 Optional Packages:")
optional_packages = [
"accelerate",
"safetensors",
"tokenizers",
]
for package in optional_packages:
try:
version = importlib.metadata.version(package)
logger.info(f"✅ {package}: {version}")
except importlib.metadata.PackageNotFoundError:
logger.info(f"⚠️ {package}: Not installed")
def validate_model(model_path, full_validation=False):
"""Validate the model"""
logger.info("=" * 60)
logger.info("✅ DeepSeek-V3.1-4bit Model Validation")
logger.info("=" * 60)
# Validate config
logger.info("📋 Validating model configuration...")
try:
config = AutoConfig.from_pretrained(model_path)
required_fields = ['vocab_size', 'hidden_size', 'num_hidden_layers', 'num_attention_heads', 'max_position_embeddings']
config_valid = all(hasattr(config, field) for field in required_fields)
logger.info(f" Configuration: {'✅ PASS' if config_valid else '❌ FAIL'}")
except Exception as e:
logger.error(f"❌ Config validation failed: {e}")
config_valid = False
# Validate tokenizer
logger.info("🔤 Validating tokenizer...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path)
        test_texts = ["Hello world", "The quick brown fox", "こんにちは世界", "机器学习"]
        for text in test_texts:
            tokens = tokenizer.encode(text)
            decoded = tokenizer.decode(tokens)
            if not decoded:
                raise ValueError(f"Empty round-trip decode for: {text!r}")
        logger.info("  Tokenizer: ✅ PASS")
        tokenizer_valid = True
except Exception as e:
logger.error(f"❌ Tokenizer validation failed: {e}")
tokenizer_valid = False
# Validate weights if requested
weight_valid = True
if full_validation:
logger.info("⚖️ Validating model weights...")
weight_files = []
for pattern in ["*.npz", "*.safetensors", "*.gguf"]:
weight_files.extend(Path(model_path).glob(pattern))
if not weight_files:
logger.error("❌ No weight files found")
weight_valid = False
else:
            for weight_file in weight_files:
                try:
                    # mx.load understands .npz, .safetensors, and .gguf weight files
                    weights = mx.load(str(weight_file))
                    logger.info(f"✅ {weight_file.name}: Loaded {len(weights)} arrays")
except Exception as e:
logger.error(f"❌ Failed to validate {weight_file.name}: {e}")
weight_valid = False
# Summary
logger.info("\n📊 Validation Summary:")
logger.info(f" Configuration: {'✅ PASS' if config_valid else '❌ FAIL'}")
logger.info(f" Tokenizer: {'✅ PASS' if tokenizer_valid else '❌ FAIL'}")
if full_validation:
logger.info(f" Weights: {'✅ PASS' if weight_valid else '❌ FAIL'}")
if all([config_valid, tokenizer_valid, (not full_validation or weight_valid)]):
logger.info("🎉 Model validation PASSED!")
return True
else:
logger.error("❌ Model validation FAILED!")
return False
def test_model(model_path, quick_test=False):
"""Test the model"""
logger.info("=" * 60)
logger.info("🧪 DeepSeek-V3.1-4bit Model Testing")
logger.info("=" * 60)
# Test tokenizer
logger.info("🔤 Testing tokenizer...")
try:
tokenizer = AutoTokenizer.from_pretrained(model_path)
test_cases = [
("Short text", "Hello world!"),
("Code", "def factorial(n):\n if n == 0:\n return 1\n return n * factorial(n-1)"),
("Multilingual", "こんにちは世界! Hello world! ¡Hola mundo!"),
]
for name, text in test_cases:
start_time = time.time()
tokens = tokenizer.encode(text)
end_time = time.time()
logger.info(f"✅ {name}: {len(tokens)} tokens, {end_time-start_time:.3f}s")
tokenizer_ok = True
except Exception as e:
logger.error(f"❌ Tokenizer test failed: {e}")
tokenizer_ok = False
# Test weights loading if not quick test
weights_ok = True
if not quick_test:
logger.info("⚖️ Testing weights loading...")
weight_files = []
for pattern in ["*.npz", "*.safetensors"]:
weight_files.extend([f for f in Path(model_path).glob(pattern)])
if not weight_files:
logger.error("❌ No weight files found for testing")
weights_ok = False
else:
for weight_file in weight_files:
try:
start_time = time.time()
weights = mx.load(str(weight_file))
load_time = time.time() - start_time
logger.info(f"✅ {weight_file.name}: Loaded in {load_time:.2f}s")
except Exception as e:
logger.error(f"❌ Failed to load {weight_file.name}: {e}")
weights_ok = False
# Summary
logger.info("\n📊 Test Summary:")
logger.info(f" Tokenizer: {'✅ PASS' if tokenizer_ok else '❌ FAIL'}")
if not quick_test:
logger.info(f" Weights: {'✅ PASS' if weights_ok else '❌ FAIL'}")
if all([tokenizer_ok, (quick_test or weights_ok)]):
logger.info("🎉 All tests PASSED! The model is ready for use.")
return True
else:
logger.error("❌ Some tests FAILED!")
return False
def main():
parser = argparse.ArgumentParser(description="Complete workflow for DeepSeek-V3.1-4bit model")
subparsers = parser.add_subparsers(dest='command', help='Command to run')
# Download command
download_parser = subparsers.add_parser('download', help='Download the model')
download_parser.add_argument("--output-dir", type=str, default="./deepseek_v3_4bit",
help="Directory to save the model")
download_parser.add_argument("--cache-dir", type=str, default="./hf_cache",
help="Cache directory for Hugging Face")
download_parser.add_argument("--revision", type=str, default="main",
help="Model revision/branch to download")
download_parser.add_argument("--force-download", action="store_true",
help="Force re-download even if files exist")
download_parser.add_argument("--allow-patterns", nargs="+",
help="Only download files matching these patterns")
download_parser.add_argument("--ignore-patterns", nargs="+",
default=["*.h5", "*.ot", "*.msgpack", "*.tflite"],
help="Skip files matching these patterns")
# Analyze command
analyze_parser = subparsers.add_parser('analyze', help='Analyze the model')
analyze_parser.add_argument("--model-path", type=str, default="./deepseek_v3_4bit",
help="Path to the downloaded model")
# Check command
check_parser = subparsers.add_parser('check', help='Check system requirements')
# Validate command
validate_parser = subparsers.add_parser('validate', help='Validate the model')
validate_parser.add_argument("--model-path", type=str, default="./deepseek_v3_4bit",
help="Path to the downloaded model")
validate_parser.add_argument("--full", action="store_true",
help="Run full validation including weight loading")
# Test command
test_parser = subparsers.add_parser('test', help='Test the model')
test_parser.add_argument("--model-path", type=str, default="./deepseek_v3_4bit",
help="Path to the downloaded model")
test_parser.add_argument("--quick", action="store_true",
help="Run quick tests only (skip weight loading)")
args = parser.parse_args()
if args.command == 'download':
os.makedirs(args.output_dir, exist_ok=True)
try:
model_path = download_model(args)
logger.info("🎉 Model download completed successfully!")
logger.info(f"📁 Model available at: {model_path}")
except Exception as e:
logger.error(f"❌ Download failed: {e}")
return 1
elif args.command == 'analyze':
if not os.path.exists(args.model_path):
logger.error(f"Model path does not exist: {args.model_path}")
return 1
analyze_model(args.model_path)
elif args.command == 'check':
check_system_requirements()
logger.info("\n💡 Recommendations:")
logger.info(" - Ensure you have at least 40GB free disk space")
logger.info(" - For inference, recommend 64GB+ RAM")
logger.info(" - Use Apple Silicon (M1/M2/M3) for best performance")
elif args.command == 'validate':
if not os.path.exists(args.model_path):
logger.error(f"Model path does not exist: {args.model_path}")
return 1
validate_model(args.model_path, args.full)
elif args.command == 'test':
if not os.path.exists(args.model_path):
logger.error(f"Model path does not exist: {args.model_path}")
return 1
test_model(args.model_path, args.quick)
else:
parser.print_help()
return 0
if __name__ == "__main__":
    sys.exit(main())
Usage Examples
- Download the model:
python complete_deepseek_workflow.py download --output-dir ./deepseek_v3_4bit
- Analyze the model:
python complete_deepseek_workflow.py analyze --model-path ./deepseek_v3_4bit
- Check system requirements:
python complete_deepseek_workflow.py check
- Validate the model:
python complete_deepseek_workflow.py validate --model-path ./deepseek_v3_4bit --full
- Test the model:
python complete_deepseek_workflow.py test --model-path ./deepseek_v3_4bit
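The workflow functions can also be imported and called directly from Python (a sketch using the function names defined in the script above):
# programmatic_use.py - drive the workflow without the CLI
from complete_deepseek_workflow import analyze_model, validate_model, test_model

model_dir = "./deepseek_v3_4bit"
if validate_model(model_dir, full_validation=True):
    analyze_model(model_dir)
    test_model(model_dir, quick_test=True)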
Hardware Requirements
- Unified memory: enough to hold the ~340GB of 4-bit weights; 512GB recommended for comfortable 128K-context inference
- Storage: ~400GB+ free space
- Hardware: Apple Silicon required for MLX; Ultra-class chips (e.g., M2/M3 Ultra) recommended
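To confirm a machine can hold the model before downloading it, a quick unified-memory check on macOS (a hedged sketch; hw.memsize is the standard sysctl key for total physical memory):
# check_memory.py - report total unified memory on macOS
import subprocess

mem_bytes = int(subprocess.check_output(["sysctl", "-n", "hw.memsize"]).decode().strip())
print(f"Unified memory: {mem_bytes / 1024**3:.0f} GB")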
Limitations
- Runs only on Apple Silicon via MLX; no CUDA or x86 support
- 4-bit quantization may slightly reduce output quality compared to full precision
- Requires an MLX-compatible inference stack (e.g., mlx-lm) rather than standard PyTorch/transformers loading
Citation
@misc{deepseek2025,
title={DeepSeek-V3.1-4bit},
author={DeepSeek Team},
year={2025},
howpublished={\url{https://huggingface.co/mlx-community/DeepSeek-V3.1-4bit}},
}
License
This model is subject to the original DeepSeek model license. Please refer to the model repository for specific license details.
Model Tree
- Base model: deepseek-ai/DeepSeek-V3.1-Base
- Quantized from: deepseek-ai/DeepSeek-V3.1