Instructions to use DeepXR/Helion-V2.5-Rnd with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use DeepXR/Helion-V2.5-Rnd with Transformers:

# Option A: high-level helper - build a ready-to-call text-generation pipeline
from transformers import pipeline

repo_id = "DeepXR/Helion-V2.5-Rnd"

pipe = pipeline(
    task="text-generation",
    model=repo_id,
    trust_remote_code=True,  # opt in to loading model code that comes from the repo
)

# Option B: load the tokenizer and the causal-LM model objects yourself
from transformers import AutoModelForCausalLM, AutoTokenizer

# Both loaders get the same opt-in so they resolve the repo the same way.
load_kwargs = {"trust_remote_code": True}
tokenizer = AutoTokenizer.from_pretrained(repo_id, **load_kwargs)
model = AutoModelForCausalLM.from_pretrained(repo_id, **load_kwargs)

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use DeepXR/Helion-V2.5-Rnd with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server.
# --trust-remote-code mirrors the trust_remote_code=True that the Transformers
# loading example for this repository passes; only enable it for repos you trust.
vllm serve "DeepXR/Helion-V2.5-Rnd" --trust-remote-code
# Call the server using curl (OpenAI-compatible API).
# Run this from a second shell once the server is accepting requests on port 8000:
curl -X POST "http://localhost:8000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "DeepXR/Helion-V2.5-Rnd",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker

# Run the model with the `docker model run` command, addressing the repository by its hf.co/ path.
docker model run hf.co/DeepXR/Helion-V2.5-Rnd

SGLang

How to use DeepXR/Helion-V2.5-Rnd with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server, listening on every interface at port 30000.
# --trust-remote-code mirrors the trust_remote_code=True that the Transformers
# loading example for this repository passes; only enable it for repos you trust.
python3 -m sglang.launch_server \
    --model-path "DeepXR/Helion-V2.5-Rnd" \
    --host 0.0.0.0 \
    --port 30000 \
    --trust-remote-code
# Call the server using curl (OpenAI-compatible API).
# Run this from a second shell once the server is accepting requests:
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "DeepXR/Helion-V2.5-Rnd",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker images

# Start the SGLang server inside the lmsysorg/sglang image.
#   --gpus all            expose every host GPU to the container
#   -p 30000:30000        publish the server port on the host
#   -v ~/.cache/...       reuse the host's Hugging Face cache inside the container
#   HF_TOKEN=<secret>     placeholder; substitute your own Hugging Face token
# --trust-remote-code mirrors the trust_remote_code=True that the Transformers
# loading example for this repository passes; only enable it for repos you trust.
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "DeepXR/Helion-V2.5-Rnd" \
        --host 0.0.0.0 \
        --port 30000 \
        --trust-remote-code
# Call the server using curl (OpenAI-compatible API).
# Run this from a second shell once the server is accepting requests:
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "DeepXR/Helion-V2.5-Rnd",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Docker Model Runner
How to use DeepXR/Helion-V2.5-Rnd with Docker Model Runner:
```
# Run the model with Docker Model Runner, addressing the repository by its hf.co/ path.
docker model run hf.co/DeepXR/Helion-V2.5-Rnd
```

Helion-V2.5-Rnd / docker-compose.yml

Trouter-Library

Create docker-compose.yml

ed61a86 verified 6 months ago

raw

history blame contribute delete

3.44 kB

	version: '3.8'

	services:
	helion-inference:
	build:
	context: .
	dockerfile: Dockerfile
	args:
	- CUDA_VERSION=12.1.1
	image: helion:2.5-rnd
	container_name: helion-inference-server

	deploy:
	resources:
	reservations:
	devices:
	- driver: nvidia
	count: all
	capabilities: [gpu]

	environment:
	- MODEL_PATH=/models/helion
	- MODEL_NAME=DeepXR/Helion-2.5-Rnd
	- PORT=8000
	- HOST=0.0.0.0
	- TENSOR_PARALLEL_SIZE=2
	- MAX_MODEL_LEN=131072
	- GPU_MEMORY_UTILIZATION=0.95
	- CUDA_VISIBLE_DEVICES=0,1
	- WORKERS=1

	# Optional: HuggingFace token for private models
	# - HF_TOKEN=your_token_here

	# Logging
	- LOG_LEVEL=info
	- LOG_FILE=/app/logs/helion.log

	volumes:
	- ./models:/models:ro
	- ./logs:/app/logs
	- ./cache:/app/cache
	- ./inference:/app/inference:ro
	- ./model_config.yaml:/app/model_config.yaml:ro

	ports:
	- "8000:8000"
	- "8001:8001" # Metrics
	- "8002:8002" # Admin

	healthcheck:
	test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
	interval: 30s
	timeout: 10s
	retries: 3
	start_period: 120s

	restart: unless-stopped

	networks:
	- helion-network

	logging:
	driver: "json-file"
	options:
	max-size: "100m"
	max-file: "10"

	prometheus:
	image: prom/prometheus:latest
	container_name: helion-prometheus

	volumes:
	- ./monitoring/prometheus.yml:/etc/prometheus/prometheus.yml:ro
	- prometheus-data:/prometheus

	ports:
	- "9090:9090"

	command:
	- '--config.file=/etc/prometheus/prometheus.yml'
	- '--storage.tsdb.path=/prometheus'
	- '--web.console.libraries=/usr/share/prometheus/console_libraries'
	- '--web.console.templates=/usr/share/prometheus/consoles'

	networks:
	- helion-network

	restart: unless-stopped

	grafana:
	image: grafana/grafana:latest
	container_name: helion-grafana

	environment:
	- GF_SECURITY_ADMIN_PASSWORD=admin
	- GF_USERS_ALLOW_SIGN_UP=false

	volumes:
	- grafana-data:/var/lib/grafana
	- ./monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards:ro
	- ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources:ro

	ports:
	- "3000:3000"

	networks:
	- helion-network

	depends_on:
	- prometheus

	restart: unless-stopped

	nginx:
	image: nginx:alpine
	container_name: helion-nginx

	volumes:
	- ./nginx/nginx.conf:/etc/nginx/nginx.conf:ro
	- ./nginx/ssl:/etc/nginx/ssl:ro

	ports:
	- "80:80"
	- "443:443"

	networks:
	- helion-network

	depends_on:
	- helion-inference

	restart: unless-stopped

	redis:
	image: redis:7-alpine
	container_name: helion-redis

	command: redis-server --appendonly yes

	volumes:
	- redis-data:/data

	ports:
	- "6379:6379"

	networks:
	- helion-network

	restart: unless-stopped

	networks:
	helion-network:
	driver: bridge
	ipam:
	config:
	- subnet: 172.28.0.0/16

	volumes:
	prometheus-data:
	driver: local
	grafana-data:
	driver: local
	redis-data:
	driver: local