TL;DR:
- Set up RTX 3090 eGPU hardware connection
- Install NVIDIA drivers and CUDA toolkit
- Configure rootless Docker with GPU support
- Run vLLM container with GPU acceleration
- Test inference with your preferred models
Prerequisites
Hardware Requirements
# Verify eGPU connection
lspci | grep NVIDIA

System Requirements
# Ubuntu/Debian
sudo apt update
sudo apt install build-essential
# Docker (rootless)
curl -fsSL https://get.docker.com/rootless | sh
export PATH=$HOME/bin:$PATH
export DOCKER_HOST=unix://$XDG_RUNTIME_DIR/docker.sock
# NVIDIA drivers
sudo add-apt-repository ppa:graphics-drivers/ppa
sudo apt install nvidia-driver-470
# CUDA toolkit
wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
sudo ./cuda_11.8.0_520.61.05_linux.run

NVIDIA Drivers
Installation
# Add NVIDIA PPA
sudo add-apt-repository ppa:graphics-drivers/ppa
sudo apt update
# Install NVIDIA driver
sudo apt install nvidia-driver-470

Verify Installation
nvidia-smi

CUDA Toolkit
Installation
# Download CUDA 11.8
wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
# Make executable and run installer
chmod +x cuda_11.8.0_520.61.05_linux.run
sudo ./cuda_11.8.0_520.61.05_linux.run

Verify Installation
nvcc --version

Rootless Docker
Installation
# Install rootless Docker
curl -fsSL https://get.docker.com/rootless | sh
export PATH=$HOME/bin:$PATH
export DOCKER_HOST=unix://$XDG_RUNTIME_DIR/docker.sock
# Add to shell profile
echo 'export PATH=$HOME/bin:$PATH' >> ~/.bashrc
echo 'export DOCKER_HOST=unix://$XDG_RUNTIME_DIR/docker.sock' >> ~/.bashrc
# Start Docker daemon
systemctl --user start docker

NVIDIA Container Toolkit Setup
# Install NVIDIA Container Toolkit
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -s -L https://nvidia.github.io/libnvidia-container/experimental/$distribution/libnvidia-container.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
# Configure for rootless Docker
sudo nvidia-ctk runtime configure --runtime=docker --no-cgroups
sudo nvidia-ctk runtime configure --runtime=docker --config=$HOME/.config/docker/daemon.json
systemctl --user restart docker

Test GPU Access
# Test NVIDIA Container Toolkit
docker run --rm --gpus all nvidia/cuda:11.8-base-ubuntu20.04 nvidia-smi

vLLM
Docker Installation
# Pull vLLM image
docker pull vllm/vllm-openai:latest
# Run vLLM server
docker run --gpus all --shm-size 1g \
  -p 8000:8000 \
  -v $HOME/.cache/huggingface:/root/.cache/huggingface \
  vllm/vllm-openai:latest \
  --model microsoft/DialoGPT-medium \
  --gpu-memory-utilization 0.9

Native Installation
# Install Python environment
sudo apt install python3.8 python3.8-venv
python3 -m venv vllm-env
source vllm-env/bin/activate
# Install vLLM
pip install vllm
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Configuration
# Environment variables
export CUDA_VISIBLE_DEVICES=1
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512

Basic Usage
# Test API endpoint
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "microsoft/DialoGPT-medium",
    "messages": [{"role": "user", "content": "Hello"}],
    "max_tokens": 100
  }'

Performance Monitoring
# Monitor GPU usage
watch -n 1 nvidia-smi