Run ML training and inference on A100 and H100 GPUs. Use Python execution for scripts or Docker for custom environments.
PyTorch Training on GPU
Train a model using the CLI with GPU acceleration:
lyceum python run train.py -m gpu.a100 -r requirements.txt
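The requirements.txt passed with -r presumably lists the Python dependencies to install before the script runs; for the train.py below, a minimal set (assumed from its imports) would be:
# requirements.txt (assumed minimal set for train.py below)
torch
torchvision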
# train.py
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
# Load MNIST
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
train_data = datasets.MNIST('./data', train=True, download=True, transform=transform)
test_data = datasets.MNIST('./data', train=False, transform=transform)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=1000)
# Define model
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28*28, 512),
    nn.ReLU(),
    nn.Dropout(0.3),
    nn.Linear(512, 10)
).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
# Train
for epoch in range(5):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()

    # Evaluate
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            correct += (model(data).argmax(1) == target).sum().item()
    accuracy = 100. * correct / len(test_data)
    print(f"Epoch {epoch+1}: Test accuracy {accuracy:.1f}%")
# Save model to storage
torch.save(model.state_dict(), '/lyceum/storage/model.pth')
print("Model saved to storage")
Docker-based ML Training
Use a custom Docker image with pre-built dependencies:
lyceum docker run pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime \
  -m a100 \
  -c "python /lyceum/storage/train.py" \
  -t 3600
curl -X POST https://api.lyceum.technology/api/v2/external/execution/image/start \
  -H "Authorization: Bearer <TOKEN>" \
  -H "Content-Type: application/json" \
  -d '{
    "docker_image_ref": "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime",
    "docker_run_cmd": ["python", "/lyceum/storage/train.py"],
    "execution_type": "gpu",
    "timeout": 3600
  }'
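The command in docker_run_cmd runs inside the container, where /lyceum/storage is available (both examples above read train.py from that path). The script below is a hypothetical minimal placeholder for checking that the GPU and storage are visible from the image before launching a long training run; swap in the full train.py from above for real work:
# /lyceum/storage/train.py -- hypothetical smoke test for the container setup
import torch

print(f"PyTorch {torch.__version__}, CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Confirm the storage mount is writable by saving a tiny artifact
torch.save({"ok": True}, '/lyceum/storage/smoke_test.pt')
print("Wrote /lyceum/storage/smoke_test.pt")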
Model Inference
Use the built-in inference service to deploy Hugging Face models:
# Deploy a model
lyceum infer deploy meta-llama/Llama-3-8B --hf-token <HF_TOKEN>
# Chat with the deployed model
lyceum infer chat -p "Explain gradient descent in one paragraph" -m meta-llama/Llama-3-8B
# Process an image
lyceum infer chat -p "Describe this image" -i photo.jpg
# Batch inference from JSONL
lyceum infer chat -b prompts.jsonl -m meta-llama/Llama-3-8B
# Spin down when done
lyceum infer spindown meta-llama/Llama-3-8B
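The batch command reads prompts from a JSONL file. The exact schema expected by lyceum infer chat -b is not shown here; purely as an illustration, assuming one JSON object per line with a "prompt" field, such a file could be generated like this:
# make_prompts.py -- illustration only; the "prompt" field name is an assumption,
# check the CLI reference for the actual schema expected by `lyceum infer chat -b`
import json

prompts = [
    "Explain gradient descent in one paragraph",
    "Summarize the difference between L1 and L2 regularization",
]

with open("prompts.jsonl", "w") as f:
    for p in prompts:
        f.write(json.dumps({"prompt": p}) + "\n")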
Use execution_type: "gpu" (API) or -m gpu.a100 / -m gpu.h100 (CLI) for ML workloads. Most frameworks automatically use the GPU when one is available.
GPU instances cost more than CPU instances. Use CPU for preprocessing and testing, and GPU for actual training and inference.
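Even within a single GPU job, the CPU cores can handle data loading and preprocessing while the GPU runs the forward and backward passes. A minimal sketch of the standard PyTorch pattern (worker processes plus pinned memory), for illustration:
# CPU workers decode and transform batches; the GPU only sees ready tensors
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

transform = transforms.Compose([transforms.ToTensor()])
train_data = datasets.MNIST('./data', train=True, download=True, transform=transform)

loader = DataLoader(
    train_data,
    batch_size=64,
    shuffle=True,
    num_workers=4,     # CPU worker processes for loading/preprocessing
    pin_memory=True,   # page-locked host memory speeds up host-to-GPU copies
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for data, target in loader:
    # non_blocking=True lets the copy overlap with GPU compute when pin_memory is set
    data = data.to(device, non_blocking=True)
    target = target.to(device, non_blocking=True)
    break  # sketch only: a real job would run the forward/backward pass here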

