Remote & on-site IT support across Australia & New Zealand · 24/7 emergency line

Configure Proxmox Ceph Cluster for Talos Kubernetes

  1. Go to your first Proxmox Node, select Ceph > Configure
  2. Go to Ceph > Monitor > Create > select 2nd Host > Create > repeat for all other hosts
  3. Go to Ceph > Manager > Create > select 2nd Host > Create > repeat for all other hosts
  4. Go to Ceph > OSD > Create: OSD > select unused disk > Create > repeat for all other disks
  5. Go to Ceph > CephFS > Create > select host > accept or enter MDS ID (pve1-mds) > Create > repeat for all hosts
  6. Go to Ceph > CephFS > Create CephFS > give it a name (cephfs-storage) > Create
Setting Up Ceph for Kubernetes Access
Generate Required Ceph Users and Keyrings
# Create the Ceph users (and keyring files) used by Rook and the CSI drivers.
# Every trailing backslash continues the command onto the next line; without
# it the shell would try to run the capability lines (mon/osd/mgr/mds/-o) as
# separate commands and the user would be created with no capabilities.
# NOTE: the keyring files written by -o under /tmp contain secret keys;
# delete them once the keys have been stored securely.

# Create the operator user that Rook authenticates as (client.rook-ceph-op)
ceph auth get-or-create client.rook-ceph-op \
  mon 'profile rbd, allow r' \
  osd 'profile rbd' \
  mgr 'allow rw' \
  -o /tmp/rook-ceph-op.keyring

# Create user for CSI RBD provisioner
ceph auth get-or-create client.rook-csi-rbd-provisioner \
  mon 'profile rbd' \
  osd 'profile rbd' \
  -o /tmp/rook-csi-rbd-provisioner.keyring

# Create user for CSI RBD node plugin
ceph auth get-or-create client.rook-csi-rbd-node \
  mon 'profile rbd' \
  osd 'profile rbd' \
  -o /tmp/rook-csi-rbd-node.keyring

# Create user for CSI CephFS provisioner
ceph auth get-or-create client.rook-csi-cephfs-provisioner \
  mon 'allow r' \
  mgr 'allow rw' \
  osd 'allow rw tag cephfs metadata=*, allow rw tag cephfs data=*' \
  mds 'allow rw' \
  -o /tmp/rook-csi-cephfs-provisioner.keyring

# Create user for CSI CephFS node plugin
ceph auth get-or-create client.rook-csi-cephfs-node \
  mon 'allow r' \
  osd 'allow rw tag cephfs *=*' \
  mds 'allow rw' \
  -o /tmp/rook-csi-cephfs-node.keyring
Extract Keyring Values
# Extract key for each user
ROOK_OP_KEY=$(ceph auth get-key client.rook-ceph-op)
RBD_PROV_KEY=$(ceph auth get-key client.rook-csi-rbd-provisioner)
RBD_NODE_KEY=$(ceph auth get-key client.rook-csi-rbd-node)
CEPHFS_PROV_KEY=$(ceph auth get-key client.rook-csi-cephfs-provisioner)
CEPHFS_NODE_KEY=$(ceph auth get-key client.rook-csi-cephfs-node)

# Print keys (save these securely)
echo "rook-ceph-op: $ROOK_OP_KEY"
echo "rbd-provisioner: $RBD_PROV_KEY"
echo "rbd-node: $RBD_NODE_KEY"
echo "cephfs-provisioner: $CEPHFS_PROV_KEY"
echo "cephfs-node: $CEPHFS_NODE_KEY"

# Get monitor addresses
ceph mon dump | grep -E '^[0-9]'

# Get cluster FSID
ceph fsid

# Verify users
sudo ceph auth ls | grep "^client.rook"

Add a new section for your RGW instance in your /etc/ceph/ceph.conf

Use the Import Script
# Download the script from the Rook repository matching your version
curl -sL https://raw.githubusercontent.com/rook/rook/v1.19.4/deploy/examples/create-external-cluster-resources.py -o create-external-cluster-resources.py

# Enable the module
sudo ceph mgr module enable prometheus

# Run the script to generate all secrets.
# The trailing backslashes keep all options on one logical command line;
# stdout (the bash-format exports) is redirected into rook-resources.sh.
# replicapool / myfs are example names - substitute the RBD pool and CephFS
# filesystem that actually exist on your Ceph cluster.
python3 create-external-cluster-resources.py \
  --rbd-data-pool-name replicapool \
  --cephfs-filesystem-name myfs \
  --namespace rook-ceph-external \
  --format bash > rook-resources.sh

# View the monitor keyring (requires root)
sudo cat /var/lib/ceph/mon/ceph-$(hostname)/keyring

Setup Talos Cluster for Proxmox Ceph

Create Kubernetes Secrets for External Cluster Credentials

Create rook-ceph-external namespace and populate secrets:

# from Windows terminal:
# -O (capital letter O, not the digit zero) sets the output file name
wget -O import-external-cluster.sh https://raw.githubusercontent.com/rook/rook/v1.19.5/deploy/examples/import-external-cluster.sh
# Copy to Ubuntu Core (source path is the file downloaded above, in the current directory)
scp ./import-external-cluster.sh username@ubuntu_core_ip:/home/username

# Create namespace
kubectl create namespace rook-ceph-external

chmod +x import-external-cluster.sh
source rook-resources.sh
./import-external-cluster.sh
# external-cluster-secrets.yaml
apiVersion: v1
kind: Secret
metadata:
  name: rook-ceph-mon
  namespace: rook-ceph-external
type: kubernetes.io/rook
stringData:
  cluster-name: "rook-ceph-external"
  fsid: "a945d810-3d4c-4a29-b71e-3dd7ffb6c8d2"
  admin-secret: ""
  mon-secret: "AQCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=="
  # The Ceph user Rook will authenticate as
  ceph-username: "client.rook-ceph-op"
  ceph-secret: "AQCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx=="
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: rook-ceph-mon-endpoints
  namespace: rook-ceph-external
data:
  # Monitor endpoints in the format a=IP:PORT,b=IP:PORT,c=IP:PORT
  data: "a=192.168.1.10:6789,b=192.168.1.11:6789,c=192.168.1.12:6789"
  mapping: "{}"
  maxMonId: "0"
# csi-secrets.yaml
apiVersion: v1
kind: Secret
metadata:
  name: rook-csi-rbd-node
  namespace: rook-ceph-external
stringData:
  userID: rook-csi-rbd-node
  userKey: AQCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx==
---
apiVersion: v1
kind: Secret
metadata:
  name: rook-csi-rbd-provisioner
  namespace: rook-ceph-external
stringData:
  userID: rook-csi-rbd-provisioner
  userKey: AQCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx==
---
apiVersion: v1
kind: Secret
metadata:
  name: rook-csi-cephfs-node
  namespace: rook-ceph-external
stringData:
  adminID: rook-csi-cephfs-node
  adminKey: AQCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx==
---
apiVersion: v1
kind: Secret
metadata:
  name: rook-csi-cephfs-provisioner
  namespace: rook-ceph-external
stringData:
  adminID: rook-csi-cephfs-provisioner
  adminKey: AQCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx==

Apply settings

kubectl apply -f external-cluster-secrets.yaml
kubectl apply -f csi-secrets.yaml
Install Rook
# Add the Rook Repository
helm repo add rook-release https://charts.rook.io/release
helm repo update

# Deploy the Rook Operator
helm install --create-namespace --namespace rook-ceph rook-ceph rook-release/rook-ceph

# 1. Set the enforcement level on talos to privileged (Required to start the pods), fix "Warning: would violate PodSecurity..."
kubectl label namespace rook-ceph pod-security.kubernetes.io/enforce=privileged --overwrite
kubectl label namespace rook-ceph-external pod-security.kubernetes.io/enforce=privileged --overwrite

# 2. Set the audit and warn levels (Prevents logs from filling with warnings)
kubectl label namespace rook-ceph pod-security.kubernetes.io/audit=privileged --overwrite
kubectl label namespace rook-ceph pod-security.kubernetes.io/warn=privileged --overwrite
Deploy the External CephCluster CR
# external-cluster.yaml
# CephCluster CR that puts Rook in external mode (spec.external.enable: true)
# in the rook-ceph-external namespace.
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph-external
  namespace: rook-ceph-external
spec:
  external:
    enable: true
  # Host path Rook uses as its data directory
  # (NOTE(review): presumably config/state on each node - confirm against the Rook docs)
  dataDirHostPath: /var/lib/rook
  # Disable the crashcollector for external clusters
  crashCollector:
    disable: true
  # Mon health check is left enabled (disabled: false) and runs every 45s
  healthCheck:
    daemonHealth:
      mon:
        disabled: false
        interval: 45s
kubectl apply -f external-cluster.yaml
Verify Authentication
# Check that the CephCluster is connected
kubectl get cephcluster -n rook-ceph-external

# Check operator logs for authentication errors
# -E enables extended regex so "|" is alternation; with plain grep the
# pattern would only match the literal text "external|auth|error".
kubectl logs -n rook-ceph deploy/rook-ceph-operator | grep -iE "external|auth|error" | tail -20

# Verify the cluster phase is "Connected"
kubectl describe cephcluster rook-ceph-external -n rook-ceph-external | grep -A5 "Status:"
Create StorageClass for RBD Block Storage
# storageclass-rbd-external.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-ceph-block-external
  annotations:
    storageclass.kubernetes.io/is-default-class: "false"
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
  # Must match the namespace/clusterID in the ceph-csi-config ConfigMap
  clusterID: rook-ceph-external
  # RBD pool on the external cluster
  pool: replicapool
  # RBD image format - always use 2
  imageFormat: "2"
  # RBD image features compatible with kernel RBD driver
  imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock
  # CSI provisioner secret references
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph-external
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph-external
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph-external
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: Immediate
kubectl apply -f storageclass-rbd-external.yaml

Check the pods

kubectl get pod -n rook-ceph # wait for 3-4 minutes

# Patch the Operator ConfigMap to reduce the CSI provisioner replicas to 1 (only needed if provisioner pods stay Pending because not all replicas can be scheduled)
kubectl patch cm rook-ceph-operator-config -n rook-ceph -p '{"data": {"CSI_PROVISIONER_REPLICAS": "1"}}'

# Allow pods on Control Plane (to fix pending pods in rook-ceph namespace) - update Tolerations (if the above doesn't work)
# Trailing backslashes join the --set options into one helm command.
# Each --set value is single-quoted so the shell does not treat [0]/[1] as glob patterns.
helm upgrade rook-ceph rook-release/rook-ceph -n rook-ceph \
  --set 'csi.provisionerTolerations[0].key=node-role.kubernetes.io/control-plane' \
  --set 'csi.provisionerTolerations[0].operator=Exists' \
  --set 'csi.provisionerTolerations[0].effect=NoSchedule' \
  --set 'csi.provisionerTolerations[1].key=node-role.kubernetes.io/master' \
  --set 'csi.provisionerTolerations[1].operator=Exists' \
  --set 'csi.provisionerTolerations[1].effect=NoSchedule'

# Restart the Operator (if need to clear pending)
kubectl delete pod -l app=rook-ceph-operator -n rook-ceph
Create a Retain Policy StorageClass for Production
# storageclass-rbd-external-retain.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-ceph-block-external-retain
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
  clusterID: rook-ceph-external
  pool: replicapool
  imageFormat: "2"
  imageFeatures: layering,fast-diff,object-map,deep-flatten,exclusive-lock
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph-external
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph-external
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph-external
# Retain keeps the PV and RBD image after PVC deletion
reclaimPolicy: Retain
allowVolumeExpansion: true
volumeBindingMode: Immediate
kubectl apply -f storageclass-rbd-external-retain.yaml
Create StorageClass for CephFS Shared Storage

# storageclass-cephfs-external.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-cephfs-external
provisioner: rook-ceph.cephfs.csi.ceph.com
parameters:
  clusterID: rook-ceph-external
  # CephFS filesystem name on the external cluster
  fsName: myfs
  # CephFS pool where data will be stored
  pool: myfs-data0
  # Root path within the CephFS filesystem
  rootPath: /external-volumes
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-cephfs-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph-external
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-cephfs-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph-external
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-cephfs-node
  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph-external
reclaimPolicy: Delete
allowVolumeExpansion: true
volumeBindingMode: Immediate
kubectl apply -f storageclass-cephfs-external.yaml
Create StorageClass for Erasure Coded Pool (optional)
# storageclass-ec-external.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: rook-ceph-block-ec-external
provisioner: rook-ceph.rbd.csi.ceph.com
parameters:
  clusterID: rook-ceph-external
  # Metadata pool must be replicated (cannot use erasure coded pool)
  pool: replicapool
  # Data pool uses erasure coding for space efficiency
  dataPool: ec-data-pool
  imageFormat: "2"
  # Erasure coded pools require only layering feature
  imageFeatures: layering
  csi.storage.k8s.io/provisioner-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/provisioner-secret-namespace: rook-ceph-external
  csi.storage.k8s.io/controller-expand-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/controller-expand-secret-namespace: rook-ceph-external
  csi.storage.k8s.io/node-stage-secret-name: rook-csi-rbd-node
  csi.storage.k8s.io/node-stage-secret-namespace: rook-ceph-external
reclaimPolicy: Delete
allowVolumeExpansion: true
Create VolumeSnapshotClass for External Cluster
Install Snapshot CRDs
# Install snapshot CRDs (if not already present)
kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.5.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotclasses.yaml
kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.5.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotcontents.yaml
kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.5.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshots.yaml

# Install snapshot controller
kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.5.0/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml
kubectl apply -f https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/v8.5.0/deploy/kubernetes/snapshot-controller/setup-snapshot-controller.yaml
# volumesnapshotclass-external.yaml
apiVersion: snapshot.storage.k8s.io/v1
kind: VolumeSnapshotClass
metadata:
  name: csi-rbdplugin-snapclass-external
driver: rook-ceph.rbd.csi.ceph.com
parameters:
  clusterID: rook-ceph-external
  csi.storage.k8s.io/snapshotter-secret-name: rook-csi-rbd-provisioner
  csi.storage.k8s.io/snapshotter-secret-namespace: rook-ceph-external
deletionPolicy: Delete
kubectl apply -f volumesnapshotclass-external.yaml
kubectl get volumesnapshotclass

# Restart the Operator
kubectl delete pod -l app=rook-ceph-operator -n rook-ceph
Set a Default StorageClass
# Make the RBD StorageClass the default
# The trailing backslash keeps the -p patch payload on the same command;
# without it kubectl patch runs with no patch and the -p line fails on its own.
kubectl patch storageclass rook-ceph-block-external \
  -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'

# Verify
kubectl get storageclass

# Final check
kubectl get cm ceph-csi-config -n rook-ceph -o yaml
Test create volume
# Test RBD block storage
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-block-external
spec:
  accessModes:
    - ReadWriteOnce
  storageClassName: rook-ceph-block-external
  resources:
    requests:
      storage: 1Gi
EOF

# Test CephFS shared storage
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-cephfs-external
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: rook-cephfs-external
  resources:
    requests:
      storage: 1Gi
EOF

# Check PVC status
kubectl get pvc test-block-external test-cephfs-external
# Check the image name
kubectl get pv pvc-xxxxx -o yaml | grep volumeHandle

# Check on Proxmox
rbd ls -p kubernetes-storage
rbd info kubernetes-storage/csi-vol-xxxx

# Delete the test volumes
kubectl get pvc --all-namespaces | grep -E 'test-block-external|test-cephfs-external'
kubectl delete pvc test-block-external test-cephfs-external -n default

# If it's "stuck", force cleanup
kubectl patch pv pvc-xxx -p '{"metadata":{"finalizers":null}}' --type=merge
kubectl patch pv pvc-xxx -p '{"metadata":{"finalizers":null}}' --type=merge

# Identify the csi-vol image name for Proxmox
kubectl get pv -o custom-columns=NAME:.metadata.name,IMAGE:.spec.csi.volumeAttributes.imageName

# Check and delete rbd on Proxmox
rbd ls -p <cephrbd_name>
rbd rm kubernetes-storage/csi-vol-xxxx

# Check ceph fs volumes
ceph fs subvolume ls <cephfs_name>
Change Talos Pod Security Admission to exempt rook-ceph (only if needed)
# Verify the result: expect to see pods named csi-rbdplugin-xxxxx
kubectl get pod -n rook-ceph -w

# Check the current namespace labels
kubectl get ns rook-ceph -o yaml | grep labels -A 5

# Remove labels from rook-ceph
kubectl label namespace rook-ceph pod-security.kubernetes.io/enforce-
kubectl label namespace rook-ceph pod-security.kubernetes.io/audit-
kubectl label namespace rook-ceph pod-security.kubernetes.io/warn-

# Remove labels from rook-ceph-external
kubectl label namespace rook-ceph-external pod-security.kubernetes.io/enforce-
References

https://oneuptime.com/blog/post/2026-03-31-rook-external-ceph-auth/view#:~:text=Best%20PracticesSummary-,Introduction,authenticate%20with%20the%20external%20cluster.

https://oneuptime.com/blog/post/2026-03-31-rook-how-to-connect-rook-to-an-existing-external-ceph-cluster/view

https://oneuptime.com/blog/post/2026-03-31-rook-external-storage-classes/view#:~:text=Setting%20up%20StorageClasses%20for%20an,of%20external%20Ceph%20storage%20capabilities.

https://oneuptime.com/blog/post/2026-03-31-rook-volume-snapshot-class-rbd/view#:~:text=describe%20volumesnapshotcontent-,Summary,definitions%20enable%20restores%20or%20clones.

Leave a Comment

Your email address will not be published. Required fields are marked *

Scroll to Top