pr feedback - handle evictions and wait for disk pressure condition

This commit is contained in:
Frostebite
2025-12-29 18:01:33 +00:00
parent ed0d2c13b6
commit f4d28fa6d2
4 changed files with 64 additions and 1 deletions

View File

@@ -242,6 +242,27 @@ jobs:
echo "Taint removed. Checking nodes..."
kubectl describe nodes | grep -i taint || echo "No taints found"
fi
# Wait for the DiskPressure node condition itself to clear (not just the taint).
#
# Every node always lists a DiskPressure entry in status.conditions; only the
# entry's status ("True"/"False") says whether pressure is active. In addition,
# `kubectl get -o json` pretty-prints as `"type": "DiskPressure"` (space after
# the colon), so grepping the JSON for '"type":"DiskPressure"' never reflects
# the real state. Read the condition status through jsonpath instead.
disk_pressure_active() {
  # Succeeds (exit 0) only when at least one node reports DiskPressure=True.
  # If kubectl fails we cannot tell, so report "not active": this wait is
  # best-effort and must not fail the job on its own.
  local statuses status
  statuses=$(kubectl get nodes \
    -o jsonpath='{.items[*].status.conditions[?(@.type=="DiskPressure")].status}' \
    2>/dev/null) || return 1
  # jsonpath prints one status per node separated by whitespace; the unquoted
  # expansion is deliberate so each status becomes its own word.
  for status in $statuses; do
    if [ "$status" = "True" ]; then
      return 0
    fi
  done
  return 1
}
echo "Waiting for disk pressure condition to clear on nodes..."
for i in {1..20}; do
  if disk_pressure_active; then
    echo "Disk pressure condition still present, waiting... ($i/20)"
    sleep 2
  else
    echo "Disk pressure condition cleared, proceeding with test"
    break
  fi
done
# Final check - if condition still exists, remove taint and wait a bit more
if disk_pressure_active; then
  echo "WARNING: Disk pressure condition still exists. Removing taint and waiting 10 seconds..."
  NODE_NAMES=$(kubectl get nodes -o name 2>/dev/null | sed 's/node\///' || echo "")
  # Node names contain no whitespace, so word-splitting the list is safe here.
  for node in $NODE_NAMES; do
    # Best-effort: the taint may already be absent on some nodes.
    kubectl taint nodes "$node" node.kubernetes.io/disk-pressure- 2>/dev/null || true
  done
  sleep 10
fi
- name: Run cloud-runner-image test (validate image creation)
timeout-minutes: 10
run: yarn run test "cloud-runner-image" --detectOpenHandles --forceExit --runInBand
@@ -471,6 +492,27 @@ jobs:
echo "Taint removed. Checking nodes..."
kubectl describe nodes | grep -i taint || echo "No taints found"
fi
# Wait for the DiskPressure node condition itself to clear (not just the taint).
#
# Every node always lists a DiskPressure entry in status.conditions; only the
# entry's status ("True"/"False") says whether pressure is active. In addition,
# `kubectl get -o json` pretty-prints as `"type": "DiskPressure"` (space after
# the colon), so grepping the JSON for '"type":"DiskPressure"' never reflects
# the real state. Read the condition status through jsonpath instead.
disk_pressure_active() {
  # Succeeds (exit 0) only when at least one node reports DiskPressure=True.
  # If kubectl fails we cannot tell, so report "not active": this wait is
  # best-effort and must not fail the job on its own.
  local statuses status
  statuses=$(kubectl get nodes \
    -o jsonpath='{.items[*].status.conditions[?(@.type=="DiskPressure")].status}' \
    2>/dev/null) || return 1
  # jsonpath prints one status per node separated by whitespace; the unquoted
  # expansion is deliberate so each status becomes its own word.
  for status in $statuses; do
    if [ "$status" = "True" ]; then
      return 0
    fi
  done
  return 1
}
echo "Waiting for disk pressure condition to clear on nodes..."
for i in {1..20}; do
  if disk_pressure_active; then
    echo "Disk pressure condition still present, waiting... ($i/20)"
    sleep 2
  else
    echo "Disk pressure condition cleared, proceeding with test"
    break
  fi
done
# Final check - if condition still exists, remove taint and wait a bit more
if disk_pressure_active; then
  echo "WARNING: Disk pressure condition still exists. Removing taint and waiting 10 seconds..."
  NODE_NAMES=$(kubectl get nodes -o name 2>/dev/null | sed 's/node\///' || echo "")
  # Node names contain no whitespace, so word-splitting the list is safe here.
  for node in $NODE_NAMES; do
    # Best-effort: the taint may already be absent on some nodes.
    kubectl taint nodes "$node" node.kubernetes.io/disk-pressure- 2>/dev/null || true
  done
  sleep 10
fi
- name: Run cloud-runner-s3-steps test (validate S3 operations with K8s)
timeout-minutes: 30
run: yarn run test "cloud-runner-s3-steps" --detectOpenHandles --forceExit --runInBand