mirror of
https://github.com/game-ci/unity-builder.git
synced 2026-02-03 23:49:08 +08:00
fixes
This commit is contained in:
62
.github/workflows/cloud-runner-integrity.yml
vendored
62
.github/workflows/cloud-runner-integrity.yml
vendored
@@ -30,6 +30,8 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
K3D_NODE_CONTAINERS: 'k3d-unity-builder-agent-0'
|
||||
AWS_FORCE_PROVIDER: aws
|
||||
RESOURCE_TRACKING: 'true'
|
||||
steps:
|
||||
# ==========================================
|
||||
# SETUP SECTION
|
||||
@@ -223,66 +225,6 @@ jobs:
|
||||
kubectl run test-localstack --image=curlimages/curl --rm -i --restart=Never --timeout=10s -- \
|
||||
curl -v --max-time 5 http://host.k3d.internal:4566/_localstack/health 2>&1 | head -20 || \
|
||||
echo "Cluster connectivity test - if this fails, LocalStack may not be accessible from k3d"
|
||||
- name: Pre-pull Unity image into k3d cluster
|
||||
timeout-minutes: 15
|
||||
run: |
|
||||
# Pre-pull the Unity image into the k3d cluster before running tests
|
||||
# This ensures it's cached in the k3d node's containerd and won't need to be pulled during test execution
|
||||
UNITY_IMAGE="unityci/editor:ubuntu-2021.3.45f1-base-3"
|
||||
|
||||
# Check disk space before pulling
|
||||
echo "Checking disk space before pre-pulling Unity image..."
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0 k3d-unity-builder-server-0}"
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
echo "Disk space in $NODE:"
|
||||
docker exec "$NODE" sh -c "df -h /var/lib/rancher/k3s 2>/dev/null || df -h / 2>/dev/null || true" || true
|
||||
done
|
||||
|
||||
# Clean up before pulling to ensure we have space
|
||||
echo "Cleaning up before pre-pulling image..."
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "crictl rm --all 2>/dev/null || true" || true
|
||||
# Only remove non-Unity images to preserve space while keeping Unity image if already cached
|
||||
docker exec "$NODE" sh -c "for img in \$(crictl images -q 2>/dev/null); do repo=\$(crictl inspecti \$img --format '{{.repo}}' 2>/dev/null || echo ''); if echo \"\$repo\" | grep -qvE 'unityci/editor|unity'; then crictl rmi \$img 2>/dev/null || true; fi; done" || true
|
||||
done || true
|
||||
|
||||
# Explicitly pull the image on BOTH nodes to ensure it's cached wherever pods might be scheduled
|
||||
# This prevents "no space left" errors when pods are scheduled on nodes without the cached image
|
||||
echo "Pulling Unity image directly on each node to ensure it's cached..."
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
echo "Checking if image already exists on $NODE..."
|
||||
IMAGE_EXISTS=$(docker exec "$NODE" sh -c "crictl images | grep -q unityci/editor && echo 'yes' || echo 'no'" || echo "no")
|
||||
if [ "$IMAGE_EXISTS" = "yes" ]; then
|
||||
echo "Unity image already cached on $NODE, skipping pull"
|
||||
else
|
||||
echo "Pulling Unity image on $NODE (this may take several minutes for 3.9GB image)..."
|
||||
# Use crictl pull directly in the node's containerd
|
||||
# This ensures the image is cached in the node's local storage
|
||||
# Use timeout to prevent hanging indefinitely (10 minutes max)
|
||||
if timeout 600 docker exec "$NODE" sh -c "crictl pull $UNITY_IMAGE 2>&1"; then
|
||||
echo "Successfully pulled image on $NODE"
|
||||
# Verify it's cached
|
||||
docker exec "$NODE" sh -c "crictl images | grep unityci/editor || echo 'Warning: Image not found after pull'" || true
|
||||
else
|
||||
PULL_EXIT_CODE=$?
|
||||
if [ $PULL_EXIT_CODE -eq 124 ]; then
|
||||
echo "Warning: Image pull on $NODE timed out after 10 minutes. Checking if partially cached..."
|
||||
else
|
||||
echo "Warning: Image pull on $NODE failed (exit code: $PULL_EXIT_CODE). Checking if partially cached..."
|
||||
fi
|
||||
docker exec "$NODE" sh -c "crictl images | grep unityci/editor || echo 'Image not found on $NODE'" || true
|
||||
echo "Note: Pods scheduled on $NODE will attempt to pull the image during runtime, which may fail if disk space is insufficient."
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# Verify image is cached
|
||||
echo "Checking if Unity image is cached..."
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "crictl images | grep unityci/editor || echo 'Image not found in $NODE'" || true
|
||||
done
|
||||
|
||||
echo "Image pre-pull completed. Image should be cached in k3d node."
|
||||
- name: Clean up K8s test resources before tests
|
||||
run: |
|
||||
echo "Cleaning up K8s test resources..."
|
||||
|
||||
@@ -194,6 +194,10 @@ inputs:
|
||||
description:
|
||||
'[CloudRunner] Either local, k8s or aws can be used to run builds on a remote cluster. Additional parameters must
|
||||
be configured.'
|
||||
resourceTracking:
|
||||
default: 'false'
|
||||
required: false
|
||||
description: '[CloudRunner] Enable resource tracking logs for disk usage and allocation summaries.'
|
||||
containerCpu:
|
||||
default: ''
|
||||
required: false
|
||||
|
||||
@@ -19,6 +19,7 @@ import SharedWorkspaceLocking from './services/core/shared-workspace-locking';
|
||||
import { FollowLogStreamService } from './services/core/follow-log-stream-service';
|
||||
import CloudRunnerResult from './services/core/cloud-runner-result';
|
||||
import CloudRunnerOptions from './options/cloud-runner-options';
|
||||
import ResourceTracking from './services/core/resource-tracking';
|
||||
|
||||
class CloudRunner {
|
||||
public static Provider: ProviderInterface;
|
||||
@@ -37,6 +38,8 @@ class CloudRunner {
|
||||
CloudRunnerLogger.setup();
|
||||
CloudRunnerLogger.log(`Setting up cloud runner`);
|
||||
CloudRunner.buildParameters = buildParameters;
|
||||
ResourceTracking.logAllocationSummary('setup');
|
||||
await ResourceTracking.logDiskUsageSnapshot('setup');
|
||||
if (CloudRunner.buildParameters.githubCheckId === ``) {
|
||||
CloudRunner.buildParameters.githubCheckId = await GitHub.createGitHubCheck(CloudRunner.buildParameters.buildGuid);
|
||||
}
|
||||
|
||||
@@ -287,6 +287,10 @@ class CloudRunnerOptions {
|
||||
return CloudRunnerOptions.getInput('asyncCloudRunner') === 'true';
|
||||
}
|
||||
|
||||
/**
 * Whether resource tracking logs were requested.
 *
 * @returns `true` only when the `resourceTracking` input is exactly the string `'true'`;
 * any other value yields `false`.
 */
public static get resourceTracking(): boolean {
|
||||
return CloudRunnerOptions.getInput('resourceTracking') === 'true';
|
||||
}
|
||||
|
||||
/**
 * Whether the `useLargePackages` option was requested.
 *
 * @returns `true` only when the `useLargePackages` input is exactly the string `true`;
 * any other value yields `false`.
 */
public static get useLargePackages(): boolean {
|
||||
return CloudRunnerOptions.getInput(`useLargePackages`) === `true`;
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ import { ProviderWorkflow } from '../provider-workflow';
|
||||
import { TaskService } from './services/task-service';
|
||||
import CloudRunnerOptions from '../../options/cloud-runner-options';
|
||||
import { AwsClientFactory } from './aws-client-factory';
|
||||
import ResourceTracking from '../../services/core/resource-tracking';
|
||||
|
||||
class AWSBuildEnvironment implements ProviderInterface {
|
||||
private baseStackName: string;
|
||||
@@ -90,6 +91,8 @@ class AWSBuildEnvironment implements ProviderInterface {
|
||||
secrets: CloudRunnerSecret[],
|
||||
): Promise<string> {
|
||||
process.env.AWS_REGION = Input.region;
|
||||
ResourceTracking.logAllocationSummary('aws workflow');
|
||||
await ResourceTracking.logDiskUsageSnapshot('aws workflow (host)');
|
||||
AwsClientFactory.getECS();
|
||||
const CF = AwsClientFactory.getCloudFormation();
|
||||
AwsClientFactory.getKinesis();
|
||||
|
||||
@@ -17,6 +17,7 @@ import { ProviderWorkflow } from '../provider-workflow';
|
||||
import { RemoteClientLogger } from '../../remote-client/remote-client-logger';
|
||||
import { KubernetesRole } from './kubernetes-role';
|
||||
import { CloudRunnerSystem } from '../../services/core/cloud-runner-system';
|
||||
import ResourceTracking from '../../services/core/resource-tracking';
|
||||
|
||||
class Kubernetes implements ProviderInterface {
|
||||
public static Instance: Kubernetes;
|
||||
@@ -137,6 +138,9 @@ class Kubernetes implements ProviderInterface {
|
||||
): Promise<string> {
|
||||
try {
|
||||
CloudRunnerLogger.log('Cloud Runner K8s workflow!');
|
||||
ResourceTracking.logAllocationSummary('k8s workflow');
|
||||
await ResourceTracking.logDiskUsageSnapshot('k8s workflow (host)');
|
||||
await ResourceTracking.logK3dNodeDiskUsage('k8s workflow (before job)');
|
||||
|
||||
// Setup
|
||||
const id =
|
||||
|
||||
@@ -14,11 +14,13 @@ import GitHub from '../../github';
|
||||
import BuildParameters from '../../build-parameters';
|
||||
import { Cli } from '../../cli/cli';
|
||||
import CloudRunnerOptions from '../options/cloud-runner-options';
|
||||
import ResourceTracking from '../services/core/resource-tracking';
|
||||
|
||||
export class RemoteClient {
|
||||
@CliFunction(`remote-cli-pre-build`, `sets up a repository, usually before a game-ci build`)
|
||||
static async setupRemoteClient() {
|
||||
CloudRunnerLogger.log(`bootstrap game ci cloud runner...`);
|
||||
await ResourceTracking.logDiskUsageSnapshot('remote-cli-pre-build (start)');
|
||||
if (!(await RemoteClient.handleRetainedWorkspace())) {
|
||||
await RemoteClient.bootstrapRepository();
|
||||
}
|
||||
@@ -206,6 +208,7 @@ export class RemoteClient {
|
||||
// that read from the log file rather than stdout
|
||||
RemoteClientLogger.log(successMessage);
|
||||
CloudRunnerLogger.log(successMessage);
|
||||
await ResourceTracking.logDiskUsageSnapshot('remote-cli-post-build (end)');
|
||||
|
||||
return new Promise((result) => result(``));
|
||||
}
|
||||
|
||||
84
src/model/cloud-runner/services/core/resource-tracking.ts
Normal file
84
src/model/cloud-runner/services/core/resource-tracking.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import CloudRunnerLogger from './cloud-runner-logger';
|
||||
import CloudRunnerOptions from '../../options/cloud-runner-options';
|
||||
import CloudRunner from '../../cloud-runner';
|
||||
import { CloudRunnerSystem } from './cloud-runner-system';
|
||||
|
||||
/**
 * Opt-in diagnostics that log the resource allocations configured for a build and
 * disk usage snapshots of the host and of the k3d node containers.
 *
 * Every public method is a no-op unless {@link ResourceTracking.isEnabled} returns true.
 * The helpers are best-effort: a failing command is logged, never rethrown, so that
 * diagnostics cannot fail a build.
 */
class ResourceTracking {
  /** Shell commands run, in order, for each host disk usage snapshot. Each command doubles as its log label. */
  private static readonly diskUsageCommands: readonly string[] = [
    'df -h',
    'du -sh .',
    'du -sh ./cloud-runner-cache',
    'du -sh ./temp',
    'du -sh ./logs',
  ];

  /** Docker container names of the k3d nodes whose disk usage is inspected. */
  private static readonly k3dNodes: readonly string[] = ['k3d-unity-builder-agent-0', 'k3d-unity-builder-server-0'];

  /**
   * Tracking is enabled when any of the following holds: the `resourceTracking` option,
   * the `cloudRunnerDebug` option, or the `cloudRunnerTests` environment variable being `'true'`.
   */
  static isEnabled(): boolean {
    return (
      CloudRunnerOptions.resourceTracking ||
      CloudRunnerOptions.cloudRunnerDebug ||
      process.env['cloudRunnerTests'] === 'true'
    );
  }

  /**
   * Logs a JSON summary of the resource-related build parameters.
   *
   * @param context Free-form label included in the log header to identify the call site.
   */
  static logAllocationSummary(context: string) {
    if (!ResourceTracking.isEnabled()) {
      return;
    }

    const buildParameters = CloudRunner.buildParameters;

    // Diagnostics must never crash the build: if this runs before the build parameters
    // have been assigned, report that instead of throwing on a property access.
    if (!buildParameters) {
      CloudRunnerLogger.log(`[ResourceTracking] Allocation summary (${context}): build parameters not available`);

      return;
    }

    const allocations = {
      providerStrategy: buildParameters.providerStrategy,
      containerCpu: buildParameters.containerCpu,
      containerMemory: buildParameters.containerMemory,
      dockerCpuLimit: buildParameters.dockerCpuLimit,
      dockerMemoryLimit: buildParameters.dockerMemoryLimit,
      kubeVolumeSize: buildParameters.kubeVolumeSize,
      kubeStorageClass: buildParameters.kubeStorageClass,
      kubeVolume: buildParameters.kubeVolume,
      containerNamespace: buildParameters.containerNamespace,
      storageProvider: buildParameters.storageProvider,
      rcloneRemote: buildParameters.rcloneRemote,
      dockerWorkspacePath: buildParameters.dockerWorkspacePath,
      cacheKey: buildParameters.cacheKey,
      maxRetainedWorkspaces: buildParameters.maxRetainedWorkspaces,
      useCompressionStrategy: buildParameters.useCompressionStrategy,
      useLargePackages: buildParameters.useLargePackages,
      // NOTE(review): reported as a fixed value rather than read from configuration;
      // presumably mirrors what the k8s job spec requests - confirm against the job spec.
      ephemeralStorageRequest: process.env['cloudRunnerTests'] === 'true' ? 'not set' : '2Gi',
    };

    CloudRunnerLogger.log(`[ResourceTracking] Allocation summary (${context}):`);
    CloudRunnerLogger.log(JSON.stringify(allocations, undefined, 2));
  }

  /**
   * Logs overall disk usage plus the size of the working directory and of its
   * `cloud-runner-cache`, `temp` and `logs` sub-directories.
   *
   * A command that fails (for example because a directory does not exist) is logged as a failure
   * and does not stop the remaining commands.
   *
   * @param context Free-form label included in the log header to identify the call site.
   */
  static async logDiskUsageSnapshot(context: string) {
    if (!ResourceTracking.isEnabled()) {
      return;
    }

    CloudRunnerLogger.log(`[ResourceTracking] Disk usage snapshot (${context})`);

    // Run sequentially so the log output keeps a stable, readable order.
    for (const command of ResourceTracking.diskUsageCommands) {
      await ResourceTracking.runAndLog(command, command);
    }
  }

  /**
   * Logs disk usage inside each known k3d node container via `docker exec`.
   *
   * @param context Free-form label included in the log header to identify the call site.
   */
  static async logK3dNodeDiskUsage(context: string) {
    if (!ResourceTracking.isEnabled()) {
      return;
    }

    CloudRunnerLogger.log(`[ResourceTracking] K3d node disk usage (${context})`);
    for (const node of ResourceTracking.k3dNodes) {
      await ResourceTracking.runAndLog(
        `k3d node ${node}`,

        // Prefer the k3s data directory, fall back to the root filesystem; the trailing
        // `|| true` keeps the command from failing when the container is absent.
        `docker exec ${node} sh -c "df -h /var/lib/rancher/k3s 2>/dev/null || df -h / 2>/dev/null || true" || true`,
      );
    }
  }

  /**
   * Runs a shell command and logs its trimmed output under the given label.
   * Errors are caught and logged; nothing is rethrown.
   *
   * @param label Text that prefixes the logged output.
   * @param command Shell command passed to `CloudRunnerSystem.Run`.
   */
  private static async runAndLog(label: string, command: string) {
    try {
      // NOTE(review): the two `true` flags are passed through unchanged from the original call;
      // their meaning is defined by CloudRunnerSystem.Run - confirm there.
      const output = await CloudRunnerSystem.Run(command, true, true);
      const trimmed = output.trim();
      CloudRunnerLogger.log(`[ResourceTracking] ${label}:\n${trimmed || 'no output'}`);
    } catch (error: unknown) {
      // Narrow instead of assuming an Error-like shape; prefer the message when there is one.
      const reason = error instanceof Error && error.message ? error.message : String(error);
      CloudRunnerLogger.log(`[ResourceTracking] ${label} failed: ${reason}`);
    }
  }
}
|
||||
|
||||
export default ResourceTracking;
|
||||
Reference in New Issue
Block a user