Cloud runner develop - Stabilizes kubernetes provider (#531)

* fixes

* fixes

* fixes

* fixes

* fixes

* check for startup message in workflows

* check for startup message in workflows

* check for startup message in workflows

* check for startup message in workflows

* check for startup message in workflows

* check for startup message in workflows

* Update cloud-runner-ci-pipeline.yml

* Update cloud-runner-ci-pipeline.yml

* no storage class specified

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* updates

* log file path

* latest develop

* log file path

* log file path

* Update package.json

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* log file path

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* stream logs through standard input and new remote client cli command

* update pipeline to use k3s

* version: 'latest'

* fixes

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* disable aws pipe for now

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* push k8s logs to LOG SERVICE IP

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* tests

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* podname logs for log service

* hashed logs

* hashed logs

* hashed logs

* hashed logs

* hashed logs

* hashed logs

* no wait, just repeat logs

* no wait, just repeat logs

* remove typo - double await

* test fix - kubernetes - name typo in github yaml

* test fix - kubernetes - name typo in github yaml

* check missing log file

* check missing log file

* Push to steam test

* Push to steam test

* Fix path

* k8s reliable log hashing

* k8s reliable log hashing

* k8s reliable log hashing

* hashed logging k8s

* hashed logging k8s

* hashed logging k8s

* hashed logging k8s

* hashed logging k8s

* hashed logging k8s

* Include log chunk when task runner sees log update, clarify if we can pull logs from same line or next line

* Include log chunk when task runner sees log update, clarify if we can pull logs from same line or next line

* Include log chunk when task runner sees log update, clarify if we can pull logs from same line or next line

* Include log chunk when task runner sees log update, clarify if we can pull logs from same line or next line

* Include log chunk when task runner sees log update, clarify if we can pull logs from same line or next line

* Fix exit flow for k8s job

* hash comparison logging for log complete in k8s flow

* Interrupt k8s logs when logs found

* cleanup async parameter

* cleanup async parameter

* cleanup async parameter

* fixes

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix
This commit is contained in:
Frostebite
2024-02-06 23:46:31 +00:00
committed by GitHub
parent 2800d14403
commit e73b48fb38
47 changed files with 1456 additions and 856 deletions

View File

@@ -1,6 +1,9 @@
import CloudRunner from '../../../cloud-runner';
export class TaskDefinitionFormation {
public static readonly description: string = `Game CI Cloud Runner Task Stack`;
public static readonly formation: string = `AWSTemplateFormatVersion: 2010-09-09
public static get formation(): string {
return `AWSTemplateFormatVersion: 2010-09-09
Description: ${TaskDefinitionFormation.description}
Parameters:
EnvironmentName:
@@ -26,11 +29,11 @@ Parameters:
Default: 80
Description: What port number the application inside the docker container is binding to
ContainerCpu:
Default: 1024
Default: ${CloudRunner.buildParameters.containerCpu}
Type: Number
Description: How much CPU to give the container. 1024 is 1 CPU
ContainerMemory:
Default: 4096
Default: ${CloudRunner.buildParameters.containerMemory}
Type: Number
Description: How much memory in megabytes to give the container
BUILDGUID:
@@ -92,7 +95,7 @@ Resources:
EFSVolumeConfiguration:
FilesystemId:
'Fn::ImportValue': !Sub '${'${EnvironmentName}'}:EfsFileStorageId'
TransitEncryption: ENABLED
TransitEncryption: DISABLED
RequiresCompatibilities:
- FARGATE
ExecutionRoleArn:
@@ -135,6 +138,7 @@ Resources:
DependsOn:
- LogGroup
`;
}
public static streamLogs = `
SubscriptionFilter:
Type: 'AWS::Logs::SubscriptionFilter'

View File

@@ -57,8 +57,6 @@ class AWSBuildEnvironment implements ProviderInterface {
}
async cleanupWorkflow(
// eslint-disable-next-line no-unused-vars
buildGuid: string,
// eslint-disable-next-line no-unused-vars
buildParameters: BuildParameters,
// eslint-disable-next-line no-unused-vars

View File

@@ -41,7 +41,6 @@ class LocalDockerCloudRunner implements ProviderInterface {
return new Promise((result) => result(``));
}
async cleanupWorkflow(
buildGuid: string,
buildParameters: BuildParameters,
// eslint-disable-next-line no-unused-vars
branchName: string,

View File

@@ -14,12 +14,17 @@ import { CoreV1Api } from '@kubernetes/client-node';
import CloudRunner from '../../cloud-runner';
import { ProviderResource } from '../provider-resource';
import { ProviderWorkflow } from '../provider-workflow';
import { RemoteClientLogger } from '../../remote-client/remote-client-logger';
import { KubernetesRole } from './kubernetes-role';
import { CloudRunnerSystem } from '../../services/core/cloud-runner-system';
class Kubernetes implements ProviderInterface {
public static Instance: Kubernetes;
public kubeConfig!: k8s.KubeConfig;
public kubeClient!: k8s.CoreV1Api;
public kubeClientApps!: k8s.AppsV1Api;
public kubeClientBatch!: k8s.BatchV1Api;
public rbacAuthorizationV1Api!: k8s.RbacAuthorizationV1Api;
public buildGuid: string = '';
public buildParameters!: BuildParameters;
public pvcName: string = '';
@@ -30,6 +35,7 @@ class Kubernetes implements ProviderInterface {
public containerName: string = '';
public cleanupCronJobName: string = '';
public serviceAccountName: string = '';
public ip: string = '';
// eslint-disable-next-line no-unused-vars
constructor(buildParameters: BuildParameters) {
@@ -37,11 +43,30 @@ class Kubernetes implements ProviderInterface {
this.kubeConfig = new k8s.KubeConfig();
this.kubeConfig.loadFromDefault();
this.kubeClient = this.kubeConfig.makeApiClient(k8s.CoreV1Api);
this.kubeClientApps = this.kubeConfig.makeApiClient(k8s.AppsV1Api);
this.kubeClientBatch = this.kubeConfig.makeApiClient(k8s.BatchV1Api);
this.rbacAuthorizationV1Api = this.kubeConfig.makeApiClient(k8s.RbacAuthorizationV1Api);
this.namespace = 'default';
CloudRunnerLogger.log('Loaded default Kubernetes configuration for this environment');
}
async PushLogUpdate(logs: string) {
// push logs to nginx file server via 'LOG_SERVICE_IP' env var
const ip = process.env[`LOG_SERVICE_IP`];
if (ip === undefined) {
RemoteClientLogger.logWarning(`LOG_SERVICE_IP not set, skipping log push`);
return;
}
const url = `http://${ip}/api/log`;
RemoteClientLogger.log(`Pushing logs to ${url}`);
// logs to base64
logs = Buffer.from(logs).toString('base64');
const response = await CloudRunnerSystem.Run(`curl -X POST -d "${logs}" ${url}`, false, true);
RemoteClientLogger.log(`Pushed logs to ${url} ${response}`);
}
async listResources(): Promise<ProviderResource[]> {
const pods = await this.kubeClient.listNamespacedPod(this.namespace);
const serviceAccounts = await this.kubeClient.listNamespacedServiceAccount(this.namespace);
@@ -115,9 +140,10 @@ class Kubernetes implements ProviderInterface {
CloudRunnerLogger.log('Cloud Runner K8s workflow!');
// Setup
const id = BuildParameters.shouldUseRetainedWorkspaceMode(this.buildParameters)
? CloudRunner.lockedWorkspace
: this.buildParameters.buildGuid;
const id =
BuildParameters && BuildParameters.shouldUseRetainedWorkspaceMode(this.buildParameters)
? CloudRunner.lockedWorkspace
: this.buildParameters.buildGuid;
this.pvcName = `unity-builder-pvc-${id}`;
await KubernetesStorage.createPersistentVolumeClaim(
this.buildParameters,
@@ -137,10 +163,7 @@ class Kubernetes implements ProviderInterface {
CloudRunnerLogger.log('Watching pod until running');
await KubernetesTaskRunner.watchUntilPodRunning(this.kubeClient, this.podName, this.namespace);
CloudRunnerLogger.log('Pod running, streaming logs');
CloudRunnerLogger.log(
`Starting logs follow for pod: ${this.podName} container: ${this.containerName} namespace: ${this.namespace} pvc: ${this.pvcName} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}`,
);
CloudRunnerLogger.log('Pod is running');
output += await KubernetesTaskRunner.runTask(
this.kubeConfig,
this.kubeClient,
@@ -232,8 +255,12 @@ class Kubernetes implements ProviderInterface {
this.jobName,
k8s,
this.containerName,
this.ip,
);
await new Promise((promise) => setTimeout(promise, 15000));
// await KubernetesRole.createRole(this.serviceAccountName, this.namespace, this.rbacAuthorizationV1Api);
const result = await this.kubeClientBatch.createNamespacedJob(this.namespace, jobSpec);
CloudRunnerLogger.log(`Build job created`);
await new Promise((promise) => setTimeout(promise, 5000));
@@ -257,6 +284,7 @@ class Kubernetes implements ProviderInterface {
try {
await this.kubeClientBatch.deleteNamespacedJob(this.jobName, this.namespace);
await this.kubeClient.deleteNamespacedPod(this.podName, this.namespace);
await KubernetesRole.deleteRole(this.serviceAccountName, this.namespace, this.rbacAuthorizationV1Api);
} catch (error: any) {
CloudRunnerLogger.log(`Failed to cleanup`);
if (error.response.body.reason !== `NotFound`) {
@@ -275,14 +303,13 @@ class Kubernetes implements ProviderInterface {
}
async cleanupWorkflow(
buildGuid: string,
buildParameters: BuildParameters,
// eslint-disable-next-line no-unused-vars
branchName: string,
// eslint-disable-next-line no-unused-vars
defaultSecretsArray: { ParameterKey: string; EnvironmentVariable: string; ParameterValue: string }[],
) {
if (BuildParameters.shouldUseRetainedWorkspaceMode(buildParameters)) {
if (BuildParameters && BuildParameters.shouldUseRetainedWorkspaceMode(buildParameters)) {
return;
}
CloudRunnerLogger.log(`deleting PVC`);

View File

@@ -20,6 +20,7 @@ class KubernetesJobSpecFactory {
jobName: string,
k8s: any,
containerName: string,
ip: string = '',
) {
const job = new k8s.V1Job();
job.apiVersion = 'batch/v1';
@@ -81,6 +82,7 @@ class KubernetesJobSpecFactory {
return environmentVariable;
}),
{ name: 'LOG_SERVICE_IP', value: ip },
],
volumeMounts: [
{
@@ -92,9 +94,8 @@ class KubernetesJobSpecFactory {
preStop: {
exec: {
command: [
'bin/bash',
'-c',
`cd /data/builder/action/steps;
`wait 60s;
cd /data/builder/action/steps;
chmod +x /return_license.sh;
/return_license.sh;`,
],
@@ -108,6 +109,16 @@ class KubernetesJobSpecFactory {
},
};
if (process.env['CLOUD_RUNNER_MINIKUBE']) {
job.spec.template.spec.volumes[0] = {
name: 'build-mount',
hostPath: {
path: `/data`,
type: `Directory`,
},
};
}
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '10Gi';
return job;

View File

@@ -0,0 +1,53 @@
import { RbacAuthorizationV1Api } from '@kubernetes/client-node';
class KubernetesRole {
static async createRole(serviceAccountName: string, namespace: string, rbac: RbacAuthorizationV1Api) {
// create admin kubernetes role and role binding
const roleBinding = {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'RoleBinding',
metadata: {
name: `${serviceAccountName}-admin`,
namespace,
},
subjects: [
{
kind: 'ServiceAccount',
name: serviceAccountName,
namespace,
},
],
roleRef: {
apiGroup: 'rbac.authorization.k8s.io',
kind: 'Role',
name: `${serviceAccountName}-admin`,
},
};
const role = {
apiVersion: 'rbac.authorization.k8s.io/v1',
kind: 'Role',
metadata: {
name: `${serviceAccountName}-admin`,
namespace,
},
rules: [
{
apiGroups: ['*'],
resources: ['*'],
verbs: ['*'],
},
],
};
const roleBindingResponse = await rbac.createNamespacedRoleBinding(namespace, roleBinding);
const roleResponse = await rbac.createNamespacedRole(namespace, role);
return { roleBindingResponse, roleResponse };
}
public static async deleteRole(serviceAccountName: string, namespace: string, rbac: RbacAuthorizationV1Api) {
await rbac.deleteNamespacedRoleBinding(`${serviceAccountName}-admin`, namespace);
await rbac.deleteNamespacedRole(`${serviceAccountName}-admin`, namespace);
}
}
export { KubernetesRole };

View File

@@ -9,7 +9,7 @@ class KubernetesServiceAccount {
serviceAccount.metadata = {
name: serviceAccountName,
};
serviceAccount.automountServiceAccountToken = false;
serviceAccount.automountServiceAccountToken = true;
return kubeClient.createNamespacedServiceAccount(namespace, serviceAccount);
}

View File

@@ -7,7 +7,6 @@ import KubernetesPods from './kubernetes-pods';
import { FollowLogStreamService } from '../../services/core/follow-log-stream-service';
class KubernetesTaskRunner {
static lastReceivedTimestamp: number = 0;
static readonly maxRetry: number = 3;
static lastReceivedMessage: string = ``;
@@ -22,38 +21,33 @@ class KubernetesTaskRunner {
let output = '';
let shouldReadLogs = true;
let shouldCleanup = true;
let sinceTime = ``;
let retriesAfterFinish = 0;
// eslint-disable-next-line no-constant-condition
while (true) {
await new Promise((resolve) => setTimeout(resolve, 3000));
const lastReceivedMessage =
KubernetesTaskRunner.lastReceivedTimestamp > 0
? `\nLast Log Message "${this.lastReceivedMessage}" ${this.lastReceivedTimestamp}`
: ``;
CloudRunnerLogger.log(
`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}\n${lastReceivedMessage}`,
`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}`,
);
if (KubernetesTaskRunner.lastReceivedTimestamp > 0) {
const currentDate = new Date(KubernetesTaskRunner.lastReceivedTimestamp);
const dateTimeIsoString = currentDate.toISOString();
sinceTime = ` --since-time="${dateTimeIsoString}"`;
}
let extraFlags = ``;
extraFlags += (await KubernetesPods.IsPodRunning(podName, namespace, kubeClient))
? ` -f -c ${containerName}`
: ` --previous`;
let lastMessageSeenIncludedInChunk = false;
let lastMessageSeen = false;
let logs;
const callback = (outputChunk: string) => {
output += outputChunk;
// split output chunk and handle per line
for (const chunk of outputChunk.split(`\n`)) {
({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
chunk,
shouldReadLogs,
shouldCleanup,
output,
));
}
};
try {
logs = await CloudRunnerSystem.Run(
`kubectl logs ${podName}${extraFlags} --timestamps${sinceTime}`,
false,
true,
);
await CloudRunnerSystem.Run(`kubectl logs ${podName}${extraFlags}`, false, true, callback);
} catch (error: any) {
await new Promise((resolve) => setTimeout(resolve, 3000));
const continueStreaming = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
@@ -68,34 +62,6 @@ class KubernetesTaskRunner {
}
throw error;
}
const splitLogs = logs.split(`\n`);
for (const chunk of splitLogs) {
if (
chunk.replace(/\s/g, ``) === KubernetesTaskRunner.lastReceivedMessage.replace(/\s/g, ``) &&
KubernetesTaskRunner.lastReceivedMessage.replace(/\s/g, ``) !== ``
) {
CloudRunnerLogger.log(`Previous log message found ${chunk}`);
lastMessageSeenIncludedInChunk = true;
}
}
for (const chunk of splitLogs) {
const newDate = Date.parse(`${chunk.toString().split(`Z `)[0]}Z`);
if (chunk.replace(/\s/g, ``) === KubernetesTaskRunner.lastReceivedMessage.replace(/\s/g, ``)) {
lastMessageSeen = true;
}
if (lastMessageSeenIncludedInChunk && !lastMessageSeen) {
continue;
}
const message = CloudRunner.buildParameters.cloudRunnerDebug ? chunk : chunk.split(`Z `)[1];
KubernetesTaskRunner.lastReceivedMessage = chunk;
KubernetesTaskRunner.lastReceivedTimestamp = newDate;
({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
message,
shouldReadLogs,
shouldCleanup,
output,
));
}
if (FollowLogStreamService.DidReceiveEndOfTransmission) {
CloudRunnerLogger.log('end of log stream');
break;
@@ -106,14 +72,14 @@ class KubernetesTaskRunner {
}
static async watchUntilPodRunning(kubeClient: CoreV1Api, podName: string, namespace: string) {
let success: boolean = false;
let waitComplete: boolean = false;
let message = ``;
CloudRunnerLogger.log(`Watching ${podName} ${namespace}`);
await waitUntil(
async () => {
const status = await kubeClient.readNamespacedPodStatus(podName, namespace);
const phase = status?.body.status?.phase;
success = phase === 'Running';
waitComplete = phase !== 'Pending';
message = `Phase:${status.body.status?.phase} \n Reason:${
status.body.status?.conditions?.[0].reason || ''
} \n Message:${status.body.status?.conditions?.[0].message || ''}`;
@@ -133,7 +99,7 @@ class KubernetesTaskRunner {
// 4,
// ),
// );
if (success || phase !== 'Pending') return true;
if (waitComplete || phase !== 'Pending') return true;
return false;
},
@@ -142,11 +108,11 @@ class KubernetesTaskRunner {
intervalBetweenAttempts: 15000,
},
);
if (!success) {
if (!waitComplete) {
CloudRunnerLogger.log(message);
}
return success;
return waitComplete;
}
}

View File

@@ -32,8 +32,6 @@ class LocalCloudRunner implements ProviderInterface {
throw new Error('Method not implemented.');
}
cleanupWorkflow(
// eslint-disable-next-line no-unused-vars
buildGuid: string,
// eslint-disable-next-line no-unused-vars
buildParameters: BuildParameters,
// eslint-disable-next-line no-unused-vars

View File

@@ -6,8 +6,6 @@ import { ProviderWorkflow } from './provider-workflow';
export interface ProviderInterface {
cleanupWorkflow(
// eslint-disable-next-line no-unused-vars
buildGuid: string,
// eslint-disable-next-line no-unused-vars
buildParameters: BuildParameters,
// eslint-disable-next-line no-unused-vars

View File

@@ -25,8 +25,6 @@ class TestCloudRunner implements ProviderInterface {
throw new Error('Method not implemented.');
}
cleanupWorkflow(
// eslint-disable-next-line no-unused-vars
buildGuid: string,
// eslint-disable-next-line no-unused-vars
buildParameters: BuildParameters,
// eslint-disable-next-line no-unused-vars