Merge pull request #80 from useblacksmith/gc-inode-tweaks
src: disable automatic buildkit GC
This commit is contained in:
commit
8a782a7c2b
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -110,7 +110,7 @@ describe('startBlacksmithBuilder', () => {
|
||||||
buildId: mockBuildId,
|
buildId: mockBuildId,
|
||||||
exposeId: mockExposeId
|
exposeId: mockExposeId
|
||||||
});
|
});
|
||||||
expect(setupBuilder.startAndConfigureBuildkitd).toHaveBeenCalledWith(mockParallelism, mockDevice);
|
expect(setupBuilder.startAndConfigureBuildkitd).toHaveBeenCalledWith(mockParallelism);
|
||||||
expect(core.warning).not.toHaveBeenCalled();
|
expect(core.warning).not.toHaveBeenCalled();
|
||||||
expect(reporter.reportBuildPushActionFailure).not.toHaveBeenCalled();
|
expect(reporter.reportBuildPushActionFailure).not.toHaveBeenCalled();
|
||||||
});
|
});
|
||||||
|
|
|
@ -81,7 +81,7 @@ export async function startBlacksmithBuilder(inputs: context.Inputs): Promise<{a
|
||||||
const parallelism = await getNumCPUs();
|
const parallelism = await getNumCPUs();
|
||||||
|
|
||||||
const buildkitdStartTime = Date.now();
|
const buildkitdStartTime = Date.now();
|
||||||
const buildkitdAddr = await startAndConfigureBuildkitd(parallelism, stickyDiskSetup.device);
|
const buildkitdAddr = await startAndConfigureBuildkitd(parallelism);
|
||||||
const buildkitdDurationMs = Date.now() - buildkitdStartTime;
|
const buildkitdDurationMs = Date.now() - buildkitdStartTime;
|
||||||
await reporter.reportMetric(Metric_MetricType.BPA_BUILDKITD_READY_DURATION_MS, buildkitdDurationMs);
|
await reporter.reportMetric(Metric_MetricType.BPA_BUILDKITD_READY_DURATION_MS, buildkitdDurationMs);
|
||||||
|
|
||||||
|
|
|
@ -47,7 +47,7 @@ export function createBlacksmithAgentClient() {
|
||||||
return createClient(StickyDiskService, transport);
|
return createClient(StickyDiskService, transport);
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function reportBuildPushActionFailure(error?: Error, event?: string) {
|
export async function reportBuildPushActionFailure(error?: Error, event?: string, isWarning?: boolean) {
|
||||||
const requestOptions = {
|
const requestOptions = {
|
||||||
stickydisk_key: process.env.GITHUB_REPO_NAME || '',
|
stickydisk_key: process.env.GITHUB_REPO_NAME || '',
|
||||||
repo_name: process.env.GITHUB_REPO_NAME || '',
|
repo_name: process.env.GITHUB_REPO_NAME || '',
|
||||||
|
@ -55,7 +55,8 @@ export async function reportBuildPushActionFailure(error?: Error, event?: string
|
||||||
arch: process.env.BLACKSMITH_ENV?.includes('arm') ? 'arm64' : 'amd64',
|
arch: process.env.BLACKSMITH_ENV?.includes('arm') ? 'arm64' : 'amd64',
|
||||||
vm_id: process.env.VM_ID || '',
|
vm_id: process.env.VM_ID || '',
|
||||||
petname: process.env.PETNAME || '',
|
petname: process.env.PETNAME || '',
|
||||||
message: event ? `${event}: ${error?.message || ''}` : error?.message || ''
|
message: event ? `${event}: ${error?.message || ''}` : error?.message || '',
|
||||||
|
warning: isWarning || false
|
||||||
};
|
};
|
||||||
|
|
||||||
const client = createBlacksmithAPIClient();
|
const client = createBlacksmithAPIClient();
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as core from '@actions/core';
|
import * as core from '@actions/core';
|
||||||
import {exec, execSync, spawn} from 'child_process';
|
import {exec, spawn} from 'child_process';
|
||||||
import {promisify} from 'util';
|
import {promisify} from 'util';
|
||||||
import * as TOML from '@iarna/toml';
|
import * as TOML from '@iarna/toml';
|
||||||
import * as reporter from './reporter';
|
import * as reporter from './reporter';
|
||||||
import FormData from 'form-data';
|
|
||||||
|
|
||||||
const mountPoint = '/var/lib/buildkit';
|
const mountPoint = '/var/lib/buildkit';
|
||||||
const execAsync = promisify(exec);
|
const execAsync = promisify(exec);
|
||||||
|
@ -51,8 +50,7 @@ export async function getNumCPUs(): Promise<number> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function writeBuildkitdTomlFile(parallelism: number, device: string): Promise<void> {
|
async function writeBuildkitdTomlFile(parallelism: number): Promise<void> {
|
||||||
const diskSize = await getDiskSize(device);
|
|
||||||
const jsonConfig: TOML.JsonMap = {
|
const jsonConfig: TOML.JsonMap = {
|
||||||
root: '/var/lib/buildkit',
|
root: '/var/lib/buildkit',
|
||||||
grpc: {
|
grpc: {
|
||||||
|
@ -72,20 +70,11 @@ async function writeBuildkitdTomlFile(parallelism: number, device: string): Prom
|
||||||
worker: {
|
worker: {
|
||||||
oci: {
|
oci: {
|
||||||
enabled: true,
|
enabled: true,
|
||||||
gc: true,
|
// Disable automatic garbage collection, since we will prune manually. Automatic GC
|
||||||
gckeepstorage: diskSize.toString(),
|
// has been seen to negatively affect startup times of the daemon.
|
||||||
|
gc: false,
|
||||||
'max-parallelism': parallelism,
|
'max-parallelism': parallelism,
|
||||||
snapshotter: 'overlayfs',
|
snapshotter: 'overlayfs',
|
||||||
gcpolicy: [
|
|
||||||
{
|
|
||||||
all: true,
|
|
||||||
keepDuration: 1209600
|
|
||||||
},
|
|
||||||
{
|
|
||||||
all: true,
|
|
||||||
keepBytes: diskSize.toString()
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
containerd: {
|
containerd: {
|
||||||
enabled: false
|
enabled: false
|
||||||
|
@ -104,9 +93,9 @@ async function writeBuildkitdTomlFile(parallelism: number, device: string): Prom
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function startBuildkitd(parallelism: number, device: string): Promise<string> {
|
async function startBuildkitd(parallelism: number): Promise<string> {
|
||||||
try {
|
try {
|
||||||
await writeBuildkitdTomlFile(parallelism, device);
|
await writeBuildkitdTomlFile(parallelism);
|
||||||
await execAsync('sudo mkdir -p /run/buildkit');
|
await execAsync('sudo mkdir -p /run/buildkit');
|
||||||
await execAsync('sudo chmod 755 /run/buildkit');
|
await execAsync('sudo chmod 755 /run/buildkit');
|
||||||
const addr = 'unix:///run/buildkit/buildkitd.sock';
|
const addr = 'unix:///run/buildkit/buildkitd.sock';
|
||||||
|
@ -197,8 +186,8 @@ export async function getStickyDisk(options?: {signal?: AbortSignal}): Promise<{
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function startAndConfigureBuildkitd(parallelism: number, device: string): Promise<string> {
|
export async function startAndConfigureBuildkitd(parallelism: number): Promise<string> {
|
||||||
const buildkitdAddr = await startBuildkitd(parallelism, device);
|
const buildkitdAddr = await startBuildkitd(parallelism);
|
||||||
core.debug(`buildkitd daemon started at addr ${buildkitdAddr}`);
|
core.debug(`buildkitd daemon started at addr ${buildkitdAddr}`);
|
||||||
|
|
||||||
// Change permissions on the buildkitd socket to allow non-root access
|
// Change permissions on the buildkitd socket to allow non-root access
|
||||||
|
@ -245,9 +234,34 @@ export async function startAndConfigureBuildkitd(parallelism: number, device: st
|
||||||
core.warning(`Error checking buildkit workers: ${error.message}`);
|
core.warning(`Error checking buildkit workers: ${error.message}`);
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Start cache pruning in the background without blocking.
|
||||||
|
pruneBuildkitCache().catch(error => {
|
||||||
|
core.warning(`Background cache pruning failed: ${error.message}`);
|
||||||
|
});
|
||||||
|
|
||||||
return buildkitdAddr;
|
return buildkitdAddr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prunes buildkit cache data older than 14 days.
|
||||||
|
* We don't specify any keep bytes here since we are
|
||||||
|
* handling the ceph volume size limits ourselves in
|
||||||
|
* the VM Agent.
|
||||||
|
* @throws Error if buildctl prune command fails
|
||||||
|
*/
|
||||||
|
export async function pruneBuildkitCache(): Promise<void> {
|
||||||
|
try {
|
||||||
|
const fourteenDaysInHours = 14 * 24;
|
||||||
|
await execAsync(`sudo buildctl prune --keep-duration ${fourteenDaysInHours}h --all`);
|
||||||
|
core.debug('Successfully pruned buildkit cache');
|
||||||
|
} catch (error) {
|
||||||
|
core.warning(`Error pruning buildkit cache: ${error.message}`);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// setupStickyDisk mounts a sticky disk for the entity and returns the device information.
|
// setupStickyDisk mounts a sticky disk for the entity and returns the device information.
|
||||||
// throws an error if it is unable to do so because of a timeout or an error
|
// throws an error if it is unable to do so because of a timeout or an error
|
||||||
export async function setupStickyDisk(dockerfilePath: string): Promise<{device: string; buildId?: string | null; exposeId: string}> {
|
export async function setupStickyDisk(dockerfilePath: string): Promise<{device: string; buildId?: string | null; exposeId: string}> {
|
||||||
|
@ -272,6 +286,18 @@ export async function setupStickyDisk(dockerfilePath: string): Promise<{device:
|
||||||
await execAsync(`sudo mount ${device} ${mountPoint}`);
|
await execAsync(`sudo mount ${device} ${mountPoint}`);
|
||||||
core.debug(`${device} has been mounted to ${mountPoint}`);
|
core.debug(`${device} has been mounted to ${mountPoint}`);
|
||||||
core.info('Successfully obtained sticky disk');
|
core.info('Successfully obtained sticky disk');
|
||||||
|
|
||||||
|
// Check inode usage at mountpoint, and report if over 80%.
|
||||||
|
try {
|
||||||
|
const {stdout} = await execAsync(`df -i ${mountPoint} | tail -1 | awk '{print $5}' | sed 's/%//'`);
|
||||||
|
const inodePercentage = parseInt(stdout.trim());
|
||||||
|
if (!isNaN(inodePercentage) && inodePercentage > 80) { // Report if over 80%
|
||||||
|
await reporter.reportBuildPushActionFailure(new Error(`High inode usage (${inodePercentage}%) detected at ${mountPoint}`), 'setupStickyDisk', true /* isWarning */);
|
||||||
|
core.warning(`High inode usage (${inodePercentage}%) detected at ${mountPoint}`);
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
core.debug(`Error checking inode usage: ${error.message}`);
|
||||||
|
}
|
||||||
return {device, buildId: buildResponse?.docker_build_id, exposeId: exposeId};
|
return {device, buildId: buildResponse?.docker_build_id, exposeId: exposeId};
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
core.warning(`Error in setupStickyDisk: ${(error as Error).message}`);
|
core.warning(`Error in setupStickyDisk: ${(error as Error).message}`);
|
||||||
|
|
Loading…
Reference in New Issue