Merge pull request #80 from useblacksmith/gc-inode-tweaks

src: disable automatic buildkit GC
This commit is contained in:
Aditya Maru 2024-12-23 09:21:40 -05:00 committed by GitHub
commit 8a782a7c2b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 59 additions and 32 deletions

14
dist/index.js generated vendored

File diff suppressed because one or more lines are too long

2
dist/index.js.map generated vendored

File diff suppressed because one or more lines are too long

View File

@ -110,7 +110,7 @@ describe('startBlacksmithBuilder', () => {
buildId: mockBuildId, buildId: mockBuildId,
exposeId: mockExposeId exposeId: mockExposeId
}); });
expect(setupBuilder.startAndConfigureBuildkitd).toHaveBeenCalledWith(mockParallelism, mockDevice); expect(setupBuilder.startAndConfigureBuildkitd).toHaveBeenCalledWith(mockParallelism);
expect(core.warning).not.toHaveBeenCalled(); expect(core.warning).not.toHaveBeenCalled();
expect(reporter.reportBuildPushActionFailure).not.toHaveBeenCalled(); expect(reporter.reportBuildPushActionFailure).not.toHaveBeenCalled();
}); });

View File

@ -81,7 +81,7 @@ export async function startBlacksmithBuilder(inputs: context.Inputs): Promise<{a
const parallelism = await getNumCPUs(); const parallelism = await getNumCPUs();
const buildkitdStartTime = Date.now(); const buildkitdStartTime = Date.now();
const buildkitdAddr = await startAndConfigureBuildkitd(parallelism, stickyDiskSetup.device); const buildkitdAddr = await startAndConfigureBuildkitd(parallelism);
const buildkitdDurationMs = Date.now() - buildkitdStartTime; const buildkitdDurationMs = Date.now() - buildkitdStartTime;
await reporter.reportMetric(Metric_MetricType.BPA_BUILDKITD_READY_DURATION_MS, buildkitdDurationMs); await reporter.reportMetric(Metric_MetricType.BPA_BUILDKITD_READY_DURATION_MS, buildkitdDurationMs);

View File

@ -47,7 +47,7 @@ export function createBlacksmithAgentClient() {
return createClient(StickyDiskService, transport); return createClient(StickyDiskService, transport);
} }
export async function reportBuildPushActionFailure(error?: Error, event?: string) { export async function reportBuildPushActionFailure(error?: Error, event?: string, isWarning?: boolean) {
const requestOptions = { const requestOptions = {
stickydisk_key: process.env.GITHUB_REPO_NAME || '', stickydisk_key: process.env.GITHUB_REPO_NAME || '',
repo_name: process.env.GITHUB_REPO_NAME || '', repo_name: process.env.GITHUB_REPO_NAME || '',
@ -55,7 +55,8 @@ export async function reportBuildPushActionFailure(error?: Error, event?: string
arch: process.env.BLACKSMITH_ENV?.includes('arm') ? 'arm64' : 'amd64', arch: process.env.BLACKSMITH_ENV?.includes('arm') ? 'arm64' : 'amd64',
vm_id: process.env.VM_ID || '', vm_id: process.env.VM_ID || '',
petname: process.env.PETNAME || '', petname: process.env.PETNAME || '',
message: event ? `${event}: ${error?.message || ''}` : error?.message || '' message: event ? `${event}: ${error?.message || ''}` : error?.message || '',
warning: isWarning || false
}; };
const client = createBlacksmithAPIClient(); const client = createBlacksmithAPIClient();

View File

@ -1,10 +1,9 @@
import * as fs from 'fs'; import * as fs from 'fs';
import * as core from '@actions/core'; import * as core from '@actions/core';
import {exec, execSync, spawn} from 'child_process'; import {exec, spawn} from 'child_process';
import {promisify} from 'util'; import {promisify} from 'util';
import * as TOML from '@iarna/toml'; import * as TOML from '@iarna/toml';
import * as reporter from './reporter'; import * as reporter from './reporter';
import FormData from 'form-data';
const mountPoint = '/var/lib/buildkit'; const mountPoint = '/var/lib/buildkit';
const execAsync = promisify(exec); const execAsync = promisify(exec);
@ -51,8 +50,7 @@ export async function getNumCPUs(): Promise<number> {
} }
} }
async function writeBuildkitdTomlFile(parallelism: number, device: string): Promise<void> { async function writeBuildkitdTomlFile(parallelism: number): Promise<void> {
const diskSize = await getDiskSize(device);
const jsonConfig: TOML.JsonMap = { const jsonConfig: TOML.JsonMap = {
root: '/var/lib/buildkit', root: '/var/lib/buildkit',
grpc: { grpc: {
@ -72,20 +70,11 @@ async function writeBuildkitdTomlFile(parallelism: number, device: string): Prom
worker: { worker: {
oci: { oci: {
enabled: true, enabled: true,
gc: true, // Disable automatic garbage collection, since we will prune manually. Automatic GC
gckeepstorage: diskSize.toString(), // has been seen to negatively affect startup times of the daemon.
gc: false,
'max-parallelism': parallelism, 'max-parallelism': parallelism,
snapshotter: 'overlayfs', snapshotter: 'overlayfs',
gcpolicy: [
{
all: true,
keepDuration: 1209600
},
{
all: true,
keepBytes: diskSize.toString()
}
]
}, },
containerd: { containerd: {
enabled: false enabled: false
@ -104,9 +93,9 @@ async function writeBuildkitdTomlFile(parallelism: number, device: string): Prom
} }
} }
async function startBuildkitd(parallelism: number, device: string): Promise<string> { async function startBuildkitd(parallelism: number): Promise<string> {
try { try {
await writeBuildkitdTomlFile(parallelism, device); await writeBuildkitdTomlFile(parallelism);
await execAsync('sudo mkdir -p /run/buildkit'); await execAsync('sudo mkdir -p /run/buildkit');
await execAsync('sudo chmod 755 /run/buildkit'); await execAsync('sudo chmod 755 /run/buildkit');
const addr = 'unix:///run/buildkit/buildkitd.sock'; const addr = 'unix:///run/buildkit/buildkitd.sock';
@ -197,8 +186,8 @@ export async function getStickyDisk(options?: {signal?: AbortSignal}): Promise<{
}; };
} }
export async function startAndConfigureBuildkitd(parallelism: number, device: string): Promise<string> { export async function startAndConfigureBuildkitd(parallelism: number): Promise<string> {
const buildkitdAddr = await startBuildkitd(parallelism, device); const buildkitdAddr = await startBuildkitd(parallelism);
core.debug(`buildkitd daemon started at addr ${buildkitdAddr}`); core.debug(`buildkitd daemon started at addr ${buildkitdAddr}`);
// Change permissions on the buildkitd socket to allow non-root access // Change permissions on the buildkitd socket to allow non-root access
@ -245,9 +234,34 @@ export async function startAndConfigureBuildkitd(parallelism: number, device: st
core.warning(`Error checking buildkit workers: ${error.message}`); core.warning(`Error checking buildkit workers: ${error.message}`);
throw error; throw error;
} }
// Start cache pruning in the background without blocking.
pruneBuildkitCache().catch(error => {
core.warning(`Background cache pruning failed: ${error.message}`);
});
return buildkitdAddr; return buildkitdAddr;
} }
/**
 * Prunes buildkit cache data older than 14 days.
 *
 * No keep-bytes limit is passed here because the ceph volume size limits
 * are handled separately in the VM Agent.
 *
 * @returns A promise that resolves once the `buildctl prune` command has completed.
 * @throws Rethrows whatever the `buildctl prune` invocation rejected with, after
 *   logging a warning.
 */
export async function pruneBuildkitCache(): Promise<void> {
  // The retention window is passed to buildctl in hours (14 days => 336h).
  const fourteenDaysInHours = 14 * 24;
  try {
    await execAsync(`sudo buildctl prune --keep-duration ${fourteenDaysInHours}h --all`);
    core.debug('Successfully pruned buildkit cache');
  } catch (error) {
    // A caught value is not guaranteed to be an Error, so narrow it before
    // reading `.message`; otherwise the warning could read "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    core.warning(`Error pruning buildkit cache: ${reason}`);
    // Propagate the original rejection value unchanged so callers can handle it.
    throw error;
  }
}
// setupStickyDisk mounts a sticky disk for the entity and returns the device information. // setupStickyDisk mounts a sticky disk for the entity and returns the device information.
// throws an error if it is unable to do so because of a timeout or an error // throws an error if it is unable to do so because of a timeout or an error
export async function setupStickyDisk(dockerfilePath: string): Promise<{device: string; buildId?: string | null; exposeId: string}> { export async function setupStickyDisk(dockerfilePath: string): Promise<{device: string; buildId?: string | null; exposeId: string}> {
@ -272,6 +286,18 @@ export async function setupStickyDisk(dockerfilePath: string): Promise<{device:
await execAsync(`sudo mount ${device} ${mountPoint}`); await execAsync(`sudo mount ${device} ${mountPoint}`);
core.debug(`${device} has been mounted to ${mountPoint}`); core.debug(`${device} has been mounted to ${mountPoint}`);
core.info('Successfully obtained sticky disk'); core.info('Successfully obtained sticky disk');
// Check inode usage at mountpoint, and report if over 80%.
try {
const {stdout} = await execAsync(`df -i ${mountPoint} | tail -1 | awk '{print $5}' | sed 's/%//'`);
const inodePercentage = parseInt(stdout.trim());
if (!isNaN(inodePercentage) && inodePercentage > 80) { // Report if over 80%
await reporter.reportBuildPushActionFailure(new Error(`High inode usage (${inodePercentage}%) detected at ${mountPoint}`), 'setupStickyDisk', true /* isWarning */);
core.warning(`High inode usage (${inodePercentage}%) detected at ${mountPoint}`);
}
} catch (error) {
core.debug(`Error checking inode usage: ${error.message}`);
}
return {device, buildId: buildResponse?.docker_build_id, exposeId: exposeId}; return {device, buildId: buildResponse?.docker_build_id, exposeId: exposeId};
} catch (error) { } catch (error) {
core.warning(`Error in setupStickyDisk: ${(error as Error).message}`); core.warning(`Error in setupStickyDisk: ${(error as Error).message}`);