Merge pull request #80 from useblacksmith/gc-inode-tweaks
src: disable automatic buildkit GC
This commit is contained in:
commit
8a782a7c2b
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
@ -110,7 +110,7 @@ describe('startBlacksmithBuilder', () => {
|
|||
buildId: mockBuildId,
|
||||
exposeId: mockExposeId
|
||||
});
|
||||
expect(setupBuilder.startAndConfigureBuildkitd).toHaveBeenCalledWith(mockParallelism, mockDevice);
|
||||
expect(setupBuilder.startAndConfigureBuildkitd).toHaveBeenCalledWith(mockParallelism);
|
||||
expect(core.warning).not.toHaveBeenCalled();
|
||||
expect(reporter.reportBuildPushActionFailure).not.toHaveBeenCalled();
|
||||
});
|
||||
|
|
|
@ -81,7 +81,7 @@ export async function startBlacksmithBuilder(inputs: context.Inputs): Promise<{a
|
|||
const parallelism = await getNumCPUs();
|
||||
|
||||
const buildkitdStartTime = Date.now();
|
||||
const buildkitdAddr = await startAndConfigureBuildkitd(parallelism, stickyDiskSetup.device);
|
||||
const buildkitdAddr = await startAndConfigureBuildkitd(parallelism);
|
||||
const buildkitdDurationMs = Date.now() - buildkitdStartTime;
|
||||
await reporter.reportMetric(Metric_MetricType.BPA_BUILDKITD_READY_DURATION_MS, buildkitdDurationMs);
|
||||
|
||||
|
|
|
@ -47,7 +47,7 @@ export function createBlacksmithAgentClient() {
|
|||
return createClient(StickyDiskService, transport);
|
||||
}
|
||||
|
||||
export async function reportBuildPushActionFailure(error?: Error, event?: string) {
|
||||
export async function reportBuildPushActionFailure(error?: Error, event?: string, isWarning?: boolean) {
|
||||
const requestOptions = {
|
||||
stickydisk_key: process.env.GITHUB_REPO_NAME || '',
|
||||
repo_name: process.env.GITHUB_REPO_NAME || '',
|
||||
|
@ -55,7 +55,8 @@ export async function reportBuildPushActionFailure(error?: Error, event?: string
|
|||
arch: process.env.BLACKSMITH_ENV?.includes('arm') ? 'arm64' : 'amd64',
|
||||
vm_id: process.env.VM_ID || '',
|
||||
petname: process.env.PETNAME || '',
|
||||
message: event ? `${event}: ${error?.message || ''}` : error?.message || ''
|
||||
message: event ? `${event}: ${error?.message || ''}` : error?.message || '',
|
||||
warning: isWarning || false
|
||||
};
|
||||
|
||||
const client = createBlacksmithAPIClient();
|
||||
|
|
|
@ -1,10 +1,9 @@
|
|||
import * as fs from 'fs';
|
||||
import * as core from '@actions/core';
|
||||
import {exec, execSync, spawn} from 'child_process';
|
||||
import {exec, spawn} from 'child_process';
|
||||
import {promisify} from 'util';
|
||||
import * as TOML from '@iarna/toml';
|
||||
import * as reporter from './reporter';
|
||||
import FormData from 'form-data';
|
||||
|
||||
const mountPoint = '/var/lib/buildkit';
|
||||
const execAsync = promisify(exec);
|
||||
|
@ -51,8 +50,7 @@ export async function getNumCPUs(): Promise<number> {
|
|||
}
|
||||
}
|
||||
|
||||
async function writeBuildkitdTomlFile(parallelism: number, device: string): Promise<void> {
|
||||
const diskSize = await getDiskSize(device);
|
||||
async function writeBuildkitdTomlFile(parallelism: number): Promise<void> {
|
||||
const jsonConfig: TOML.JsonMap = {
|
||||
root: '/var/lib/buildkit',
|
||||
grpc: {
|
||||
|
@ -72,20 +70,11 @@ async function writeBuildkitdTomlFile(parallelism: number, device: string): Prom
|
|||
worker: {
|
||||
oci: {
|
||||
enabled: true,
|
||||
gc: true,
|
||||
gckeepstorage: diskSize.toString(),
|
||||
// Disable automatic garbage collection, since we will prune manually. Automatic GC
|
||||
// has been seen to negatively affect startup times of the daemon.
|
||||
gc: false,
|
||||
'max-parallelism': parallelism,
|
||||
snapshotter: 'overlayfs',
|
||||
gcpolicy: [
|
||||
{
|
||||
all: true,
|
||||
keepDuration: 1209600
|
||||
},
|
||||
{
|
||||
all: true,
|
||||
keepBytes: diskSize.toString()
|
||||
}
|
||||
]
|
||||
},
|
||||
containerd: {
|
||||
enabled: false
|
||||
|
@ -104,9 +93,9 @@ async function writeBuildkitdTomlFile(parallelism: number, device: string): Prom
|
|||
}
|
||||
}
|
||||
|
||||
async function startBuildkitd(parallelism: number, device: string): Promise<string> {
|
||||
async function startBuildkitd(parallelism: number): Promise<string> {
|
||||
try {
|
||||
await writeBuildkitdTomlFile(parallelism, device);
|
||||
await writeBuildkitdTomlFile(parallelism);
|
||||
await execAsync('sudo mkdir -p /run/buildkit');
|
||||
await execAsync('sudo chmod 755 /run/buildkit');
|
||||
const addr = 'unix:///run/buildkit/buildkitd.sock';
|
||||
|
@ -197,8 +186,8 @@ export async function getStickyDisk(options?: {signal?: AbortSignal}): Promise<{
|
|||
};
|
||||
}
|
||||
|
||||
export async function startAndConfigureBuildkitd(parallelism: number, device: string): Promise<string> {
|
||||
const buildkitdAddr = await startBuildkitd(parallelism, device);
|
||||
export async function startAndConfigureBuildkitd(parallelism: number): Promise<string> {
|
||||
const buildkitdAddr = await startBuildkitd(parallelism);
|
||||
core.debug(`buildkitd daemon started at addr ${buildkitdAddr}`);
|
||||
|
||||
// Change permissions on the buildkitd socket to allow non-root access
|
||||
|
@ -245,9 +234,34 @@ export async function startAndConfigureBuildkitd(parallelism: number, device: st
|
|||
core.warning(`Error checking buildkit workers: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
|
||||
// Start cache pruning in the background without blocking.
|
||||
pruneBuildkitCache().catch(error => {
|
||||
core.warning(`Background cache pruning failed: ${error.message}`);
|
||||
});
|
||||
|
||||
return buildkitdAddr;
|
||||
}
|
||||
|
||||
/**
 * Prunes buildkit cache data older than 14 days.
 *
 * No keep-bytes limit is passed to the prune command, since the ceph volume
 * size limits are handled separately in the VM Agent.
 *
 * On failure a warning is logged and the original error is rethrown, so
 * callers that run this in the background must attach their own `.catch`.
 *
 * @throws Rethrows whatever the `buildctl prune` invocation rejected with.
 */
export async function pruneBuildkitCache(): Promise<void> {
  try {
    // buildctl takes the retention window as a duration string; express 14 days in hours.
    const fourteenDaysInHours = 14 * 24;
    await execAsync(`sudo buildctl prune --keep-duration ${fourteenDaysInHours}h --all`);
    core.debug('Successfully pruned buildkit cache');
  } catch (error) {
    // Narrow before reading `.message`: the caught value is not guaranteed to be an
    // Error, and under strict settings the catch variable is typed `unknown`.
    const message = error instanceof Error ? error.message : String(error);
    core.warning(`Error pruning buildkit cache: ${message}`);
    throw error;
  }
}
|
||||
|
||||
|
||||
// setupStickyDisk mounts a sticky disk for the entity and returns the device information.
|
||||
// throws an error if it is unable to do so because of a timeout or an error
|
||||
export async function setupStickyDisk(dockerfilePath: string): Promise<{device: string; buildId?: string | null; exposeId: string}> {
|
||||
|
@ -272,6 +286,18 @@ export async function setupStickyDisk(dockerfilePath: string): Promise<{device:
|
|||
await execAsync(`sudo mount ${device} ${mountPoint}`);
|
||||
core.debug(`${device} has been mounted to ${mountPoint}`);
|
||||
core.info('Successfully obtained sticky disk');
|
||||
|
||||
// Check inode usage at mountpoint, and report if over 80%.
|
||||
try {
|
||||
const {stdout} = await execAsync(`df -i ${mountPoint} | tail -1 | awk '{print $5}' | sed 's/%//'`);
|
||||
const inodePercentage = parseInt(stdout.trim());
|
||||
if (!isNaN(inodePercentage) && inodePercentage > 80) { // Report if over 80%
|
||||
await reporter.reportBuildPushActionFailure(new Error(`High inode usage (${inodePercentage}%) detected at ${mountPoint}`), 'setupStickyDisk', true /* isWarning */);
|
||||
core.warning(`High inode usage (${inodePercentage}%) detected at ${mountPoint}`);
|
||||
}
|
||||
} catch (error) {
|
||||
core.debug(`Error checking inode usage: ${error.message}`);
|
||||
}
|
||||
return {device, buildId: buildResponse?.docker_build_id, exposeId: exposeId};
|
||||
} catch (error) {
|
||||
core.warning(`Error in setupStickyDisk: ${(error as Error).message}`);
|
||||
|
|
Loading…
Reference in New Issue