Initial: use Isolate for isolation

Omar Brikaa 2024-08-24 20:14:48 +03:00
parent 59338eee33
commit c8b69fe5ea
6 changed files with 264 additions and 337 deletions

View File

@@ -1,3 +1,13 @@
+FROM buildpack-deps@sha256:d56cd472000631b8faca51f40d4e3f1b20deffa588f9f207fa6c60efb62ba7c4 AS isolate
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git libcap-dev && \
+    rm -rf /var/lib/apt/lists/* && \
+    git clone https://github.com/envicutor/isolate.git /tmp/isolate/ && \
+    cd /tmp/isolate && \
+    git checkout af6db68042c3aa0ded80787fbb78bc0846ea2114 && \
+    make -j$(nproc) install && \
+    rm -rf /tmp/*
+
 FROM node:15.10.0-buster-slim
 ENV DEBIAN_FRONTEND=noninteractive
@@ -15,6 +25,8 @@ RUN apt-get update && \
     libfftw3-dev libglpk-dev libqhull-dev libqrupdate-dev libsuitesparse-dev \
     libsundials-dev libpcre2-dev && \
     rm -rf /var/lib/apt/lists/*
+COPY --from=isolate /usr/local/bin/isolate /usr/local/bin
+COPY --from=isolate /usr/local/etc/isolate /usr/local/etc/isolate
 RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen
@@ -23,7 +35,5 @@ COPY ["package.json", "package-lock.json", "./"]
 RUN npm install
 COPY ./src ./src
-RUN make -C ./src/nosocket/ all && make -C ./src/nosocket/ install
-CMD [ "node", "src"]
+CMD ["/piston_api/src/docker-entrypoint.sh"]
 EXPOSE 2000/tcp
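
Note: the build stage above pins a specific fork and commit of isolate so the sandbox binary is reproducible, and the runtime stage only copies the resulting binary plus its default configuration. A minimal smoke test of the staged binary, assuming the same flags job.js uses below (box id 0 is an arbitrary example):

    const cp = require('child_process');

    // isolate --init prints the box directory; --cleanup releases the box again
    cp.exec('isolate --init --cg -b0', (error, stdout) => {
        if (error) throw error;
        console.log(`box directory: ${stdout.trim()}`);
        cp.exec('isolate --cleanup --cg -b0');
    });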

View File

@@ -0,0 +1,12 @@
+#!/bin/bash
+
+cd /sys/fs/cgroup && \
+mkdir isolate/ && \
+echo 1 > isolate/cgroup.procs && \
+echo '+cpuset +cpu +io +memory +pids' > cgroup.subtree_control && \
+cd isolate && \
+mkdir init && \
+echo 1 > init/cgroup.procs && \
+echo '+cpuset +memory' > cgroup.subtree_control && \
+echo "Initialized cgroup" && \
+exec su -- piston -c 'ulimit -n 65536 && node'
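
Note: the entrypoint builds a cgroup v2 hierarchy for isolate. It moves PID 1 into the new `isolate` group before enabling controllers because cgroup v2 forbids enabling controllers in a group whose parent still contains member processes (the "no internal processes" rule), then repeats the dance one level down and finally drops to the unprivileged `piston` user. A hypothetical startup check (not part of this commit) that the delegation worked:

    const fss = require('fs');

    // /sys/fs/cgroup/isolate/cgroup.subtree_control lists the controllers
    // the entrypoint delegated with '+cpuset +memory'
    const controllers = fss
        .readFileSync('/sys/fs/cgroup/isolate/cgroup.subtree_control', 'utf-8')
        .trim()
        .split(/\s+/);

    for (const required of ['cpuset', 'memory']) {
        if (!controllers.includes(required)) {
            throw new Error(`cgroup v2 controller "${required}" is not delegated`);
        }
    }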

View File

@@ -11,7 +11,6 @@ const platform = `${is_docker() ? 'docker' : 'baremetal'}-${fs
 module.exports = {
     data_directories: {
         packages: 'packages',
-        jobs: 'jobs',
     },
     version: require('../package.json').version,
     platform,

View File

@@ -1,13 +1,9 @@
 const logplease = require('logplease');
-const logger = logplease.create('job');
 const { v4: uuidv4 } = require('uuid');
 const cp = require('child_process');
 const path = require('path');
 const config = require('./config');
-const globals = require('./globals');
 const fs = require('fs/promises');
-const fss = require('fs');
-const wait_pid = require('waitpid');

 const job_states = {
     READY: Symbol('Ready to be primed'),
@@ -15,15 +11,19 @@ const job_states = {
     EXECUTED: Symbol('Executed and ready for cleanup'),
 };

-let uid = 0;
-let gid = 0;
+const MAX_BOX_ID = 999;
+const ISOLATE_PATH = '/usr/local/bin/isolate';
+let box_id = 0;

 let remaining_job_spaces = config.max_concurrent_jobs;
 let job_queue = [];

+// Rotate through box ids so concurrent jobs land in distinct isolate boxes
+const get_next_box_id = () => (box_id = (box_id + 1) % MAX_BOX_ID);
+
 class Job {
-    #active_timeouts;
-    #active_parent_processes;
+    #box_id;
+    #metadata_file_path;
+    #box_dir;

     constructor({ runtime, files, args, stdin, timeouts, memory_limits }) {
         this.uuid = uuidv4();
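
Note: the job-slot bookkeeping carries over from the old implementation; only the box-id counter is new. It rotates modulo MAX_BOX_ID so that each primed job gets its own isolate box, which assumes config.max_concurrent_jobs stays below MAX_BOX_ID (otherwise ids would collide while still in use). For illustration:

    let box_id = 0;
    const MAX_BOX_ID = 999;
    const get_next_box_id = () => (box_id = (box_id + 1) % MAX_BOX_ID);

    console.log(get_next_box_id()); // 1
    console.log(get_next_box_id()); // 2
    // ...ids keep incrementing and wrap modulo MAX_BOX_ID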
@@ -46,29 +46,10 @@
             this.stdin += '\n';
         }

-        this.#active_timeouts = [];
-        this.#active_parent_processes = [];
-
         this.timeouts = timeouts;
         this.memory_limits = memory_limits;
-
-        this.uid = config.runner_uid_min + uid;
-        this.gid = config.runner_gid_min + gid;
-
-        uid++;
-        gid++;
-        uid %= config.runner_uid_max - config.runner_uid_min + 1;
-        gid %= config.runner_gid_max - config.runner_gid_min + 1;
-
-        this.logger.debug(`Assigned uid=${this.uid} gid=${this.gid}`);
-
         this.state = job_states.READY;
-        this.dir = path.join(
-            config.data_directory,
-            globals.data_directories.jobs,
-            this.uuid
-        );
     }

     async prime() {
@@ -80,31 +61,46 @@
         }
         this.logger.info(`Priming job`);
         remaining_job_spaces--;

-        this.logger.debug('Writing files to job cache');
+        this.logger.debug('Running isolate --init');
+        this.#box_id = get_next_box_id();
+        this.#metadata_file_path = `/tmp/${this.#box_id}-metadata.txt`;
+        await new Promise((res, rej) => {
+            cp.exec(
+                `isolate --init --cg -b${this.#box_id}`,
+                (error, stdout, stderr) => {
+                    if (error) {
+                        rej(
+                            `Failed to run isolate --init: ${error.message}\nstdout: ${stdout}\nstderr: ${stderr}`
+                        );
+                        return;
+                    }
+                    if (stdout === '') {
+                        rej('Received empty stdout from isolate --init');
+                        return;
+                    }
+                    // isolate prints the box directory followed by a newline
+                    this.#box_dir = stdout.trim();
+                    res();
+                }
+            );
+        });

-        this.logger.debug(`Transfering ownership`);
-        await fs.mkdir(this.dir, { mode: 0o700 });
-        await fs.chown(this.dir, this.uid, this.gid);
+        this.logger.debug(`Creating submission files in Isolate box`);
+        await fs.mkdir(path.join(this.#box_dir, 'submission'));

         for (const file of this.files) {
-            const file_path = path.join(this.dir, file.name);
-            const rel = path.relative(this.dir, file_path);
+            const file_path = path.join(this.#box_dir, file.name);
+            const rel = path.relative(this.#box_dir, file_path);
+            const file_content = Buffer.from(file.content, file.encoding);

             if (rel.startsWith('..'))
                 throw Error(
                     `File path "${file.name}" tries to escape parent directory: ${rel}`
                 );

-            const file_content = Buffer.from(file.content, file.encoding);
-
             await fs.mkdir(path.dirname(file_path), {
                 recursive: true,
                 mode: 0o700,
             });
-            await fs.chown(path.dirname(file_path), this.uid, this.gid);

             await fs.write_file(file_path, file_content);
-            await fs.chown(file_path, this.uid, this.gid);
         }

         this.state = job_states.PRIMED;
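
Note: the per-file loop keeps the old path-traversal guard: any file whose resolved path escapes the box directory is rejected before anything is written. A quick demonstration of the check used above (the box directory is a made-up example):

    const path = require('path');

    const box_dir = '/var/local/lib/isolate/0'; // hypothetical box directory
    for (const name of ['main.py', 'lib/util.py', '../../etc/passwd']) {
        const file_path = path.join(box_dir, name);
        const rel = path.relative(box_dir, file_path);
        console.log(name, '->', rel.startsWith('..') ? 'rejected' : 'accepted');
    }
    // main.py -> accepted, lib/util.py -> accepted, ../../etc/passwd -> rejected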
@@ -112,167 +108,204 @@
         this.logger.debug('Primed job');
     }

-    exit_cleanup() {
-        for (const timeout of this.#active_timeouts) {
-            clear_timeout(timeout);
-        }
-        this.#active_timeouts = [];
-        this.logger.debug('Cleared the active timeouts');
-
-        this.cleanup_processes();
-        this.logger.debug(`Finished exit cleanup`);
-    }
-
-    close_cleanup() {
-        for (const proc of this.#active_parent_processes) {
-            proc.stderr.destroy();
-            if (!proc.stdin.destroyed) {
-                proc.stdin.end();
-                proc.stdin.destroy();
-            }
-            proc.stdout.destroy();
-        }
-        this.#active_parent_processes = [];
-        this.logger.debug('Destroyed processes writables');
-    }
-
     async safe_call(file, args, timeout, memory_limit, event_bus = null) {
-        return new Promise((resolve, reject) => {
-            const nonetwork = config.disable_networking ? ['nosocket'] : [];
-
-            const prlimit = [
-                'prlimit',
-                '--nproc=' + this.runtime.max_process_count,
-                '--nofile=' + this.runtime.max_open_files,
-                '--fsize=' + this.runtime.max_file_size,
-            ];
-
-            const timeout_call = [
-                'timeout',
-                '-s',
-                '9',
-                Math.ceil(timeout / 1000),
-            ];
-
-            if (memory_limit >= 0) {
-                prlimit.push('--as=' + memory_limit);
-            }
-
-            const proc_call = [
-                'nice',
-                ...timeout_call,
-                ...prlimit,
-                ...nonetwork,
-                'bash',
-                file,
-                ...args,
-            ];
-
-            var stdout = '';
-            var stderr = '';
-            var output = '';
-
-            const proc = cp.spawn(proc_call[0], proc_call.splice(1), {
-                env: {
-                    ...this.runtime.env_vars,
-                    PISTON_LANGUAGE: this.runtime.language,
-                },
-                stdio: 'pipe',
-                cwd: this.dir,
-                uid: this.uid,
-                gid: this.gid,
-                detached: true, //give this process its own process group
-            });
-
-            this.#active_parent_processes.push(proc);
-
-            if (event_bus === null) {
-                proc.stdin.write(this.stdin);
-                proc.stdin.end();
-                proc.stdin.destroy();
-            } else {
-                event_bus.on('stdin', data => {
-                    proc.stdin.write(data);
-                });
-
-                event_bus.on('kill', signal => {
-                    proc.kill(signal);
-                });
-            }
-
-            const kill_timeout =
-                (timeout >= 0 &&
-                    set_timeout(async _ => {
-                        this.logger.info(`Timeout exceeded timeout=${timeout}`);
-                        try {
-                            process.kill(proc.pid, 'SIGKILL');
-                        } catch (e) {
-                            // Could already be dead and just needs to be waited on
-                            this.logger.debug(
-                                `Got error while SIGKILLing process ${proc}:`,
-                                e
-                            );
-                        }
-                    }, timeout)) ||
-                null;
-            this.#active_timeouts.push(kill_timeout);
-
-            proc.stderr.on('data', async data => {
-                if (event_bus !== null) {
-                    event_bus.emit('stderr', data);
-                } else if ((stderr.length + data.length) > this.runtime.output_max_size) {
-                    this.logger.info(`stderr length exceeded`);
-                    try {
-                        process.kill(proc.pid, 'SIGKILL');
-                    } catch (e) {
-                        // Could already be dead and just needs to be waited on
-                        this.logger.debug(
-                            `Got error while SIGKILLing process ${proc}:`,
-                            e
-                        );
-                    }
-                } else {
-                    stderr += data;
-                    output += data;
-                }
-            });
-
-            proc.stdout.on('data', async data => {
-                if (event_bus !== null) {
-                    event_bus.emit('stdout', data);
-                } else if ((stdout.length + data.length) > this.runtime.output_max_size) {
-                    this.logger.info(`stdout length exceeded`);
-                    try {
-                        process.kill(proc.pid, 'SIGKILL');
-                    } catch (e) {
-                        // Could already be dead and just needs to be waited on
-                        this.logger.debug(
-                            `Got error while SIGKILLing process ${proc}:`,
-                            e
-                        );
-                    }
-                } else {
-                    stdout += data;
-                    output += data;
-                }
-            });
-
-            proc.on('exit', () => this.exit_cleanup());
-
-            proc.on('close', (code, signal) => {
-                this.close_cleanup();
-
-                resolve({ stdout, stderr, code, signal, output });
-            });
-
-            proc.on('error', err => {
-                this.exit_cleanup();
-                this.close_cleanup();
-
-                reject({ error: err, stdout, stderr, output });
-            });
-        });
+        var stdout = '';
+        var stderr = '';
+        var output = '';
+
+        const proc = cp.spawn(
+            ISOLATE_PATH,
+            [
+                '--run',
+                `-b${this.#box_id}`,
+                `--meta=${this.#metadata_file_path}`,
+                '--cg',
+                '-s',
+                '-c',
+                '/box/submission',
+                '-e',
+                `--dir=/runtime=${this.runtime.pkgdir}`,
+                `--processes=${this.runtime.max_process_count}`,
+                `--open-files=${this.runtime.max_open_files}`,
+                `--fsize=${this.runtime.max_file_size}`,
+                `--time=${timeout}`,
+                `--extra-time=0`,
+                ...(memory_limit >= 0 ? [`--cg-mem=${memory_limit}`] : []),
+                ...(config.disable_networking ? [] : ['--share-net']),
+                '--',
+                file,
+                ...args,
+            ],
+            {
+                env: {
+                    ...this.runtime.env_vars,
+                    PISTON_LANGUAGE: this.runtime.language,
+                },
+                stdio: 'pipe',
+            }
+        );
+
+        if (event_bus === null) {
+            proc.stdin.write(this.stdin);
+            proc.stdin.end();
+            proc.stdin.destroy();
+        } else {
+            event_bus.on('stdin', data => {
+                proc.stdin.write(data);
+            });
+
+            event_bus.on('kill', signal => {
+                proc.kill(signal);
+            });
+        }
+
+        proc.stderr.on('data', async data => {
+            if (event_bus !== null) {
+                event_bus.emit('stderr', data);
+            } else if (
+                stderr.length + data.length >
+                this.runtime.output_max_size
+            ) {
+                this.logger.info(`stderr length exceeded`);
+                try {
+                    process.kill(proc.pid, 'SIGABRT');
+                } catch (e) {
+                    // Could already be dead and just needs to be waited on
+                    this.logger.debug(
+                        `Got error while SIGABRTing process ${proc}:`,
+                        e
+                    );
+                }
+            } else {
+                stderr += data;
+                output += data;
+            }
+        });
+
+        proc.stdout.on('data', async data => {
+            if (event_bus !== null) {
+                event_bus.emit('stdout', data);
+            } else if (
+                stdout.length + data.length >
+                this.runtime.output_max_size
+            ) {
+                this.logger.info(`stdout length exceeded`);
+                try {
+                    process.kill(proc.pid, 'SIGABRT');
+                } catch (e) {
+                    // Could already be dead and just needs to be waited on
+                    this.logger.debug(
+                        `Got error while SIGABRTing process ${proc}:`,
+                        e
+                    );
+                }
+            } else {
+                stdout += data;
+                output += data;
+            }
+        });
+
+        // Wait for the box to exit before reading the metadata file that
+        // isolate writes on termination
+        await new Promise((resolve, reject) => {
+            proc.on('close', resolve);
+            proc.on('error', err => {
+                reject({ error: err, stdout, stderr, output });
+            });
+        });
+
+        let memory = null;
+        let code = null;
+        let signal = null;
+        let message = null;
+        let status = null;
+        let time = null;
+
+        try {
+            const metadata_str = await fs.readFile(
+                this.#metadata_file_path,
+                'utf-8'
+            );
+            const metadata_lines = metadata_str.split('\n');
+            for (const line of metadata_lines) {
+                if (!line) continue;
+
+                const [key, value] = line.split(':');
+                if (key === undefined || value === undefined) {
+                    throw new Error(
+                        `Failed to parse metadata file, received: ${line}`
+                    );
+                }
+
+                switch (key) {
+                    case 'cg-mem':
+                        memory = parseInt(value);
+                        if (Number.isNaN(memory)) {
+                            throw new Error(
+                                `Failed to parse memory usage, received value: ${value}`
+                            );
+                        }
+                        break;
+                    case 'exitcode':
+                        code = parseInt(value);
+                        if (Number.isNaN(code)) {
+                            throw new Error(
+                                `Failed to parse exit code, received value: ${value}`
+                            );
+                        }
+                        break;
+                    case 'exitsig':
+                        signal = parseInt(value);
+                        if (Number.isNaN(signal)) {
+                            throw new Error(
+                                `Failed to parse exit signal, received value: ${value}`
+                            );
+                        }
+                        break;
+                    case 'message':
+                        message = value;
+                        break;
+                    case 'status':
+                        status = value;
+                        break;
+                    case 'time':
+                        time = parseFloat(value);
+                        if (Number.isNaN(time)) {
+                            throw new Error(
+                                `Failed to parse cpu time, received value: ${value}`
+                            );
+                        }
+                        break;
+                    default:
+                        break;
+                }
+            }
+        } catch (e) {
+            throw new Error(
+                `Error reading metadata file: ${this.#metadata_file_path}\nError: ${e.message}\nIsolate run stdout: ${stdout}\nIsolate run stderr: ${stderr}`
+            );
+        }
+
+        return {
+            stdout,
+            stderr,
+            code,
+            signal,
+            output,
+            memory,
+            message,
+            status,
+            time,
+        };
     }
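
Note: the `--meta` file consumed by the parsing block is a plain list of `key:value` lines that isolate writes when the box exits. A sample in that format (values are illustrative) fed through the same split logic; since `line.split(':')` destructures only the first two segments, a value containing a colon would be truncated:

    const sample = [
        'time:0.094',
        'time-wall:0.105',
        'max-rss:3512',
        'cg-mem:1204',
        'exitcode:0',
    ].join('\n');

    for (const line of sample.split('\n')) {
        if (!line) continue;
        const [key, value] = line.split(':');
        console.log(`${key} => ${value}`);
    }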
@@ -281,7 +314,7 @@ class Job {
         if (this.state !== job_states.PRIMED) {
             throw new Error(
                 'Job must be in primed state, current state: ' +
-                this.state.toString()
+                    this.state.toString()
             );
         }
@@ -298,49 +331,49 @@ class Job {
         const { emit_event_bus_result, emit_event_bus_stage } =
             event_bus === null
                 ? {
-                      emit_event_bus_result: () => { },
-                      emit_event_bus_stage: () => { },
+                      emit_event_bus_result: () => {},
+                      emit_event_bus_stage: () => {},
                   }
                 : {
-                      emit_event_bus_result: (stage, result, event_bus) => {
+                      emit_event_bus_result: (stage, result) => {
                           const { error, code, signal } = result;
                           event_bus.emit('exit', stage, {
                               error,
                               code,
                               signal,
                           });
                       },
-                      emit_event_bus_stage: (stage, event_bus) => {
+                      emit_event_bus_stage: stage => {
                           event_bus.emit('stage', stage);
                       },
                   };

         if (this.runtime.compiled) {
             this.logger.debug('Compiling');
-            emit_event_bus_stage('compile', event_bus);
+            emit_event_bus_stage('compile');
             compile = await this.safe_call(
-                path.join(this.runtime.pkgdir, 'compile'),
+                '/runtime/compile',
                 code_files.map(x => x.name),
                 this.timeouts.compile,
                 this.memory_limits.compile,
                 event_bus
             );
-            emit_event_bus_result('compile', compile, event_bus);
+            emit_event_bus_result('compile', compile);
             compile_errored = compile.code !== 0;
         }

         let run;
         if (!compile_errored) {
             this.logger.debug('Running');
-            emit_event_bus_stage('run', event_bus);
+            emit_event_bus_stage('run');
             run = await this.safe_call(
-                path.join(this.runtime.pkgdir, 'run'),
+                '/runtime/run',
                 [code_files[0].name, ...this.args],
                 this.timeouts.run,
                 this.memory_limits.run,
                 event_bus
             );
-            emit_event_bus_result('run', run, event_bus);
+            emit_event_bus_result('run', run);
         }

         this.state = job_states.EXECUTED;
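
Note: both stages now call safe_call with absolute in-box paths. This works because the spawn arguments include `--dir=/runtime=${this.runtime.pkgdir}`, which binds the package directory into the sandbox, so `/runtime/compile` and `/runtime/run` resolve to the package's scripts on the host. A sketch of the mapping (the package path is a made-up example):

    const pkgdir = '/piston/packages/node/18.15.0'; // hypothetical package dir

    // The directory rule safe_call passes to isolate:
    const dir_rule = `--dir=/runtime=${pkgdir}`;

    // An in-box path maps back to the host like this:
    const in_box = '/runtime/run';
    const on_host = in_box.replace('/runtime', pkgdir);
    console.log(`${dir_rule}: ${in_box} -> ${on_host}`);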
@@ -353,135 +386,10 @@ class Job {
         };
     }

-    cleanup_processes(dont_wait = []) {
-        let processes = [1];
-        const to_wait = [];
-        this.logger.debug(`Cleaning up processes`);
-
-        while (processes.length > 0) {
-            processes = [];
-            const proc_ids = fss.readdir_sync('/proc');
-
-            processes = proc_ids.map(proc_id => {
-                if (isNaN(proc_id)) return -1;
-                try {
-                    const proc_status = fss.read_file_sync(
-                        path.join('/proc', proc_id, 'status')
-                    );
-                    const proc_lines = proc_status.to_string().split('\n');
-                    const state_line = proc_lines.find(line =>
-                        line.starts_with('State:')
-                    );
-                    const uid_line = proc_lines.find(line =>
-                        line.starts_with('Uid:')
-                    );
-                    const [_, ruid, euid, suid, fuid] = uid_line.split(/\s+/);
-                    const [_1, state, user_friendly] = state_line.split(/\s+/);
-
-                    const proc_id_int = parse_int(proc_id);
-
-                    // Skip over any processes that aren't ours.
-                    if (ruid != this.uid && euid != this.uid) return -1;
-
-                    if (state == 'Z') {
-                        // Zombie process, just needs to be waited, regardless of the user id
-                        if (!to_wait.includes(proc_id_int))
-                            to_wait.push(proc_id_int);
-
-                        return -1;
-                    }
-                    // We should kill in all other state (Sleep, Stopped & Running)
-                    return proc_id_int;
-                } catch {
-                    return -1;
-                }
-
-                return -1;
-            });
-
-            processes = processes.filter(p => p > 0);
-
-            if (processes.length > 0)
-                this.logger.debug(`Got processes to kill: ${processes}`);
-
-            for (const proc of processes) {
-                // First stop the processes, but keep their resources allocated so they cant re-fork
-                try {
-                    process.kill(proc, 'SIGSTOP');
-                } catch (e) {
-                    // Could already be dead
-                    this.logger.debug(
-                        `Got error while SIGSTOPping process ${proc}:`,
-                        e
-                    );
-                }
-            }
-
-            for (const proc of processes) {
-                // Then clear them out of the process tree
-                try {
-                    process.kill(proc, 'SIGKILL');
-                } catch (e) {
-                    // Could already be dead and just needs to be waited on
-                    this.logger.debug(
-                        `Got error while SIGKILLing process ${proc}:`,
-                        e
-                    );
-                }
-
-                to_wait.push(proc);
-            }
-        }
-
-        this.logger.debug(
-            `Finished kill-loop, calling wait_pid to end any zombie processes`
-        );
-
-        for (const proc of to_wait) {
-            if (dont_wait.includes(proc)) continue;
-
-            wait_pid(proc);
-        }
-
-        this.logger.debug(`Cleaned up processes`);
-    }
-
-    async cleanup_filesystem() {
-        for (const clean_path of globals.clean_directories) {
-            const contents = await fs.readdir(clean_path);
-
-            for (const file of contents) {
-                const file_path = path.join(clean_path, file);
-
-                try {
-                    const stat = await fs.stat(file_path);
-
-                    if (stat.uid === this.uid) {
-                        await fs.rm(file_path, {
-                            recursive: true,
-                            force: true,
-                        });
-                    }
-                } catch (e) {
-                    // File was somehow deleted in the time that we read the dir to when we checked the file
-                    this.logger.warn(`Error removing file ${file_path}: ${e}`);
-                }
-            }
-        }
-
-        await fs.rm(this.dir, { recursive: true, force: true });
-    }
-
     async cleanup() {
         this.logger.info(`Cleaning up job`);

-        this.exit_cleanup(); // Run process janitor, just incase there are any residual processes somehow
-        this.close_cleanup();
-        await this.cleanup_filesystem();
+        await fs.rm(this.#metadata_file_path);

         remaining_job_spaces++;

         if (job_queue.length > 0) {
             job_queue.shift()();
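
Note: with the filesystem and process janitors gone, cleanup() reduces to deleting the metadata file and releasing a concurrency slot. remaining_job_spaces acts as a counting semaphore over prime() and cleanup(); a minimal sketch of that pattern (the acquire side lives in prime(), outside this diff's hunks):

    let remaining_job_spaces = 2; // stands in for config.max_concurrent_jobs
    const job_queue = [];

    // prime() waits for a free slot...
    const acquire = () =>
        new Promise(resolve => {
            if (remaining_job_spaces > 0) {
                remaining_job_spaces--;
                resolve();
            } else {
                job_queue.push(() => {
                    remaining_job_spaces--;
                    resolve();
                });
            }
        });

    // ...and cleanup() releases it, waking the next queued job
    const release = () => {
        remaining_job_spaces++;
        if (job_queue.length > 0) job_queue.shift()();
    };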

View File

@@ -4,15 +4,12 @@ services:
     api:
         build: api
         container_name: piston_api
-        cap_add:
-            - CAP_SYS_ADMIN
+        privileged: true
         restart: always
         ports:
             - 2000:2000
         volumes:
             - ./data/piston/packages:/piston/packages
-        environment:
-            - PISTON_REPO_URL=http://repo:8000/index
         tmpfs:
             - /piston/jobs:exec,uid=1000,gid=1000,mode=711

View File

@@ -5,6 +5,7 @@ services:
         image: ghcr.io/engineer-man/piston
         container_name: piston_api
         restart: always
+        privileged: true
         ports:
             - 2000:2000
         volumes: