From c8b69fe5eadc70a2222dab37191ac2799ff8bfa9 Mon Sep 17 00:00:00 2001
From: Omar Brikaa
Date: Sat, 24 Aug 2024 20:14:48 +0300
Subject: [PATCH] Initial: use Isolate for isolation

---
 api/Dockerfile               |  16 +-
 api/src/docker-entrypoint.sh |  12 +
 api/src/globals.js           |   1 -
 api/src/job.js               | 566 +++++++++++++++--------------------
 docker-compose.dev.yaml      |   5 +-
 docker-compose.yaml          |   1 +
 6 files changed, 264 insertions(+), 337 deletions(-)
 create mode 100755 api/src/docker-entrypoint.sh

diff --git a/api/Dockerfile b/api/Dockerfile
index ec0d2a8..640fbcc 100644
--- a/api/Dockerfile
+++ b/api/Dockerfile
@@ -1,3 +1,13 @@
+FROM buildpack-deps@sha256:d56cd472000631b8faca51f40d4e3f1b20deffa588f9f207fa6c60efb62ba7c4 AS isolate
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends git libcap-dev && \
+    rm -rf /var/lib/apt/lists/* && \
+    git clone https://github.com/envicutor/isolate.git /tmp/isolate/ && \
+    cd /tmp/isolate && \
+    git checkout af6db68042c3aa0ded80787fbb78bc0846ea2114 && \
+    make -j$(nproc) install && \
+    rm -rf /tmp/*
+
 FROM node:15.10.0-buster-slim
 
 ENV DEBIAN_FRONTEND=noninteractive
@@ -15,6 +25,8 @@ RUN apt-get update && \
     libfftw3-dev libglpk-dev libqhull-dev libqrupdate-dev libsuitesparse-dev \
     libsundials-dev libpcre2-dev && \
     rm -rf /var/lib/apt/lists/*
+COPY --from=isolate /usr/local/bin/isolate /usr/local/bin
+COPY --from=isolate /usr/local/etc/isolate /usr/local/etc/isolate
 
 RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen
 
@@ -23,7 +35,5 @@ COPY ["package.json", "package-lock.json", "./"]
 RUN npm install
 COPY ./src ./src
 
-RUN make -C ./src/nosocket/ all && make -C ./src/nosocket/ install
-
-CMD [ "node", "src"]
+CMD ["/piston_api/src/docker-entrypoint.sh"]
 EXPOSE 2000/tcp
diff --git a/api/src/docker-entrypoint.sh b/api/src/docker-entrypoint.sh
new file mode 100755
index 0000000..1160a25
--- /dev/null
+++ b/api/src/docker-entrypoint.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# cgroup v2 only lets controllers be delegated to a child group once the
+# parent has no processes of its own, so park PID 1 in isolate/init before
+# enabling the controllers that isolate's --cg mode needs
+cd /sys/fs/cgroup && \
+mkdir isolate/ && \
+echo 1 > isolate/cgroup.procs && \
+echo '+cpuset +cpu +io +memory +pids' > cgroup.subtree_control && \
+cd isolate && \
+mkdir init && \
+echo 1 > init/cgroup.procs && \
+echo '+cpuset +memory' > cgroup.subtree_control && \
+echo "Initialized cgroup" && \
+exec su -- piston -c 'ulimit -n 65536 && node /piston_api/src'
diff --git a/api/src/globals.js b/api/src/globals.js
index 933d2ca..d83d834 100644
--- a/api/src/globals.js
+++ b/api/src/globals.js
@@ -11,7 +11,6 @@ const platform = `${is_docker() ? 
'docker' : 'baremetal'}-${fs
 module.exports = {
     data_directories: {
         packages: 'packages',
-        jobs: 'jobs',
     },
     version: require('../package.json').version,
     platform,
diff --git a/api/src/job.js b/api/src/job.js
index a2641f9..1ca235f 100644
--- a/api/src/job.js
+++ b/api/src/job.js
@@ -1,13 +1,9 @@
 const logplease = require('logplease');
-const logger = logplease.create('job');
 const { v4: uuidv4 } = require('uuid');
 const cp = require('child_process');
 const path = require('path');
 const config = require('./config');
-const globals = require('./globals');
 const fs = require('fs/promises');
-const fss = require('fs');
-const wait_pid = require('waitpid');
 
 const job_states = {
     READY: Symbol('Ready to be primed'),
@@ -15,15 +11,19 @@ const job_states = {
     EXECUTED: Symbol('Executed and ready for cleanup'),
 };
 
-let uid = 0;
-let gid = 0;
+const MAX_BOX_ID = 999;
+const ISOLATE_PATH = '/usr/local/bin/isolate';
+let box_id = 0;
 
 let remaining_job_spaces = config.max_concurrent_jobs;
 let job_queue = [];
 
+const get_next_box_id = () => ++box_id % MAX_BOX_ID;
+
 class Job {
-    #active_timeouts;
-    #active_parent_processes;
+    #box_id;
+    #metadata_file_path;
+    #box_dir;
 
     constructor({ runtime, files, args, stdin, timeouts, memory_limits }) {
         this.uuid = uuidv4();
@@ -46,29 +46,10 @@ class Job {
             this.stdin += '\n';
         }
 
-        this.#active_timeouts = [];
-        this.#active_parent_processes = [];
-
         this.timeouts = timeouts;
         this.memory_limits = memory_limits;
 
-        this.uid = config.runner_uid_min + uid;
-        this.gid = config.runner_gid_min + gid;
-
-        uid++;
-        gid++;
-
-        uid %= config.runner_uid_max - config.runner_uid_min + 1;
-        gid %= config.runner_gid_max - config.runner_gid_min + 1;
-
-        this.logger.debug(`Assigned uid=${this.uid} gid=${this.gid}`);
-
         this.state = job_states.READY;
-        this.dir = path.join(
-            config.data_directory,
-            globals.data_directories.jobs,
-            this.uuid
-        );
     }
 
     async prime() {
@@ -80,31 +61,46 @@ class Job {
         }
         this.logger.info(`Priming job`);
         remaining_job_spaces--;
-        this.logger.debug('Writing files to job cache');
+        this.logger.debug('Running isolate --init');
+        this.#box_id = get_next_box_id();
+        this.#metadata_file_path = `/tmp/${this.#box_id}-metadata.txt`;
+        await new Promise((res, rej) => {
+            cp.exec(
+                `isolate --init --cg -b${this.#box_id}`,
+                (error, stdout, stderr) => {
+                    if (error) {
+                        return rej(
+                            `Failed to run isolate --init: ${error.message}\nstdout: ${stdout}\nstderr: ${stderr}`
+                        );
+                    }
+                    if (stdout === '') {
+                        return rej('Received empty stdout from isolate --init');
+                    }
+                    // isolate prints the box root followed by a newline; its
+                    // `box` subdirectory is what gets mounted at /box inside
+                    // the sandbox
+                    this.#box_dir = path.join(stdout.trim(), 'box');
+                    res();
+                }
+            );
+        });
 
-        this.logger.debug(`Transfering ownership`);
-
-        await fs.mkdir(this.dir, { mode: 0o700 });
-        await fs.chown(this.dir, this.uid, this.gid);
+        this.logger.debug(`Creating submission files in Isolate box`);
+        const submission_dir = path.join(this.#box_dir, 'submission');
+        await fs.mkdir(submission_dir);
 
         for (const file of this.files) {
-            const file_path = path.join(this.dir, file.name);
-            const rel = path.relative(this.dir, file_path);
-            const file_content = Buffer.from(file.content, file.encoding);
+            const file_path = path.join(submission_dir, file.name);
+            const rel = path.relative(submission_dir, file_path);
 
             if (rel.startsWith('..'))
                 throw Error(
                     `File path "${file.name}" tries to escape parent directory: ${rel}`
                 );
 
+            const file_content = Buffer.from(file.content, file.encoding);
+
             await fs.mkdir(path.dirname(file_path), {
                 recursive: true,
                 mode: 0o700,
             });
-            await fs.chown(path.dirname(file_path), this.uid, this.gid);
 
             await fs.write_file(file_path, file_content);
-            await fs.chown(file_path, this.uid, this.gid);
         }
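+        // Everything under submission/ is visible to the sandboxed process
+        // at /box/submission, which is also its working directory during runs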
 
         this.state = job_states.PRIMED;
 
@@ -112,167 +108,204 @@ class Job {
         this.logger.debug('Primed job');
     }
 
-    exit_cleanup() {
-        for (const timeout of this.#active_timeouts) {
-            clear_timeout(timeout);
-        }
-        this.#active_timeouts = [];
-        this.logger.debug('Cleared the active timeouts');
-
-        this.cleanup_processes();
-        this.logger.debug(`Finished exit cleanup`);
-    }
-
-    close_cleanup() {
-        for (const proc of this.#active_parent_processes) {
-            proc.stderr.destroy();
-            if (!proc.stdin.destroyed) {
-                proc.stdin.end();
-                proc.stdin.destroy();
-            }
-            proc.stdout.destroy();
-        }
-        this.#active_parent_processes = [];
-        this.logger.debug('Destroyed processes writables');
-    }
-
     async safe_call(file, args, timeout, memory_limit, event_bus = null) {
-        return new Promise((resolve, reject) => {
-            const nonetwork = config.disable_networking ? ['nosocket'] : [];
+        var stdout = '';
+        var stderr = '';
+        var output = '';
 
-            const prlimit = [
-                'prlimit',
-                '--nproc=' + this.runtime.max_process_count,
-                '--nofile=' + this.runtime.max_open_files,
-                '--fsize=' + this.runtime.max_file_size,
-            ];
-
-            const timeout_call = [
-                'timeout',
+        const proc = cp.spawn(
+            ISOLATE_PATH,
+            [
+                '--run',
+                `-b${this.#box_id}`,
+                `--meta=${this.#metadata_file_path}`,
+                '--cg',
                 '-s',
-                '9',
-                Math.ceil(timeout / 1000),
-            ];
-
-            if (memory_limit >= 0) {
-                prlimit.push('--as=' + memory_limit);
-            }
-
-            const proc_call = [
-                'nice',
-                ...timeout_call,
-                ...prlimit,
-                ...nonetwork,
-                'bash',
+                '-c',
+                '/box/submission',
+                '-e',
+                `--dir=/runtime=${this.runtime.pkgdir}`,
+                `--processes=${this.runtime.max_process_count}`,
+                `--open-files=${this.runtime.max_open_files}`,
+                `--fsize=${this.runtime.max_file_size}`,
+                `--time=${timeout / 1000}`,
+                `--extra-time=0`,
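+                // A negative memory_limit means "no limit"; the sandbox
+                // shares the host's network unless networking is disabled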
+                ...(memory_limit >= 0 ? [`--cg-mem=${memory_limit}`] : []),
+                ...(config.disable_networking ? [] : ['--share-net']),
+                '--',
+                '/bin/bash',
                 file,
                 ...args,
-            ];
-
-            var stdout = '';
-            var stderr = '';
-            var output = '';
-
-            const proc = cp.spawn(proc_call[0], proc_call.splice(1), {
+            ],
+            {
                 env: {
                     ...this.runtime.env_vars,
                     PISTON_LANGUAGE: this.runtime.language,
                 },
                 stdio: 'pipe',
-                cwd: this.dir,
-                uid: this.uid,
-                gid: this.gid,
-                detached: true, //give this process its own process group
-            });
-
-            this.#active_parent_processes.push(proc);
-
-            if (event_bus === null) {
-                proc.stdin.write(this.stdin);
-                proc.stdin.end();
-                proc.stdin.destroy();
-            } else {
-                event_bus.on('stdin', data => {
-                    proc.stdin.write(data);
-                });
-
-                event_bus.on('kill', signal => {
-                    proc.kill(signal);
-                });
             }
+        );
 
-            const kill_timeout =
-                (timeout >= 0 &&
-                    set_timeout(async _ => {
-                        this.logger.info(`Timeout exceeded timeout=${timeout}`);
-                        try {
-                            process.kill(proc.pid, 'SIGKILL');
-                        }
-                        catch (e) {
-                            // Could already be dead and just needs to be waited on
-                            this.logger.debug(
-                                `Got error while SIGKILLing process ${proc}:`,
-                                e
-                            );
-                        }
-                    }, timeout)) ||
-                null;
-            this.#active_timeouts.push(kill_timeout);
+        if (event_bus === null) {
+            proc.stdin.write(this.stdin);
+            proc.stdin.end();
+            proc.stdin.destroy();
+        } else {
+            event_bus.on('stdin', data => {
+                proc.stdin.write(data);
+            });
 
-            proc.stderr.on('data', async data => {
-                if (event_bus !== null) {
-                    event_bus.emit('stderr', data);
-                } else if ((stderr.length + data.length) > this.runtime.output_max_size) {
-                    this.logger.info(`stderr length exceeded`);
-                    try {
-                        process.kill(proc.pid, 'SIGKILL');
-                    }
-                    catch (e) {
-                        // Could already be dead and just needs to be waited on
-                        this.logger.debug(
-                            `Got error while SIGKILLing process ${proc}:`,
-                            e
-                        );
-                    }
-                } else {
-                    stderr += data;
-                    output += data;
-                }
-            });
+            event_bus.on('kill', signal => {
+                proc.kill(signal);
+            });
+        }
 
-            proc.stdout.on('data', async data => {
-                if (event_bus !== null) {
-                    event_bus.emit('stdout', data);
-                } else if ((stdout.length + data.length) > this.runtime.output_max_size) {
-                    this.logger.info(`stdout length exceeded`);
-                    try {
-                        process.kill(proc.pid, 'SIGKILL');
-                    }
-                    catch (e) {
-                        // Could already be dead and just needs to be waited on
-                        this.logger.debug(
-                            `Got error while SIGKILLing process ${proc}:`,
-                            e
-                        );
-                    }
-                } else {
-                    stdout += data;
-                    output += data;
-                }
-            });
+        proc.stderr.on('data', async data => {
+            if (event_bus !== null) {
+                event_bus.emit('stderr', data);
+            } else if (
+                stderr.length + data.length >
+                this.runtime.output_max_size
+            ) {
+                this.logger.info(`stderr length exceeded`);
+                try {
+                    process.kill(proc.pid, 'SIGABRT');
+                } catch (e) {
+                    // Could already be dead and just needs to be waited on
+                    this.logger.debug(
+                        `Got error while SIGABRTing process ${proc}:`,
+                        e
+                    );
+                }
+            } else {
+                stderr += data;
+                output += data;
+            }
+        });
+
+        proc.stdout.on('data', async data => {
+            if (event_bus !== null) {
+                event_bus.emit('stdout', data);
+            } else if (
+                stdout.length + data.length >
+                this.runtime.output_max_size
+            ) {
+                this.logger.info(`stdout length exceeded`);
+                try {
+                    process.kill(proc.pid, 'SIGABRT');
+                } catch (e) {
+                    // Could already be dead and just needs to be waited on
+                    this.logger.debug(
+                        `Got error while SIGABRTing process ${proc}:`,
+                        e
+                    );
+                }
+            } else {
+                stdout += data;
+                output += data;
+            }
+        });
+
+        let memory = null;
+        let code = null;
+        let signal = null;
+        let message = null;
+        let status = null;
+        let time = null;
+
+        // Wait for the sandbox to exit before touching the metadata file;
+        // isolate only writes it once the contained process has finished
+        const exit_error = await new Promise(resolve => {
+            proc.on('close', () => resolve(null));
+            proc.on('error', error => resolve(error));
+        });
+
+        if (exit_error !== null) {
+            throw { error: exit_error, stdout, stderr, output };
+        }
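+        // isolate reports run statistics in the --meta file as one
+        // colon-separated key:value pair per line, e.g. "time:0.003"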
+        try {
+            const metadata_str = await fs.readFile(
+                this.#metadata_file_path,
+                'utf-8'
+            );
+            const metadata_lines = metadata_str.split('\n');
+            for (const line of metadata_lines) {
+                if (!line) continue;
+
+                const [key, value] = line.split(':');
+                if (key === undefined || value === undefined) {
+                    throw new Error(
+                        `Failed to parse metadata file, received: ${line}`
+                    );
+                }
+
+                // Check for NaN explicitly: a plain truthiness check would
+                // reject legitimate zero values such as exitcode:0
+                switch (key) {
+                    case 'cg-mem':
+                        memory = parseInt(value);
+                        if (Number.isNaN(memory)) {
+                            throw new Error(
+                                `Failed to parse memory usage, received value: ${value}`
+                            );
+                        }
+                        break;
+                    case 'exitcode':
+                        code = parseInt(value);
+                        if (Number.isNaN(code)) {
+                            throw new Error(
+                                `Failed to parse exit code, received value: ${value}`
+                            );
+                        }
+                        break;
+                    case 'exitsig':
+                        signal = parseInt(value);
+                        if (Number.isNaN(signal)) {
+                            throw new Error(
+                                `Failed to parse exit signal, received value: ${value}`
+                            );
+                        }
+                        break;
+                    case 'message':
+                        message = value;
+                        break;
+                    case 'status':
+                        status = value;
+                        break;
+                    case 'time':
+                        time = parseFloat(value);
+                        if (Number.isNaN(time)) {
+                            throw new Error(
+                                `Failed to parse cpu time, received value: ${value}`
+                            );
+                        }
+                        break;
+                    default:
+                        break;
+                }
+            }
+        } catch (e) {
+            throw new Error(
+                `Error reading metadata file: ${this.#metadata_file_path}\nError: ${e.message}\nIsolate run stdout: ${stdout}\nIsolate run stderr: ${stderr}`
+            );
+        }
 
-            proc.on('exit', () => this.exit_cleanup());
-
-            proc.on('close', (code, signal) => {
-                this.close_cleanup();
-
-                resolve({ stdout, stderr, code, signal, output });
-            });
-
-            proc.on('error', err => {
-                this.exit_cleanup();
-                this.close_cleanup();
-
-                reject({ error: err, stdout, stderr, output });
-            });
-        });
+        return {
+            stdout,
+            stderr,
+            code,
+            signal,
+            output,
+            memory,
+            message,
+            status,
+            time,
+        };
     }
 
@@ -281,7 +314,7 @@ class Job {
         if (this.state !== job_states.PRIMED) {
             throw new Error(
                 'Job must be in primed state, current state: ' +
-                this.state.toString()
+                    this.state.toString()
             );
         }
@@ -298,49 +331,49 @@ class Job {
         const { emit_event_bus_result, emit_event_bus_stage } = event_bus === null ? 
{ - emit_event_bus_result: () => { }, - emit_event_bus_stage: () => { }, - } + emit_event_bus_result: () => {}, + emit_event_bus_stage: () => {}, + } : { - emit_event_bus_result: (stage, result, event_bus) => { - const { error, code, signal } = result; - event_bus.emit('exit', stage, { - error, - code, - signal, - }); - }, - emit_event_bus_stage: (stage, event_bus) => { - event_bus.emit('stage', stage); - }, - }; + emit_event_bus_result: (stage, result) => { + const { error, code, signal } = result; + event_bus.emit('exit', stage, { + error, + code, + signal, + }); + }, + emit_event_bus_stage: stage => { + event_bus.emit('stage', stage); + }, + }; if (this.runtime.compiled) { this.logger.debug('Compiling'); - emit_event_bus_stage('compile', event_bus); + emit_event_bus_stage('compile'); compile = await this.safe_call( - path.join(this.runtime.pkgdir, 'compile'), + '/runtime/compile', code_files.map(x => x.name), this.timeouts.compile, this.memory_limits.compile, event_bus ); - emit_event_bus_result('compile', compile, event_bus); + emit_event_bus_result('compile', compile); compile_errored = compile.code !== 0; } let run; if (!compile_errored) { this.logger.debug('Running'); - emit_event_bus_stage('run', event_bus); + emit_event_bus_stage('run'); run = await this.safe_call( - path.join(this.runtime.pkgdir, 'run'), + '/runtime/run', [code_files[0].name, ...this.args], this.timeouts.run, this.memory_limits.run, event_bus ); - emit_event_bus_result('run', run, event_bus); + emit_event_bus_result('run', run); } this.state = job_states.EXECUTED; @@ -353,135 +386,10 @@ class Job { }; } - cleanup_processes(dont_wait = []) { - let processes = [1]; - const to_wait = []; - this.logger.debug(`Cleaning up processes`); - - while (processes.length > 0) { - processes = []; - - const proc_ids = fss.readdir_sync('/proc'); - - processes = proc_ids.map(proc_id => { - if (isNaN(proc_id)) return -1; - try { - const proc_status = fss.read_file_sync( - path.join('/proc', proc_id, 'status') - ); - const proc_lines = proc_status.to_string().split('\n'); - const state_line = proc_lines.find(line => - line.starts_with('State:') - ); - const uid_line = proc_lines.find(line => - line.starts_with('Uid:') - ); - const [_, ruid, euid, suid, fuid] = uid_line.split(/\s+/); - - const [_1, state, user_friendly] = state_line.split(/\s+/); - - const proc_id_int = parse_int(proc_id); - - // Skip over any processes that aren't ours. 
-                if (ruid != this.uid && euid != this.uid) return -1;
-
-                if (state == 'Z') {
-                    // Zombie process, just needs to be waited, regardless of the user id
-                    if (!to_wait.includes(proc_id_int))
-                        to_wait.push(proc_id_int);
-
-                    return -1;
-                }
-                // We should kill in all other state (Sleep, Stopped & Running)
-
-                return proc_id_int;
-            } catch {
-                return -1;
-            }
-
-            return -1;
-        });
-
-        processes = processes.filter(p => p > 0);
-
-        if (processes.length > 0)
-            this.logger.debug(`Got processes to kill: ${processes}`);
-
-        for (const proc of processes) {
-            // First stop the processes, but keep their resources allocated so they cant re-fork
-            try {
-                process.kill(proc, 'SIGSTOP');
-            } catch (e) {
-                // Could already be dead
-                this.logger.debug(
-                    `Got error while SIGSTOPping process ${proc}:`,
-                    e
-                );
-            }
-        }
-
-        for (const proc of processes) {
-            // Then clear them out of the process tree
-            try {
-                process.kill(proc, 'SIGKILL');
-            } catch (e) {
-                // Could already be dead and just needs to be waited on
-                this.logger.debug(
-                    `Got error while SIGKILLing process ${proc}:`,
-                    e
-                );
-            }
-
-            to_wait.push(proc);
-        }
-    }
-
-        this.logger.debug(
-            `Finished kill-loop, calling wait_pid to end any zombie processes`
-        );
-
-        for (const proc of to_wait) {
-            if (dont_wait.includes(proc)) continue;
-
-            wait_pid(proc);
-        }
-
-        this.logger.debug(`Cleaned up processes`);
-    }
-
-    async cleanup_filesystem() {
-        for (const clean_path of globals.clean_directories) {
-            const contents = await fs.readdir(clean_path);
-
-            for (const file of contents) {
-                const file_path = path.join(clean_path, file);
-
-                try {
-                    const stat = await fs.stat(file_path);
-
-                    if (stat.uid === this.uid) {
-                        await fs.rm(file_path, {
-                            recursive: true,
-                            force: true,
-                        });
-                    }
-                } catch (e) {
-                    // File was somehow deleted in the time that we read the dir to when we checked the file
-                    this.logger.warn(`Error removing file ${file_path}: ${e}`);
-                }
-            }
-        }
-
-        await fs.rm(this.dir, { recursive: true, force: true });
-    }
-
     async cleanup() {
         this.logger.info(`Cleaning up job`);
-        this.exit_cleanup(); // Run process janitor, just incase there are any residual processes somehow
-        this.close_cleanup();
-        await this.cleanup_filesystem();
-
+        await fs.rm(this.#metadata_file_path);
+        // Reclaim the sandbox so the box id can be reused by a later job
+        await new Promise((res, rej) => {
+            cp.exec(
+                `isolate --cleanup --cg -b${this.#box_id}`,
+                (error, stdout, stderr) => {
+                    if (error) {
+                        return rej(
+                            `Failed to run isolate --cleanup: ${error.message}\nstdout: ${stdout}\nstderr: ${stderr}`
+                        );
+                    }
+                    res();
+                }
+            );
+        });
         remaining_job_spaces++;
         if (job_queue.length > 0) {
             job_queue.shift()();
diff --git a/docker-compose.dev.yaml b/docker-compose.dev.yaml
index 8a0d385..a3d74cc 100644
--- a/docker-compose.dev.yaml
+++ b/docker-compose.dev.yaml
@@ -4,15 +4,12 @@ services:
     api:
         build: api
         container_name: piston_api
-        cap_add:
-            - CAP_SYS_ADMIN
+        privileged: true
         restart: always
         ports:
             - 2000:2000
         volumes:
             - ./data/piston/packages:/piston/packages
-        environment:
-            - PISTON_REPO_URL=http://repo:8000/index
         tmpfs:
             - /piston/jobs:exec,uid=1000,gid=1000,mode=711
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 839b340..1d8e307 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -5,6 +5,7 @@ services:
         image: ghcr.io/engineer-man/piston
         container_name: piston_api
         restart: always
+        privileged: true
         ports:
             - 2000:2000
         volumes:
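
Notes: isolate reports run statistics through the --meta file as one
key:value pair per line; `status` and `message` only appear when the run
was abnormal (TO, SG, RE or XX). A successful run produces a file along
these lines (illustrative values, not taken from a real run):

    time:0.045
    time-wall:0.050
    max-rss:3184
    csw-voluntary:10
    csw-forced:2
    cg-mem:3456
    exitcode:0

A quick end-to-end smoke test against a running container (this assumes
the default port mapping from docker-compose and at least one installed
runtime, e.g. python):

    curl -s http://localhost:2000/api/v2/execute \
        -H 'Content-Type: application/json' \
        -d '{
              "language": "python",
              "version": "*",
              "files": [{ "name": "main.py", "content": "print(40 + 2)" }]
            }'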