# Source code for gyoza.cli.commands.run_container

"""Run-container command for the gyoza CLI."""

from __future__ import annotations

import ast
import json
import subprocess
import tempfile
from pathlib import Path
from typing import Any

import typer

from gyoza.cli.errors import fail

_SEPARATOR = "━" * 50
_CONTAINER_INPUT_PATH = "/data/input.json"
_CONTAINER_OUTPUT_PATH = "/data/output.json"
_CONTAINER_MAPPED_DIR = "/data/mapped"


# ---------------------------------------------------------------------------
# Input helpers
# ---------------------------------------------------------------------------


def _parse_inline_input(raw: str) -> dict[str, Any]:
    """
    Parse an inline string into a dict.

    Parameters
    ----------
    raw : str
        JSON or Python-literal dict string.

    Returns
    -------
    dict[str, Any]
        Parsed input data.

    Raises
    ------
    typer.Exit
        If the string cannot be parsed or is not a dict.
    """
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, SyntaxError) as exc:
            fail(f"invalid --inline-input: {exc}")

    if not isinstance(parsed, dict):
        fail("--inline-input must be a JSON object (dictionary)")
    return parsed


def _write_temp_json(data: dict[str, Any]) -> Path:
    """
    Serialise a dict to a named temporary JSON file.

    Parameters
    ----------
    data : dict[str, Any]
        Data to serialise.

    Returns
    -------
    Path
        Path to the created temporary file (caller is responsible for cleanup).
    """
    tmp = tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".json",
        prefix=".gyoza-input-",
        delete=False,
    )
    json.dump(data, tmp)
    tmp.flush()
    tmp.close()
    return Path(tmp.name)


def _map_paths_in_input(
    input_path: Path,
) -> tuple[Path, list[tuple[str, str]]]:
    """
    Scan input JSON values for local paths and generate volume mappings.

    For each non-empty top-level string value that resolves to an existing
    local file or directory, a ``(host_path, container_path)`` bind-mount
    pair is produced and the value in the data is rewritten to the container
    path. For a file, the file's parent directory is what gets mounted.
    The rewritten data is written to a new temporary file.

    Empty strings and strings that cannot be probed as a path (for example
    free text that is too long to be a file name) are left untouched.

    Parameters
    ----------
    input_path : Path
        Path to the input JSON file on the host.

    Returns
    -------
    tuple[Path, list[tuple[str, str]]]
        Rewritten temporary input file and list of volume pairs.
    """
    with input_path.open(encoding="utf-8") as fh:
        data: dict[str, Any] = json.load(fh)

    volumes: list[tuple[str, str]] = []
    for idx, (key, value) in enumerate(data.items()):
        # ``Path("")`` normalises to "." — without the emptiness check an
        # empty string value would silently mount the working directory.
        if not isinstance(value, str) or not value:
            continue

        candidate = Path(value)
        try:
            if not candidate.exists():
                continue
            resolved = candidate.resolve()
        except (OSError, ValueError):
            # Arbitrary text (over-long names, embedded NUL, ...) can make the
            # filesystem probe itself fail; such a value is not a path.
            continue

        # The key's position keeps container mount points unique per value.
        container_dir = f"{_CONTAINER_MAPPED_DIR}/{idx}"

        if resolved.is_file():
            # Mount the parent directory and point at the file inside it.
            volumes.append((str(resolved.parent), container_dir))
            data[key] = f"{container_dir}/{resolved.name}"
        else:
            volumes.append((str(resolved), container_dir))
            data[key] = container_dir

    return _write_temp_json(data), volumes


def _resolve_input(
    input_path: str | None,
    inline_input: dict[str, Any] | None,
) -> tuple[Path | None, list[Path]]:
    """
    Resolve the effective input file, handling inline data.

    Parameters
    ----------
    input_path : str | None
        Host path to an existing input JSON file.
    inline_input : dict[str, Any] | None
        Inline input data to serialise to a temp file.

    Returns
    -------
    tuple[Path | None, list[Path]]
        Resolved input path (or ``None``) and temp files to clean up.

    Raises
    ------
    FileNotFoundError
        If *input_path* does not exist on disk.
    """
    if inline_input is not None:
        tmp = _write_temp_json(inline_input)
        return tmp, [tmp]

    if input_path is not None:
        resolved = Path(input_path).resolve()
        if not resolved.exists():
            msg = f"Input file not found: {resolved}"
            raise FileNotFoundError(msg)
        return resolved, []

    return None, []


# ---------------------------------------------------------------------------
# Docker command builder
# ---------------------------------------------------------------------------


def _build_docker_cmd(
    image: str,
    *,
    input_path: Path | None,
    output_path: Path | None,
    extra_volumes: list[tuple[str, str]],
    extra_docker_args: list[str],
) -> list[str]:
    """
    Assemble the ``docker run`` command token list.

    Parameters
    ----------
    image : str
        Docker image to run.
    input_path : Path | None
        Host path to the input file to mount.
    output_path : Path | None
        Host path where the container will write its output.
    extra_volumes : list[tuple[str, str]]
        Additional ``(host_path, container_path)`` bind-mount pairs.
    extra_docker_args : list[str]
        Arguments forwarded verbatim to ``docker run``.

    Returns
    -------
    list[str]
        Command tokens ready for ``subprocess.run``.
    """
    cmd: list[str] = ["docker", "run", "--rm"]

    if input_path is not None:
        cmd += ["-v", f"{input_path.resolve()}:{_CONTAINER_INPUT_PATH}:ro"]
        cmd += ["-e", f"GYOZA_INPUT_PATH={_CONTAINER_INPUT_PATH}"]

    if output_path is not None:
        out_dir = str(output_path.resolve().parent)
        cmd += ["-v", f"{out_dir}:/data/output"]
        cmd += ["-e", f"GYOZA_OUTPUT_PATH=/data/output/{output_path.name}"]

    for host_path, container_path in extra_volumes:
        cmd += ["-v", f"{host_path}:{container_path}"]

    cmd += extra_docker_args
    cmd.append(image)
    return cmd


# ---------------------------------------------------------------------------
# Command
# ---------------------------------------------------------------------------


# NOTE(review): the docstring below is kept word-for-word; if this function is
# registered as a Typer command it presumably doubles as the ``--help`` text,
# so implementation notes live in comments instead.
def run_container(
    image: str = typer.Argument(
        ...,
        help="Docker image to run (e.g. 'myregistry/my-op:latest').",
    ),
    input: str | None = typer.Option(  # noqa: A002
        None,
        "--input",
        "-i",
        help=(
            "Host path to the input JSON file. "
            "Mounted read-only into the container. "
            "Ignored when --inline-input is provided."
        ),
    ),
    output: str | None = typer.Option(
        None,
        "--output",
        "-o",
        help="Host path where the container writes its output JSON file.",
    ),
    inline_input: str | None = typer.Option(
        None,
        "--inline-input",
        help=(
            'Inline JSON dict string for inputs, e.g. \'{"a": 1, "b": 2}\'. '
            "Serialised to a temp file and mounted as input. "
            "Mutually exclusive with --input."
        ),
    ),
    no_map_paths: bool = typer.Option(
        False,
        "--no-map-paths",
        help=(
            "Disable automatic path mapping. By default, string values "
            "in the input JSON that point to local files/directories are "
            "automatically mounted into the container."
        ),
    ),
    docker_args: list[str] | None = typer.Argument(
        None,
        help=(
            "Extra arguments forwarded verbatim to 'docker run'. "
            "Place them after '--', e.g.: "
            "gyoza run-container myimage -- --gpus all --network host"
        ),
    ),
) -> None:
    """Run a gyoza operation inside its Docker container.

    Wraps ``docker run`` with automatic volume mounts for input/output files.
    Any arguments after ``--`` are forwarded directly to ``docker run``.

    Execution modes:

    1. ``--inline-input`` — parse input from the CLI string.
    2. ``--input`` — read input from a JSON file on the host.
    3. default — let the container use its built-in env defaults.

    By default, string values in the input that point to existing local
    files or directories are bind-mounted and rewritten transparently.
    Use ``--no-map-paths`` to disable this.
    """
    if inline_input and input:
        fail("--input and --inline-input are mutually exclusive")

    # Banner summarising the effective options before anything is executed.
    typer.echo(f"\n{_SEPARATOR}")
    typer.echo(" 🐳 GYOZA RUN-CONTAINER")
    typer.echo(f"{_SEPARATOR}\n")
    typer.echo(f" 🖼️ Image: {image}")
    if inline_input:
        typer.echo(" 📝 Mode: inline")
        typer.echo(f" 📥 Input: {inline_input}")
    elif input:
        typer.echo(" 📝 Mode: path")
        typer.echo(f" 📥 Input: {input}")
    else:
        typer.echo(" 📝 Mode: default (container env vars)")
    if output:
        typer.echo(f" 📤 Output: {output}")
    typer.echo(f" 🗺️ Path mapping: {'disabled' if no_map_paths else 'enabled'}")
    if docker_args:
        typer.echo(f" ⚙️ Extra args: {' '.join(docker_args)}")
    typer.echo(f"\n{_SEPARATOR}\n")

    parsed_inline = _parse_inline_input(inline_input) if inline_input else None

    # Every temp file created below is registered here, and the ``finally``
    # clause removes them on all exit paths, not only after docker has run.
    temp_files: list[Path] = []
    try:
        resolved_input, created = _resolve_input(input, parsed_inline)
        temp_files.extend(created)

        extra_volumes: list[tuple[str, str]] = []
        if not no_map_paths and resolved_input is not None:
            rewritten, volumes = _map_paths_in_input(resolved_input)
            temp_files.append(rewritten)
            extra_volumes.extend(volumes)
            # The container must read the copy holding container-side paths.
            resolved_input = rewritten

        resolved_output = Path(output).resolve() if output else None

        cmd = _build_docker_cmd(
            image,
            input_path=resolved_input,
            output_path=resolved_output,
            extra_volumes=extra_volumes,
            extra_docker_args=docker_args or [],
        )
        subprocess.run(cmd, check=True, text=True)  # noqa: S603
    except FileNotFoundError as exc:
        fail(str(exc))
    except json.JSONDecodeError as exc:
        # A malformed --input file surfaces while scanning it for paths.
        fail(f"invalid input JSON: {exc}")
    except subprocess.CalledProcessError as exc:
        fail(f"docker run failed (exit code {exc.returncode})")
    finally:
        for tmp in temp_files:
            tmp.unlink(missing_ok=True)

    typer.echo(f"\n{_SEPARATOR}")
    typer.echo(" ✅ Done.")
    typer.echo(f"{_SEPARATOR}\n")