From 174bd8607f614612b4d934ba096321a48738f1c1 Mon Sep 17 00:00:00 2001
From: Daniel Langbein
Date: Tue, 10 Jan 2023 17:24:35 +0100
Subject: [PATCH] add execute_print_capture_bin

---
 .gitignore           |   3 +-
 subprocess_util.py   |   4 +-
 subprocess_util_2.py | 107 +++++++++++++++++++++++++++++++++++++++++++
 test.py              |  14 ++++++
 4 files changed, 125 insertions(+), 3 deletions(-)
 create mode 100644 subprocess_util_2.py

diff --git a/.gitignore b/.gitignore
index 757fee3..d56e3c4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-/.idea
\ No newline at end of file
+/.idea
+/test
\ No newline at end of file
diff --git a/subprocess_util.py b/subprocess_util.py
index 99d69a4..bb27ade 100644
--- a/subprocess_util.py
+++ b/subprocess_util.py
@@ -94,7 +94,7 @@ def execute_print_capture(command: list[str], encoding='UTF-8') -> [int, list[st
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
         close_fds=True,
-        bufsize=1,
+        bufsize=1,  # line buffering
         text=True,
         encoding=encoding,
     )
@@ -113,8 +113,8 @@ def execute_print_capture(command: list[str], encoding='UTF-8') -> [int, list[st
     for t in (t_out, t_err, t_write):
         t.daemon = True
         t.start()
-    returncode = process.wait()
 
+    returncode = process.wait()
     for t in (t_out, t_err):
         t.join()
 
diff --git a/subprocess_util_2.py b/subprocess_util_2.py
new file mode 100644
index 0000000..4e27624
--- /dev/null
+++ b/subprocess_util_2.py
@@ -0,0 +1,107 @@
+from pathlib import Path
+import sys
+import threading
+import subprocess
+from typing import AnyStr, IO
+
+
+def _save_chunk(chunk: bytes, stdout_dir: Path, ct: int):
+    print(f"Saving chunk {ct}")
+    file = stdout_dir.joinpath(str(ct))
+    file.write_bytes(chunk)
+
+
+def _save_output(pipe: IO[AnyStr], stdout_dir: Path):
+    stdout_dir.mkdir(parents=True, exist_ok=False)  # TODO
+
+    b: bytes
+    ct: int = 1
+    for b in pipe:
+        stdout_dir.joinpath(str(ct)).write_bytes(b)
+        ct += 1
+
+    # TODO: Has this any effect?
+    # pipe.close()
+
+
+def _save_output_chunks(pipe: IO[AnyStr], stdout_dir: Path, chunk_size):
+    stdout_dir.mkdir(parents=True, exist_ok=False)  # TODO
+
+    # b: bytes
+    # ct: int = 1
+    # for b in pipe:
+    #     stdout_dir.joinpath(str(ct)).write_bytes(b)
+    #     ct += 1
+
+    ct = 0
+    remaining_bytes = chunk_size
+    chunk: bytes = b''
+    while True:
+
+        # https://docs.python.org/3/library/io.html#io.RawIOBase.read
+        # If 0 bytes are returned, and size was not 0, this indicates end of file.
+        # If the object is in non-blocking mode and no bytes are available, None is returned.
+        b = pipe.read(remaining_bytes)
+        if len(b) == 0:
+            # EOF reached
+            _save_chunk(chunk, stdout_dir, ct)
+            break
+        chunk += b
+
+        chunk_len = len(chunk)
+        if chunk_len == chunk_size:
+            _save_chunk(chunk, stdout_dir, ct)
+            chunk = b''
+            remaining_bytes = chunk_size
+            ct += 1
+        elif chunk_len < chunk_size:
+            remaining_bytes = chunk_size - chunk_len
+        else:
+            raise ValueError("Invalid state")
+
+    # TODO: Has this any effect?
+    # pipe.close()
+
+
+def _print_output(pipe: IO[AnyStr]):
+    line: str
+    for line in pipe:
+        sys.stderr.write(f'[STDERR] {line}')
+
+    # TODO: Has this any effect?
+    # pipe.close()
+
+
+# Goal: We want to save the stdout to small files and print stderr while running the command.
+def execute_print_capture_bin(command: list[str], stdout_dir: Path, chunk_size = 1024 * 1024) -> int:
+    """
+    Executes the given command saving its stdout to stdout_dir.
+
+    Stderr is printed in real time.
+
+    :param chunk_size: Defaults to 1MB (1024*1024).
+    :param stdout_dir: Directory where stdout is saved to.
+    :param command: Command to execute, e.g. ['ls', '-la', '/home']
+    :return: returncode
+    """
+    process = subprocess.Popen(
+        command,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        close_fds=True,
+    )
+
+    t_out = threading.Thread(
+        target=_save_output_chunks, args=(process.stdout, stdout_dir, chunk_size))
+    t_err = threading.Thread(
+        target=_print_output, args=(process.stderr,))
+
+    for t in (t_out, t_err):
+        t.daemon = True
+        t.start()
+
+    returncode = process.wait()
+    for t in (t_out, t_err):
+        t.join()
+
+    return returncode
diff --git a/test.py b/test.py
index ab359e9..e871a3b 100644
--- a/test.py
+++ b/test.py
@@ -1,11 +1,25 @@
+from pathlib import Path
+
 from subprocess_util import execute_print_capture
+from subprocess_util_2 import execute_print_capture_bin
 
 
 def test():
+    print("TEST ONE")
+
     returncode, out, err = execute_print_capture(['ls', '-la'])
     print()
     returncode, out, err = execute_print_capture(['ls', '/foo/bar'])
 
+    print("TEST TWO-1")
+    execute_print_capture(['rm', '-rf', 'test/1', 'test/2', 'test/3'])
+    returncode = execute_print_capture_bin(['ls', '-la'], Path('test/1'))
+    print("TEST TWO-2")
+    returncode = execute_print_capture_bin(['ls', '/foo/bar'], Path('test/2'))
+    print("TEST TWO-3")
+    returncode = execute_print_capture_bin(['cat', 'subprocess_util.py'], Path('test/3'),
+                                           chunk_size=1024)
+
 
 if __name__ == '__main__':
     test()