"""Run a command and hand its stdout to a transfer callback in fixed-size chunks."""

import subprocess
import sys
import threading
from pathlib import Path
from typing import AnyStr, IO, Callable, Optional


def _chunk_transfer(chunk_file: Path):
    """Default transfer callback: only prints a message, transfers nothing.

    :param chunk_file: Path of the chunk that would be transferred.
    """
    print(f'Transferring chunk {chunk_file} to ... (This is the default method, it has no effect)')


def _rotate_chunk(chunk_file: Path, chunk_transfer_fun: Callable, chunk_transfer_args: tuple):
    """Transfer the chunk currently stored in ``chunk_file``, then delete the file.

    If ``chunk_transfer_fun`` raises, the chunk file is left in place and the
    exception propagates to the caller.

    :param chunk_file: File holding the chunk; must exist.
    :param chunk_transfer_fun: Callable invoked as ``chunk_transfer_fun(*chunk_transfer_args)``.
    :param chunk_transfer_args: Positional arguments for ``chunk_transfer_fun``.
    :raises FileNotFoundError: If ``chunk_file`` does not exist when it is removed.
    """
    print(f'Transferring chunk {chunk_file}')
    chunk_transfer_fun(*chunk_transfer_args)
    print(f'Removing chunk {chunk_file}')
    chunk_file.unlink(missing_ok=False)


def _save_chunk(chunk: bytes, chunk_file: Path):
    """Write ``chunk`` to a newly created ``chunk_file``.

    :param chunk: Bytes to store.
    :param chunk_file: Destination path; must not exist yet.
    :raises FileExistsError: If ``chunk_file`` already exists.
    """
    print(f'Saving chunk {chunk_file}')
    # Mode 'x' fails if the file already exists, so a chunk that has not been
    # rotated away yet is never overwritten.
    with open(chunk_file, 'xb') as f:
        f.write(chunk)


def _save_output(pipe: IO[bytes], stdout_dir: Path):
    """Store every line read from ``pipe`` as its own file in ``stdout_dir``.

    The files are named ``1``, ``2``, ... in the order the lines are read.
    The pipe is not closed here.

    :param pipe: Binary stream to read from.
    :param stdout_dir: Directory to create; must not exist yet.
    :raises FileExistsError: If ``stdout_dir`` already exists.
    """
    stdout_dir.mkdir(parents=True, exist_ok=False)
    for number, line in enumerate(pipe, start=1):
        stdout_dir.joinpath(str(number)).write_bytes(line)


def _save_output_rotating_chunks(pipe: IO[bytes],
                                 chunk_file: Path,
                                 chunk_size,
                                 chunk_transfer_fun: Callable,
                                 chunk_transfer_args: tuple):
    """Read ``pipe`` until EOF and rotate its content through ``chunk_file``.

    Each time ``chunk_size`` bytes have been collected they are saved to
    ``chunk_file``, transferred and removed again. At EOF the remaining bytes
    are rotated the same way, even when there are none left (the last chunk
    may therefore be empty). The pipe is not closed here.

    :param pipe: Blocking binary stream to read from.
    :param chunk_file: File each chunk is written to before it is transferred.
    :param chunk_size: Number of bytes per chunk.
    :param chunk_transfer_fun: Callable invoked as ``chunk_transfer_fun(*chunk_transfer_args)``.
    :param chunk_transfer_args: Positional arguments for ``chunk_transfer_fun``.
    :raises ValueError: If more than ``chunk_size`` bytes were collected for one chunk.
    """
    chunk_file.parent.mkdir(parents=True, exist_ok=True)

    parts: list[bytes] = []
    collected = 0
    while True:
        # https://docs.python.org/3/library/io.html#io.RawIOBase.read
        # If 0 bytes are returned, and size was not 0, this indicates end of file.
        data = pipe.read(chunk_size - collected)
        if not data:
            # EOF reached: flush whatever has been collected so far.
            _save_chunk(b''.join(parts), chunk_file)
            _rotate_chunk(chunk_file, chunk_transfer_fun, chunk_transfer_args)
            break

        parts.append(data)
        collected += len(data)
        if collected == chunk_size:
            _save_chunk(b''.join(parts), chunk_file)
            parts = []
            collected = 0
            _rotate_chunk(chunk_file, chunk_transfer_fun, chunk_transfer_args)
        elif collected > chunk_size:
            raise ValueError('Invalid state')


def _print_output(pipe: IO[AnyStr]):
    """Forward every line read from ``pipe`` to stderr, prefixed with ``[STDERR] ``.

    Lines read from a binary pipe are decoded as UTF-8 (undecodable bytes are
    replaced) so that the text itself is printed rather than its bytes repr.
    The pipe is not closed here.

    :param pipe: Text or binary stream to read from.
    """
    for line in pipe:
        if isinstance(line, bytes):
            line = line.decode(errors='replace')
        sys.stderr.write(f'[STDERR] {line}')


def execute_print_transfer_chunks(command: list[str],
                                  chunk_file: Path,
                                  chunk_transfer_fun: Callable = _chunk_transfer,
                                  chunk_transfer_args: Optional[tuple] = None,
                                  chunk_size=1024 * 1024,
                                  ) -> int:
    """
    Executes the given command.
    Until the command has finished:
     - saves a small part of the commands stdout to chunk_file
     - calls chunk_transfer_fun with chunk_transfer args
     - deletes chunk_file

    During command execution: Forwards stderr output of the command to stderr.

    If saving, transferring or printing fails, the command is killed and the
    error is re-raised here once both reader threads have finished.

    :param chunk_file: Chunks are saved as this file before they are transferred.
    :param command: Command to execute, e.g. ['cat', '/some/large/file']
    :param chunk_transfer_fun: Called once per chunk. Defaults to a function that only prints a message.
    :param chunk_transfer_args: Positional arguments for chunk_transfer_fun. Defaults to (chunk_file,).
    :param chunk_size: Defaults to 1 MiB (1024*1024). Must be at least 1.
    :return: returncode of executed command
    :raises ValueError: If chunk_size is smaller than 1.
    """
    if chunk_size < 1:
        # read(0) is indistinguishable from EOF and a negative size reads everything at once.
        raise ValueError(f'chunk_size must be at least 1, got {chunk_size}')
    if chunk_transfer_args is None:
        chunk_transfer_args = (chunk_file,)

    process = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        close_fds=True,
    )

    failures: list[Exception] = []

    def _run_guarded(target: Callable, *args):
        # Runs in a worker thread. An exception here would otherwise be lost,
        # and the pipe would no longer be drained, so the child could block on
        # a full pipe and wait() would never return.
        try:
            target(*args)
        except Exception as exc:
            failures.append(exc)
            process.kill()

    workers = (
        threading.Thread(
            target=_run_guarded,
            args=(_save_output_rotating_chunks,
                  process.stdout,
                  chunk_file,
                  chunk_size,
                  chunk_transfer_fun,
                  chunk_transfer_args),
            daemon=True),
        threading.Thread(
            target=_run_guarded,
            args=(_print_output, process.stderr),
            daemon=True),
    )
    for worker in workers:
        worker.start()

    returncode = process.wait()
    for worker in workers:
        worker.join()

    # Both readers are done, so the pipes can be closed to release their file descriptors.
    for pipe in (process.stdout, process.stderr):
        pipe.close()

    if failures:
        raise failures[0]
    return returncode