Fix reading of output from subprocess

This commit is contained in:
TheAssassin 2022-07-15 17:01:48 +02:00
parent 649fc0247d
commit 4677fd9280
5 changed files with 112 additions and 89 deletions

View File

@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.2)
project(linuxdeploy C CXX)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PROJECT_SOURCE_DIR}/cmake/Modules/")

View File

@ -4,15 +4,24 @@
#include <set>
#include <vector>
#include <poll.h>
#include <chrono>
/**
* Reads from a pipe when data is available, and hands data to registered callbacks.
*/
class pipe_reader {
private:
const int pipe_fd_;
struct pollfd pollfd_;
public:
static constexpr std::chrono::milliseconds READ_TIMEOUT{50};
enum class result {
SUCCESS = 0,
TIMEOUT,
END_OF_FILE,
};
/**
* Construct new instance from pipe file descriptor.
* @param pipe_fd file descriptor for pipe we will read from (e.g., a subprocess's stdout, stderr pipes)
@ -27,10 +36,12 @@ public:
* - no more data left in the pipe to be read
* - buffer is completely filled
*
* The buffer will be resized to the number of bytes read from the pipe.
*
* On errors, a subprocess_error is thrown.
*
* @param buffer buffer to store read data into
* @returns amount of characters read from the pipe
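* @returns result::SUCCESS if data was read into the buffer, result::TIMEOUT if no data became available within
*          read_timeout, or result::END_OF_FILE if the pipe reported end of file or a hang-up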
*/
size_t read(std::vector<std::string::value_type>& buffer) const;
result read(std::vector<std::string::value_type>& buffer, std::chrono::milliseconds read_timeout = READ_TIMEOUT);
};

View File

@ -68,65 +68,59 @@ namespace linuxdeploy {
subprocess::subprocess_result_buffer_t intermediate_buffer(4096);
// (try to) read from pipe
const auto bytes_read = pipe_to_be_logged.reader_.read(intermediate_buffer);
switch (pipe_to_be_logged.reader_.read(intermediate_buffer)) {
case pipe_reader::result::SUCCESS: {
// all we have to do now is to look for CR or LF, send everything up to that location into the ldLog instance,
// write our prefix and then repeat
for (auto it = intermediate_buffer.begin(); it != intermediate_buffer.end(); ++it) {
if (pipe_to_be_logged.print_prefix_in_next_iteration_) {
pipe_to_be_logged.log_ << log_prefix;
}
// 0 means EOF
if (bytes_read == 0) {
pipe_to_be_logged.eof = true;
break;
}
const auto next_lf = std::find(it, intermediate_buffer.end(), '\n');
const auto next_cr = std::find(it, intermediate_buffer.end(), '\r');
// we just trim the buffer to the bytes we read (makes the code below easier)
intermediate_buffer.resize(bytes_read);
// we don't care which one goes first -- we pick the closest one, write everything up to it into our ldLog,
// then print our prefix and repeat that until there's nothing left in our buffer
auto next_control_char = std::min({next_lf, next_cr});
// all we have to do now is to look for CR or LF, send everything up to that location into the ldLog instance,
// write our prefix and then repeat
for (auto it = intermediate_buffer.begin(); it != intermediate_buffer.end(); ++it) {
if (pipe_to_be_logged.print_prefix_in_next_iteration_) {
pipe_to_be_logged.log_ << log_prefix;
}
// if there is a control char, we remember this for the next iteration, where we print our
// log prefix
// in any case, we can write the remaining buffer contents into the ldLog object
pipe_to_be_logged.print_prefix_in_next_iteration_ = (next_control_char !=
intermediate_buffer.end());
const auto next_lf = std::find(it, intermediate_buffer.end(), '\n');
const auto next_cr = std::find(it, intermediate_buffer.end(), '\r');
const auto distance_from_begin_to_it = std::distance(intermediate_buffer.begin(), it);
auto distance_from_it_to_next_cc = std::distance(it, next_control_char);
// we don't care which one goes first -- we pick the closest one, write everything up to it into our ldLog,
// then print our prefix and repeat that until there's nothing left in our buffer
auto next_control_char = std::min({next_lf, next_cr});
if (pipe_to_be_logged.print_prefix_in_next_iteration_) {
distance_from_it_to_next_cc++;
}
// if there is a control char, we remember this for the next iteration, where we print our
// log prefix
// in any case, we can write the remaining buffer contents into the ldLog object
pipe_to_be_logged.print_prefix_in_next_iteration_ = (next_control_char !=
intermediate_buffer.end());
// need to make sure we include the control char in the write
pipe_to_be_logged.log_.write(
intermediate_buffer.data() + distance_from_begin_to_it,
distance_from_it_to_next_cc
);
const auto distance_from_begin_to_it = std::distance(intermediate_buffer.begin(), it);
auto distance_from_it_to_next_cc = std::distance(it, next_control_char);
it = next_control_char;
if (pipe_to_be_logged.print_prefix_in_next_iteration_) {
distance_from_it_to_next_cc++;
}
// need to make sure we include the control char in the write
pipe_to_be_logged.log_.write(
intermediate_buffer.data() + distance_from_begin_to_it,
distance_from_it_to_next_cc
);
it = next_control_char;
// TODO: should not be necessary, should be fixed in for loop
if (!pipe_to_be_logged.print_prefix_in_next_iteration_) {
// TODO: should not be necessary, should be fixed in for loop
if (!pipe_to_be_logged.print_prefix_in_next_iteration_) {
break;
}
}
break;
}
case pipe_reader::result::END_OF_FILE: {
pipe_to_be_logged.eof = true;
break;
}
case pipe_reader::result::TIMEOUT:
break;
}
}
// do-while might be a little more elegant, but we can save this one unnecessary sleep, so...
if (proc.is_running()) {
// reduce load on CPU
std::this_thread::sleep_for(std::chrono::milliseconds(50));
}
// once all buffers are EOF, we can stop reading
if (std::all_of(pipes_to_be_logged.begin(), pipes_to_be_logged.end(), [](const pipe_to_be_logged& pipe_state) {
return pipe_state.eof;

View File

@ -1,37 +1,59 @@
// system headers
#include <algorithm>
#include <fcntl.h>
#include <unistd.h>
#include <functional>
#include <cstring>
#include <poll.h>
#include <unistd.h>
#include <stdexcept>
// local headers
#include "linuxdeploy/subprocess/pipe_reader.h"
pipe_reader::pipe_reader(int pipe_fd) : pipe_fd_(pipe_fd) {
// add O_NONBLOCK TO fd's flags to be able to read
auto flags = fcntl(pipe_fd_, F_GETFL, 0);
flags |= O_NONBLOCK;
fcntl(pipe_fd_, F_SETFL, flags);
}
pipe_reader::pipe_reader(int pipe_fd) : pollfd_(pollfd{pipe_fd, POLLIN | POLLHUP}) {}
size_t pipe_reader::read(std::vector<std::string::value_type>& buffer) const {
for (;;) {
ssize_t rv = ::read(pipe_fd_, buffer.data(), buffer.size());
pipe_reader::result pipe_reader::read(std::vector<std::string::value_type>& buffer, std::chrono::milliseconds read_timeout) {
const auto timeout_msec = std::chrono::duration_cast<std::chrono::milliseconds>(read_timeout).count();
if (rv == -1) {
switch (errno) {
// retry in case data is currently not available
case EINTR:
case EAGAIN:
continue;
default:
// TODO: introduce custom subprocess_error
throw std::runtime_error{"unexpected error reading from pipe: " + std::string(strerror(errno))};
// we could (and probably should) be using poll on multiple fds at once
// however given the low bandwidth of data to handle, this should be fine, given we use a small-enough timeout
// also the read buffer sizes could be further increased to improve the overall performance
switch (poll(&pollfd_, 1, static_cast<int>(timeout_msec))) {
case -1:
// TODO: introduce custom subprocess_error
throw std::runtime_error{"unexpected error reading from pipe: " + std::string(strerror(errno))};
case 0:
return result::TIMEOUT;
case 1: {
if ((pollfd_.revents & POLLIN) != 0) {
ssize_t rv = ::read(pollfd_.fd, buffer.data(), buffer.size());
switch (rv) {
case -1: {
throw std::runtime_error{"unexpected error reading from pipe: " + std::string(strerror(errno))};
}
case 0: {
return result::END_OF_FILE;
}
default: {
// set the size correctly so the caller can just query the vector's size if the number of read chars is needed
buffer.resize(rv);
return result::SUCCESS;
}
}
}
}
return rv;
if ((pollfd_.revents & POLLHUP) != 0) {
// appears like this can be considered eof
return result::END_OF_FILE;
}
if ((pollfd_.revents & POLLERR) != 0 || (pollfd_.revents & POLLNVAL) != 0) {
throw std::runtime_error{"poll() failed unexpectedly"};
}
break;
}
default:
// this is a should-never-ever-happen case, a return value not handled by the lines above is actually not possible
throw std::runtime_error{"unexpected return value from pollfd"};
}
}

View File

@ -51,23 +51,19 @@ namespace linuxdeploy {
subprocess_result_buffer_t intermediate_buffer(4096);
// (try to) read all available data from pipe
for (;;) {
if (pipe_state.eof) {
break;
for (; !pipe_state.eof; ) {
switch (pipe_state.reader.read(intermediate_buffer)) {
    case pipe_reader::result::SUCCESS: {
        // read() resized intermediate_buffer to the number of bytes read, so the whole vector is payload
        // append to main buffer
        pipe_state.buffer.reserve(pipe_state.buffer.size() + intermediate_buffer.size());
        std::copy(intermediate_buffer.begin(), intermediate_buffer.end(), std::back_inserter(pipe_state.buffer));

        // undo the shrinking done by read(), otherwise one short read would limit the size of all following reads
        intermediate_buffer.resize(intermediate_buffer.capacity());

        // must not fall through into END_OF_FILE: there may be more data waiting in the pipe
        break;
    }
    case pipe_reader::result::END_OF_FILE: {
        // terminates the surrounding loop, which runs only while eof is not set
        pipe_state.eof = true;
        break;
    }
    case pipe_reader::result::TIMEOUT:
        // nothing available right now; the surrounding loop tries again
        break;
}
const auto bytes_read = pipe_state.reader.read(intermediate_buffer);
// 0 means EOF
if (bytes_read == 0) {
pipe_state.eof = true;
break;
}
// append to main buffer
pipe_state.buffer.reserve(pipe_state.buffer.size() + bytes_read);
std::copy(intermediate_buffer.begin(), (intermediate_buffer.begin() + bytes_read),
std::back_inserter(pipe_state.buffer));
}
}