Why does the sanitizer give warnings when processing asynchronous boost::asio operations in parallel?


I decided to test my project, which is written with boost::asio (I run io_service::run in several threads), with all kinds of sanitizers, and ThreadSanitizer reported a data race: the socket is checked with is_open somewhere inside io_service at the same time as it is closed due to a timeout. I managed to reproduce the problem with a small piece of code, but I don't understand why it happens. Shouldn't boost::asio::io_service::strand build a chain of handlers and synchronize access to a single object of class D (from the example below) with respect to the other threads? Please help, thank you!

#include <boost/asio.hpp>
#include <boost/asio/deadline_timer.hpp>
#include <boost/asio/io_service.hpp>
#include <boost/asio/ip/tcp.hpp>
#include <boost/asio/read_until.hpp>
#include <boost/asio/streambuf.hpp>
#include <boost/asio/strand.hpp>
#include <boost/thread.hpp>
#include <boost/asio/write.hpp>
#include <boost/bind.hpp>
#include <iostream>
#include <list>
#include <thread>

using namespace boost::asio;
io_service service;
struct D {
    explicit D(io_service & ios) : socket_(ios), timeout_(ios), strand_(ios), resolver_(ios) {
        do_response();
    }
private:
    void do_response(){
        resolver_.async_resolve("tracker.dler.com",  "6969",
                                strand_.wrap( [this](boost::system::error_code const & ec,
                                                      ip::tcp::resolver::iterator endpoints){
                                    if (!ec) {
                                        do_connect(std::move(endpoints));
                                        timeout_.async_wait([this](boost::system::error_code const & ec) {
                                            if (!ec) {
                                                deadline();
                                            }
                                        });
                                    } else {
                                        socket_.close();
                                        std::cerr << "ec == true" << std::endl;
                                    }
                                }));
    }
    void do_connect(ip::tcp::resolver::iterator endpoint){
        async_connect(socket_, std::move(endpoint),
                          strand_.wrap([this](boost::system::error_code const & ec, [[maybe_unused]] const ip::tcp::resolver::iterator&){
                              std::this_thread::sleep_for(std::chrono::milliseconds(100));
                              if (!ec) {
                                  std::cout << "OK!" << std::endl;
                                  timeout_.cancel();
                              }
                          }));
        timeout_.expires_from_now(boost::posix_time::seconds(1));
    }
    void deadline(){
        if (timeout_.expires_at() <=  deadline_timer::traits_type::now()) {
            socket_.close();
            std::cerr << "TIMEOUT" << std::endl;
        } else {
            timeout_.async_wait(strand_.wrap([this](boost::system::error_code const &ec) {
                if (!ec) {
                    deadline();
                }
            }));
        }
    }

    ip::tcp::socket socket_;
    deadline_timer timeout_;
    io_service::strand strand_;
    ip::tcp::resolver resolver_;
};
void func(int i)
{
    std::cout << "func called, i= " << i << "/" << boost::this_thread::get_id() << std::endl;
}
void worker_thread()
{
    service.run();
}
int main(int argc, char* argv[])
{
    std::list<D> ds;
    for (int i = 0; i < 15; ++i) {
        ds.emplace_back(service);
    }
    boost::thread_group threads;
    for (int i = 0; i < 3; ++i)
        threads.create_thread(worker_thread);
    // wait for all threads to be created
    boost::this_thread::sleep( boost::posix_time::millisec(500));
    threads.join_all();
}

Sanitizer output (the is_open read racing with the socket close from deadline):

reactive_socket_service_base.hpp
Data race (pid=7375)
Read of size 4 at 0x7b34000001b8 by thread T2:
    0x415147 boost::asio::detail::reactive_socket_service_base::is_open const reactive_socket_service_base.hpp:89
    0x437f3c boost::asio::basic_socket::is_open const basic_socket.hpp:489
    0x435c0b boost::asio::detail::iterator_connect_op::operator() connect.hpp:550
    0x445036 boost::asio::detail::binder1::operator() bind_handler.hpp:65
    0x449677 boost::asio::detail::rewrapped_handler::operator() wrapped_handler.hpp:191
    0x449564 (cpptorrent 0x449563) handler_invoke_hook.hpp:69
    0x449378 boost_asio_handler_invoke_helpers::invoke<…> handler_invoke_helpers.hpp:37
    0x448f5f boost::asio::detail::asio_handler_invoke<…> wrapped_handler.hpp:275
    0x448c35 boost_asio_handler_invoke_helpers::invoke<…> handler_invoke_helpers.hpp:37
    0x449318 boost::asio::detail::handler_work::complete<…> handler_work.hpp:100
    0x448e9e boost::asio::detail::completion_handler::do_complete completion_handler.hpp:70
    0x4489ae boost::asio::detail::strand_service::dispatch<…> strand_service.hpp:88
    0x448349 boost::asio::io_context::strand::initiate_dispatch::operator()<…> const io_context_strand.hpp:343
    0x447e0f boost::asio::async_result::initiate<…> async_result.hpp:82
    0x44728e boost::asio::async_initiate<…> async_result.hpp:257
    0x4468c4 boost::asio::io_context::strand::dispatch<…> io_context_strand.hpp:189
    0x44583d boost::asio::detail::asio_handler_invoke<…> wrapped_handler.hpp:232
    0x444f7e boost_asio_handler_invoke_helpers::invoke<…> handler_invoke_helpers.hpp:37
    0x4443d0 boost::asio::detail::asio_handler_invoke<…> connect.hpp:612
    0x443f36 boost_asio_handler_invoke_helpers::invoke<…> handler_invoke_helpers.hpp:37
    0x443910 boost::asio::detail::asio_handler_invoke<…> bind_handler.hpp:106
    0x4430de boost_asio_handler_invoke_helpers::invoke<…> handler_invoke_helpers.hpp:37
    0x4426d6 boost::asio::detail::io_object_executor::dispatch<…> const io_object_executor.hpp:119
    0x441148 boost::asio::detail::handler_work::complete<…> handler_work.hpp:72
    0x43fbd6 boost::asio::detail::reactive_socket_connect_op::do_complete reactive_socket_connect_op.hpp:102
    0x40b513 boost::asio::detail::scheduler_operation::complete scheduler_operation.hpp:40
    0x412d5f boost::asio::detail::scheduler::do_run_one scheduler.ipp:447
    0x412543 boost::asio::detail::scheduler::run scheduler.ipp:200
    0x413581 boost::asio::io_context::run io_context.ipp:63
    0x4087ee worker_thread main.cpp:82
    0x44af46 boost::detail::thread_data::run thread.hpp:120
    0x44ef6c thread_proxy
Previous write of size 4 at 0x7b34000001b8 by thread T3:
    0x41520e boost::asio::detail::reactive_socket_service_base::construct reactive_socket_service_base.ipp:45
    0x4154d8 boost::asio::detail::reactive_socket_service_base::close reactive_socket_service_base.ipp:127
    0x4222f9 boost::asio::basic_socket::close basic_socket.hpp:507
    0x41bca1 D::deadline main.cpp:62
    0x41b8b9 <lambda#1>::operator() const main.cpp:39
    0x441d16 boost::asio::detail::binder1::operator() bind_handler.hpp:65
    0x44039a (cpptorrent 0x440399) handler_invoke_hook.hpp:69
    0x43ef12 boost_asio_handler_invoke_helpers::invoke<…> handler_invoke_helpers.hpp:37
    0x43dbbd boost::asio::detail::asio_handler_invoke<…> bind_handler.hpp:106
    0x43cb52 boost_asio_handler_invoke_helpers::invoke<…> handler_invoke_helpers.hpp:37
    0x43bc18 boost::asio::detail::io_object_executor::dispatch<…> const io_object_executor.hpp:119
    0x43a854 boost::asio::detail::handler_work::complete<…> handler_work.hpp:72
    0x439206 boost::asio::detail::wait_handler::do_complete wait_handler.hpp:73
    0x40b513 boost::asio::detail::scheduler_operation::complete scheduler_operation.hpp:40
    0x412d5f boost::asio::detail::scheduler::do_run_one scheduler.ipp:447
    0x412543 boost::asio::detail::scheduler::run scheduler.ipp:200
    0x413581 boost::asio::io_context::run io_context.ipp:63
    0x4087ee worker_thread main.cpp:82
    0x44af46 boost::detail::thread_data::run thread.hpp:120
    0x44ef6c thread_proxy
Location is heap block of size 208 at 0x7b34000001a0 allocated by main thread:
    0x7f0da81022af operator new
    0x4381e1 __gnu_cxx::new_allocator::allocate new_allocator.h:115
    0x435fa4 std::allocator_traits::allocate alloc_traits.h:460
    0x434014 std::_List_base::_M_get_node stl_list.h:442
    0x42fb5f std::list::_M_create_node<…> stl_list.h:634
    0x429629 std::list::_M_insert<…> stl_list.h:1911
    0x422d5b std::list::emplace_back<…> stl_list.h:1227
    0x40885d main main.cpp:88
Thread T2 (tid=7380, running) created by main thread at:
    0x7f0da80d5466 pthread_create
    0x44e0ca boost::thread::start_thread_noexcept
    0x429715 boost::thread::thread<…> thread.hpp:269
    0x422de0 boost::thread_group::create_thread<…> thread_group.hpp:79
    0x408893 main main.cpp:92
Thread T3 (tid=7381, running) created by main thread at:
    0x7f0da80d5466 pthread_create
    0x44e0ca boost::thread::start_thread_noexcept
    0x429715 boost::thread::thread<…> thread.hpp:269
    0x422de0 boost::thread_group::create_thread<…> thread_group.hpp:79
    0x408893 main main.cpp:92
A second data race (pid=7375) was reported in reactive_socket_service_base.ipp (stack trace not shown).

CodePudding user response:

I couldn't reproduce the problem, but I have reviewed the code.

  • There's some dead code in your question (the unused func helper).
  • You're not using recent interfaces; consider using io_context instead of the deprecated io_service interface.
  • You can be much more safe/succinct by constructing your IO objects directly on a strand executor rather than on the general service. This means that all completions will happen on the strand by default.
  • The 500 ms wait "for all threads to be created" does nothing, because join_all on a thread_group already blocks until the service exits (runs out of work).
  • Consider using asio::thread_pool instead of manually combining an io_context with a thread_group. It comes down to the same thing, but with better correctness (see e.g. Should the exception thrown by boost::asio::io_service::run() be caught?); a hedged sketch of the manual approach follows this list.
  • You never initially set the timer expiry before your first async_wait. You also don't reset it when chaining a new async_wait.
  • You are doing a hard socket.close(). I'd suggest doing a cancel() instead and leaving the close to the destructor. This prevents a category of bugs surrounding reused socket fds (which aren't even detectable as data races, because they are application-level races).
  • I'm not too sure why aborted timers restart in the first place. Shouldn't you just wait until a new socket operation is attempted that requires a timeout?
  • It seems pretty counter-productive to wait 100 ms before canceling the timer. That just makes it so the socket might well be closed even if the connection did complete in a timely fashion? [Perhaps this is just because the sample code has had so much other code removed.]
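
As a hedged illustration of the thread_pool point above (not from the original code): if you do keep the manual io_context + thread_group approach, the worker loop should handle exceptions escaping run(). The names below (the work guard, the worker lambda) are only for this sketch.

#include <boost/asio.hpp>
#include <boost/thread.hpp>
#include <iostream>

int main()
{
    boost::asio::io_context io;
    // keep run() from returning before any work has been posted
    auto work = boost::asio::make_work_guard(io);

    boost::thread_group threads;
    for (int i = 0; i < 3; ++i) {
        threads.create_thread([&io] {
            for (;;) {
                try {
                    io.run();
                    break; // run() returned normally: no more work
                } catch (std::exception const& e) {
                    // a completion handler threw; log and resume the loop
                    std::cerr << "handler exception: " << e.what() << std::endl;
                }
            }
        });
    }

    // ... post your work here ...

    work.reset();       // let run() return once outstanding work completes
    threads.join_all();
}

asio::thread_pool does essentially this for you, which is why the listing below uses it.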

With that said, here's a simplified listing. I did not see a data race in your code. I will look at the reports when you add them.

For now, hopefully this review might help you:

#include <boost/asio.hpp>
#include <chrono>
#include <iostream>
#include <list>
#include <thread>

using namespace std::chrono_literals;
using boost::asio::ip::tcp;
using boost::system::error_code;
using Timer = boost::asio::steady_timer;
using std::this_thread::sleep_for;

struct D {
    template <typename Executor>
    explicit D(Executor ex) : socket_(ex)
                            , timeout_(ex)
                            , resolver_(ex)
    {
        // Post here is actually redundant because during construction, nothing
        // else will have a reference to our IO objects, but for
        // consistency/documentation value:
        post(socket_.get_executor(), [this] { do_response(); });

        // All other methods will be on the strand by definition, because
        // they're all invoked from completion handlers from our IO objects
    }

  private:
    void do_response()
    {
        resolver_.async_resolve(
            "tracker.dler.com", "6969",
            [this](error_code ec, tcp::resolver::iterator endpoints) {
                if (!ec) {
                    do_connect(std::move(endpoints));
                } else {
                    socket_.close();
                    std::cerr << "ec == true" << std::endl;
                }
            });
    }

    void do_connect(tcp::resolver::iterator endpoint)
    {
        timeout_.expires_from_now(1s);
        timeout_.async_wait([this](error_code ec) { if (!ec) { deadline(); } });
        async_connect( //
            socket_, std::move(endpoint),
            [this](error_code ec, const tcp::resolver::iterator&) {
                std::cout << "connect: " << ec.message() << " (" << socket_.remote_endpoint() << ")" << std::endl;
                timeout_.cancel();
                if (!ec) {
                    sleep_for(100ms);
                }
            });
    }
    
    void deadline()
    {
        if (timeout_.expires_at() <= std::chrono::steady_clock::now()) {
            std::cerr << "TIMEOUT" << std::endl;
            socket_.cancel();
        }
    }

    tcp::socket   socket_;
    Timer         timeout_;
    tcp::resolver resolver_;
};

int main()
{
    boost::asio::thread_pool io(3);

    std::list<D> ds;
    for (int i = 0; i < 15; ++i) {
        ds.emplace_back(make_strand(io/*.get_executor()*/));
    }

    io.join();
}
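
For reference, a typical way to rebuild the original sample under ThreadSanitizer (assuming g++ or clang++ with a system Boost; adjust the Boost libraries to your installation; the output name is just the one from the report above):

    g++ -std=c++17 -g -fsanitize=thread main.cpp -pthread -lboost_thread -lboost_system -o cpptorrent

The simplified listing only needs -pthread, since it no longer uses boost::thread.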