A performance bottleneck of my program is frequent calls to functions like following update
functions.
Given that flag
parameter is always a bool literal, I want to "unroll" the update
function to two versions, one with flag=true
and one with flag=false
, to avoid branch prediction failure.
for normal functions, a bool template parameter can solve this problem easily. However, template cannot be applied to virtual functions.
I can create two virtual functions manually, but then I have to copy the long code
part. It makes futher development harder.
Is there anyway allowing me to write two versions in one function, controlled by a compiling-time constant flag
?
#include <iostream>
#include <random>
using std::cout;
using std::endl;
struct Base
{
virtual void update(bool flag) = 0;
};
struct Derived1 : public Base
{
void update(bool flag)
{
if (flag)
{
// some computations
cout << "Derived1 flag=true" << endl;
}
else
{
// some computations
cout << "Derived1 flag=false" << endl;
}
// long code containing several flag-conditioned blocks like the block above
cout << "Derived1" << endl;
}
};
struct Derived2 : public Base
{
void update(bool flag)
{
if (flag)
{
// some computations
cout << "Derived2 flag=true" << endl;
}
else
{
// some computations
cout << "Derived2 flag=false" << endl;
}
// long code containing several flag-conditioned blocks like the block above
cout << "Derived2" << endl;
}
};
int main()
{
Base *p;
srand(time(nullptr));
if (rand() % 2 == 1)
{
p = new Derived1();
}
else
{
p = new Derived2();
}
p->update(false);
p->update(true);
}
CodePudding user response:
Unfortunately, there is no such thing as virtual templates. What can be done however is to create several virtual functions taking an integral (boolean in this particular case) constant, if the flag is really a compile time literal:
#include <iostream>
#include <random>
#include <type_traits>
#include <memory>
using std::cout;
struct Base
{
virtual void updateSeparate(std::true_type) = 0;
virtual void updateSeparate(std::false_type) = 0;
};
struct Derived1 : public Base
{
void updateSeparate(std::true_type)
{
cout << "Derived1 flag=true\n";
updateCommonImpl();
}
void updateSeparate(std::false_type)
{
cout << "Derived1 flag=false\n";
updateCommonImpl();
}
private:
void updateCommonImpl() //or just a static function inside implementation file if members are not used
{
cout << "Derived1\n";
}
};
struct Derived2 : public Base
{
void updateSeparate(std::true_type)
{
cout << "Derived2 flag=true\n";
updateCommonImpl();
}
void updateSeparate(std::false_type)
{
cout << "Derived2 flag=false\n";
updateCommonImpl();
}
private:
void updateCommonImpl() //or just a static function inside implementation file if members are not used
{
cout << "Derived2\n";
}
};
int main()
{
std::unique_ptr<Base> p;
srand(time(nullptr));
if (rand() % 2 == 1)
{
p = std::make_unique<Derived1>();
}
else
{
p = std::make_unique<Derived2>();
}
p->updateSeparate(std::bool_constant<false>{});
p->updateSeparate(std::bool_constant<true>{});
}
DEMO However, I cannot tell if that will help or maybe hinder the performance even more by making the vtable lookup time even longer, you have to experiment with that by yourself I'm afraid.
CodePudding user response:
I tried to implement a CRTP pattern with constexpr
template parameter, please take a look
template<typename T>
struct Base {
template<bool flag>
int update() {
return static_cast<T*>(this)->template updateImpl<flag>();
}
};
struct Derived1 : public Base<Derived1> {
template<bool flag>
constexpr int updateImpl() {
if constexpr (flag) {
return 1;
} else {
return 2;
}
}
};
struct Derived2 : public Base<Derived2> {
template<bool flag>
constexpr int updateImpl() {
return 3;
}
};
int main() {
auto obj1 = new Derived1();
std::cout << obj1->update<true>(); // 1
std::cout << obj1->update<false>(); // 2
auto obj2 = new Derived2();
std::cout << obj2->update<true>(); // 3
std::cout << obj2->update<false>(); // 3
}