I'm trying to test the runtime of a code, however I'm always getting incorrect values, the first test will always be the one that has the worst time. And most of the time the second test is always 0.
#include <iostream>
#include <math.h>
#include <intrin.h>
#include <chrono>
using namespace std;
#define MAX_LOOP 100000
#define NUM 10000.f
auto sse_sqrt( float n )
{
__m128 reg = _mm_load_ss( &n );
return _mm_mul_ss( reg, _mm_rsqrt_ss( reg ) ).m128_f32[ 0 ];
}
auto stl_sqrt_timer()
{
auto start = std::chrono::high_resolution_clock::now();
for ( auto i = 0; i < MAX_LOOP; i++ )
{
auto v = std::sqrt( NUM );
}
auto end = std::chrono::high_resolution_clock::now();
return ( end - start ).count();
}
auto sse_sqrt_timer()
{
auto start = std::chrono::high_resolution_clock::now();
for ( auto i = 0; i < MAX_LOOP; i++ )
{
auto v = sse_sqrt( NUM );
}
auto end = std::chrono::high_resolution_clock::now();
return ( end - start ).count();
}
int main()
{
cout << "sse_sqrt: " << sse_sqrt_timer() << "\n";
cout << "stl_sqrt: " << stl_sqrt_timer() << "\n";
cin.ignore();
return 0;
}
First Run: sse_sqrt: 12461 stl_sqrt: 0
Second run: sse_sqrt: 2643 stl_sqrt: 378
Reversing the order of tests:
stl_sqrt: 23032 sse_sqrt: 378
stl_sqrt: 2265 sse_sqrt: 0
I'm compiling in Release x86, with optimization / Ox