More timing functions

This commit is contained in:
David 2021-08-28 01:04:31 +02:00
commit 321c677da2
12 changed files with 184 additions and 129 deletions

View file

@ -1,9 +1,9 @@
INCLUDE=./include
LIBS=-pthread -lm
FLAGS=-Og -g -Wall -Wextra -Wpedantic
FLAGS=-Ofast -march=native -g -Wall -Wextra -Wpedantic
raytracer: camera.hpp color.hpp hittable.hpp hittable_list.hpp main.cpp material.hpp ray.hpp rtweekend.hpp sphere.hpp vec3.hpp $(INCLUDE)/Remotery.c $(INCLUDE)/Remotery.h
@g++ $(FLAGS) -I$(INCLUDE) $(LIBS) main.cpp -o raytracer
g++ $(FLAGS) -I$(INCLUDE) $(LIBS) main.cpp -o raytracer
image: raytracer
@./raytracer > image.ppm

View file

@ -10,21 +10,21 @@ struct camera {
vec3 horizontal;
vec3 vertical;
vec3 u,v,w;
double lens_radius;
float lens_radius;
/* Constructors */
camera(point3 lookfrom,
point3 lookat,
vec3 vup,
double vfov,
double aspect_ratio,
double aperture,
double focus_dist)
float vfov,
float aspect_ratio,
float aperture,
float focus_dist)
{
double theta = degrees_to_radians(vfov);
double h = tan(theta/2);
double viewport_height = 2.0 * h;
double viewport_width = aspect_ratio * viewport_height;
float theta = degrees_to_radians(vfov);
float h = tan(theta/2);
float viewport_height = 2.0 * h;
float viewport_width = aspect_ratio * viewport_height;
w = normalize(lookfrom - lookat);
u = normalize(cross(vup,w));
@ -40,9 +40,8 @@ struct camera {
/* Methods */
ray get_ray(double s, double t) const
ray get_ray(float s, float t) const
{
rmt_ScopedCPUSample(GetRay, RMTSF_Aggregate);
vec3 rd = lens_radius * random_in_unit_disk();
vec3 offset = u * rd.x + v * rd.y;

View file

@ -9,12 +9,12 @@
/* Writes color components as a space-delimited string of numbers in the range [0,255] */
void write_color(FILE *fp, color c, uint32_t samples_per_pixel)
{
double scale = 1.0 / samples_per_pixel;
float scale = 1.0 / samples_per_pixel;
// Divide the color by the number of samples
double r = sqrt(c.x * scale);
double g = sqrt(c.y * scale);
double b = sqrt(c.z * scale);
float r = sqrt(c.x * scale);
float g = sqrt(c.y * scale);
float b = sqrt(c.z * scale);
/* Write output */
fprintf(fp,

View file

@ -6,7 +6,7 @@
/* Abstract interface for objects that a ray can hit */
struct hittable {
virtual bool hit(const ray& r, double t_min, double t_max, hit_record& rec) const = 0;
virtual bool hit(const ray& r, float t_min, float t_max, hit_record& rec) const = 0;
};
#endif

View file

@ -2,6 +2,7 @@
#define HITTABLE_LIST_H
#include "hittable.hpp"
#include "sphere.hpp"
#include <memory>
#include <vector>
@ -9,30 +10,34 @@
using std::shared_ptr;
using std::make_shared;
struct hittable_list : hittable {
template <typename T = sphere>
struct hittable_list {
/* Attributes */
std::vector<shared_ptr<hittable>> objects;
std::vector<T> objects;
/* Constructors */
hittable_list () {}
hittable_list(shared_ptr<hittable> h) { add(h); }
hittable_list(T object) { add(object); }
/* Methods */
void clear() { objects.clear(); }
void add (shared_ptr<hittable> h) { objects.push_back(h); }
void add (T h) { objects.push_back(h); }
virtual bool hit(const ray& r, double t_min, double t_max, hit_record& rec) const override;
bool hit(const ray& r, float t_min, float t_max, hit_record& rec);
};
bool hittable_list::hit(const ray& r, double t_min, double t_max, hit_record& rec) const
template <typename T>
bool hittable_list<T>::hit(const ray& r, float t_min, float t_max, hit_record& rec)
{
rmt_ScopedCPUSample(HittableList_Hit, RMTSF_Aggregate);
hit_record temp_rec;
bool hit_anything = false;
double closest_so_far = t_max;
float closest_so_far = t_max;
for (const auto& object : objects)
for (uint32_t i = 0; i < objects.size(); ++i)
{
T *object = &objects[i];
if (object->hit(r, t_min, closest_so_far, temp_rec))
{
hit_anything = true;

View file

@ -3,6 +3,9 @@
#include <stdlib.h>
// Remotery profiling switch: 1 enables it, 0 disables it
#define RMT_ENABLED 1
// Lib includes
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
@ -18,23 +21,23 @@
#include "sphere.hpp"
#include "camera.hpp"
color ray_color(const ray& r, const hittable& world, int32_t depth);
double hit_sphere(const point3& center, double radius, const ray& r);
hittable_list random_scene();
float hit_sphere(const point3& center, float radius, const ray& r);
void print_timers();
hittable_list<sphere> random_scene();
hittable_list random_scene() {
hittable_list world;
hittable_list<sphere> random_scene() {
hittable_list<sphere> world;
auto ground_material = make_shared<lambertian>(color(0.5, 0.5, 0.5));
world.add(make_shared<sphere>(point3(0,-1000,0), 1000, ground_material));
world.add(sphere(point3(0,-1000,0), 1000, ground_material));
for (int32_t a = -11; a < 11; a++)
{
for (int32_t b = -11; b < 11; b++)
{
double choose_mat = random_double();
point3 center(a + 0.9*random_double(), 0.2, b + 0.9*random_double());
float choose_mat = random_float();
point3 center(a + 0.9*random_float(), 0.2, b + 0.9*random_float());
if ((center - point3(4, 0.2, 0)).length() > 0.9)
{
@ -44,39 +47,40 @@ hittable_list random_scene() {
// diffuse
color albedo = color::random() * color::random();
sphere_material = make_shared<lambertian>(albedo);
world.add(make_shared<sphere>(center, 0.2, sphere_material));
world.add(sphere(center, 0.2, sphere_material));
}
else if (choose_mat < 0.95)
{
// metal
color albedo = color::random(0.5, 1);
double fuzz = random_double(0, 0.5);
float fuzz = random_float(0, 0.5);
sphere_material = make_shared<metal>(albedo, fuzz);
world.add(make_shared<sphere>(center, 0.2, sphere_material));
world.add(sphere(center, 0.2, sphere_material));
}
else
{
// glass
sphere_material = make_shared<dielectric>(1.5);
world.add(make_shared<sphere>(center, 0.2, sphere_material));
world.add(sphere(center, 0.2, sphere_material));
}
}
}
}
auto material1 = make_shared<dielectric>(1.5);
world.add(make_shared<sphere>(point3(0, 1, 0), 1.0, material1));
world.add(sphere(point3(0, 1, 0), 1.0, material1));
auto material2 = make_shared<lambertian>(color(0.4, 0.2, 0.1));
world.add(make_shared<sphere>(point3(-4, 1, 0), 1.0, material2));
world.add(sphere(point3(-4, 1, 0), 1.0, material2));
auto material3 = make_shared<metal>(color(0.7, 0.6, 0.5), 0.0);
world.add(make_shared<sphere>(point3(4, 1, 0), 1.0, material3));
world.add(sphere(point3(4, 1, 0), 1.0, material3));
return world;
}
color ray_color(const ray& r, const hittable& world, int32_t depth)
template<typename T>
color ray_color(const ray& r, hittable_list<T>& world, int32_t depth)
{
rmt_ScopedCPUSample(Scatter, RMTSF_Aggregate | RMTSF_Recursive);
if (depth <= 0)
@ -102,17 +106,17 @@ color ray_color(const ray& r, const hittable& world, int32_t depth)
}
}
vec3 unit_direction = normalize(r.direction);
double t = 0.5 * (unit_direction.y + 1.0);
float t = 0.5 * (unit_direction.y + 1.0);
return (1-t) * color(1,1,1) + t*color(0.5,0.7,1.0);
}
double hit_sphere(const point3& center, double radius, const ray& r)
float hit_sphere(const point3& center, float radius, const ray& r)
{
vec3 oc = r.origin - center;
double a = r.direction.length_squared();
double half_b = dot(oc, r.direction);
double c = oc.length_squared() - radius*radius;
double discriminant = half_b*half_b - a*c;
float a = r.direction.length_squared();
float half_b = dot(oc, r.direction);
float c = oc.length_squared() - radius*radius;
float discriminant = half_b*half_b - a*c;
if (discriminant < 0)
return -1;
@ -130,7 +134,7 @@ int32_t main()
// Image
const double aspect_ratio = 3.0 / 2.0;
const float aspect_ratio = 3.0 / 2.0;
const int32_t image_width = 1200;
const int32_t image_height = (int32_t) (image_width / aspect_ratio);
int32_t samples_per_pixel = 500;
@ -141,17 +145,15 @@ int32_t main()
samples_per_pixel = strtol(getenv("SPP"), NULL, 10);
}
// World
hittable_list world = random_scene();
hittable_list<sphere> world = random_scene();
// Camera
point3 lookfrom(13,2,3);
point3 lookat(0,0,0);
vec3 vup(0,1,0);
double dist_to_focus = 10.0;
double aperture = 0.1;
float dist_to_focus = 10.0;
float aperture = 0.1;
camera cam(lookfrom, lookat, vup, 20, aspect_ratio, aperture, dist_to_focus);
@ -163,6 +165,7 @@ int32_t main()
{
rmt_ScopedCPUSample(OuterLoop, RMTSF_Aggregate);
fprintf(stderr, "\rScanlines remaining: %d ", j);
print_timers();
fflush(stderr);
for (int32_t i = 0; i < image_width; ++i)
{
@ -171,8 +174,8 @@ int32_t main()
for (int32_t s = 0; s < samples_per_pixel; ++s)
{
double u = ((i + random_double()) / (image_width-1));
double v = ((j + random_double()) / (image_height-1));
float u = ((i + random_float()) / (image_width-1));
float v = ((j + random_float()) / (image_height-1));
ray r = cam.get_ray(u,v);
pixel_color += ray_color(r, world, max_depth);
}
@ -184,3 +187,27 @@ int32_t main()
fprintf(stderr, "\nDone\n");
rmt_DestroyGlobalInstance(rmt);
}
/* Backing storage for the timed_block records. The bound is the value of
   __COUNTER__ at this point of the translation unit, so this definition has to
   come after every use of __COUNTER__ (i.e. every TIMED_BLOCK) that indexes
   into it. */
debug_record debug_record_array[__COUNTER__];

/* Writes one summary entry per used slot of debug_record_array to stderr:
   index, source location, accumulated cycles, hit count, cycles per hit and
   a "Time" figure. Entries are written back to back without a newline, as
   the caller prints them on its progress line. */
void print_timers()
{
    const uint32_t record_count =
        sizeof(debug_record_array) / sizeof(debug_record_array[0]);

    for (uint32_t i = 0; i < record_count; ++i)
    {
        const debug_record *record = &debug_record_array[i];

        // A slot whose timed_block never ran still has its zero-initialised
        // state: null filename/function_name (undefined behaviour for "%s")
        // and a 0/0 cycles-per-hit ratio. Skip it.
        if (record->hit_count == 0)
            continue;

        // Fixed-width integers are cast to the exact types the conversion
        // specifiers expect; "%ld" for uint64_t is wrong where long is 32-bit.
        // NOTE(review): cycles are __rdtsc() ticks, not clock() ticks, so
        // dividing by CLOCKS_PER_SEC does not yield seconds -- confirm intent.
        fprintf(stderr,
                "%u: %s:%s:%u; "
                "Cycles = %llu; "
                "Hit count %llu; "
                "Cycles/hit %f; "
                "Time %f",
                static_cast<unsigned>(i),
                record->filename,
                record->function_name,
                static_cast<unsigned>(record->line_number),
                static_cast<unsigned long long>(record->cycles),
                static_cast<unsigned long long>(record->hit_count),
                static_cast<double>(record->cycles) / record->hit_count,
                static_cast<double>(record->cycles) / CLOCKS_PER_SEC);
    }
}

View file

@ -37,9 +37,9 @@ struct lambertian : material {
struct metal : material {
/* Attributes */
color albedo;
double fuzz;
float fuzz;
// Constructor
metal(const color& c, double f)
metal(const color& c, float f)
{
albedo = c;
fuzz = f;
@ -57,17 +57,17 @@ struct metal : material {
struct dielectric : material
{
/* Attributes */
double ri; // refraction index
float ri; // refraction index
// Constructor
dielectric(double refraction_index) { ri = refraction_index; }
dielectric(float refraction_index) { ri = refraction_index; }
/* Methods */
// Schlick's approximation of reflectance
static double reflectance(double cosine, double ref_idx)
static float reflectance(float cosine, float ref_idx)
{
double r0 = (1-ref_idx) / (1+ref_idx);
float r0 = (1-ref_idx) / (1+ref_idx);
r0 = r0*r0;
return r0 + (1-r0)*pow((1 - cosine), 5);
}
@ -77,17 +77,17 @@ struct dielectric : material
virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered) const override
{
attenuation = color(1,1,1);
double refraction_ratio = rec.front_face ? (1.0/ri) : ri;
float refraction_ratio = rec.front_face ? (1.0/ri) : ri;
vec3 unit_direction = normalize(r_in.direction);
double cos_theta = fmin(dot(-unit_direction, rec.normal), 1);
double sin_theta = sqrt(1.0 - cos_theta*cos_theta);
float cos_theta = fmin(dot(-unit_direction, rec.normal), 1);
float sin_theta = sqrt(1.0 - cos_theta*cos_theta);
bool cannot_refract = refraction_ratio * sin_theta > 1.0;
vec3 direction;
if (cannot_refract || reflectance(cos_theta, refraction_ratio) > random_double())
if (cannot_refract || reflectance(cos_theta, refraction_ratio) > random_float())
direction = reflect(unit_direction, rec.normal);
else
direction = refract(unit_direction, rec.normal, refraction_ratio);

View file

@ -24,7 +24,7 @@ struct ray {
}
// Returns position after time t
point3 at(double t) const
point3 at(float t) const
{
return origin + t*direction;
}

View file

@ -3,36 +3,28 @@
#include <math.h>
#include <memory>
/* Utility macros */
#define TIMED_BLOCK_2(c, flags) rmt_ScopedCPUSample(Counter##c, flags)
#define TIMED_BLOCK_1(c, flags) TIMED_BLOCK_2(c, flags)
#define TIMED_BLOCK(flags) TIMED_BLOCK_1(__COUNTER__, flags)
// #define TIMED_BLOCK_(counter, flags) rmt_ScopedCPUSample(counter, flags)
// #define TIMED_BLOCK(flags) TIMED_BLOCK_(__COUNTER__, flags)
#include "timer.hpp"
/* Utility functions */
double degrees_to_radians(double d)
float degrees_to_radians(float d)
{
return d * M_PI / 180;
}
/* Returns a double in the range [0,1) */
inline double random_double()
/* Returns a float in the range [0,1) */
inline float random_float()
{
return rand() * (1.0 / RAND_MAX);
}
/* Returns a double in the range [min,max) */
inline double random_double(double min, double max)
/* Returns a float in the range [min,max) */
inline float random_float(float min, float max)
{
return min + (max-min) * random_double();
return min + (max-min) * random_float();
}
/* Clamps a value between [min,max] */
inline double clamp(double v, double min, double max)
inline float clamp(float v, float min, float max)
{
return v < min ? min : v > max ? max : v;
}
@ -50,7 +42,7 @@ struct hit_record {
point3 p;
vec3 normal;
std::shared_ptr<material> mat_ptr;
double t;
float t;
bool front_face;
inline void set_face_normal(const ray& r, const vec3& outward_normal)

View file

@ -7,11 +7,11 @@
struct sphere : hittable {
/* Attributes */
point3 center;
double radius;
float radius;
std::shared_ptr<material> mat_ptr;
/* Constructor */
sphere(point3 c, double r, std::shared_ptr<material> m)
sphere(point3 c, float r, std::shared_ptr<material> m)
{
center = c;
radius = r;
@ -19,34 +19,29 @@ struct sphere : hittable {
}
/* Virtual methods declaration */
virtual bool hit(const ray& r, double t_min, double t_max, hit_record& rec) const override;
bool hit(const ray& r, float t_min, float t_max, hit_record& rec) const;
};
/* Virtual method implementations */
bool sphere::hit(const ray& r, double t_min, double t_max, hit_record& rec) const
bool sphere::hit(const ray& r, float t_min, float t_max, hit_record& rec) const
{
rmt_ScopedCPUSample(Sphere_Hit, RMTSF_Aggregate);
/* NOTE: This function is called too many times (and too fast) for it to be
profiled in a usual way using Remotery. */
// Part 1
TIMED_BLOCK();
vec3 oc = r.origin - center;
double a = r.direction.length_squared();
double half_b = dot(oc, r.direction);
double c = oc.length_squared() - radius*radius;
float a = r.direction.length_squared();
float half_b = dot(oc, r.direction);
float c = oc.length_squared() - radius*radius;
// Part 2
double discriminant = half_b*half_b - a*c;
float discriminant = half_b*half_b - a*c;
if (discriminant < 0)
return false;
double sqrtd = sqrt(discriminant);
float sqrtd = sqrt(discriminant);
// Find the nearest root that lies in the acceptable range
// Part 3
double root = (-half_b - sqrtd) / a;
float root = (-half_b - sqrtd) / a;
if (root < t_min || t_max < root)
{
root = (-half_b + sqrtd) / a;
@ -54,8 +49,6 @@ bool sphere::hit(const ray& r, double t_min, double t_max, hit_record& rec) cons
return false;
}
// Part 4
rec.t = root;
rec.p = r.at(rec.t);
vec3 outward_normal = (rec.p - center) / radius;

39
timer.hpp Normal file
View file

@ -0,0 +1,39 @@
#include <x86intrin.h>
#include "rtweekend.hpp"
/*
 * TIMED_BLOCK([hit_count]) declares a scoped `timed_block` object that lives
 * until the end of the enclosing scope.
 *
 *  - The object is named timed_block_<line>, so several TIMED_BLOCKs can share
 *    one scope as long as they sit on different source lines. The extra
 *    TIMED_BLOCK_ level exists so that __LINE__ is expanded to its number
 *    before it reaches the ## paste in TIMED_BLOCK__.
 *  - __COUNTER__ is passed as the first constructor argument; timed_block uses
 *    it as the index of this call site's slot in debug_record_array.
 *  - Any arguments given to TIMED_BLOCK are forwarded after __FUNCTION__
 *    (the constructor's optional hit_count).
 *
 * `, ##__VA_ARGS__` is the GNU comma-swallowing extension: it lets
 * TIMED_BLOCK() be written with no arguments when building in a GNU dialect.
 */
#define TIMED_BLOCK__(number, ...) timed_block timed_block_##number(__COUNTER__, __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__)
#define TIMED_BLOCK_(number, ...) TIMED_BLOCK__(number, ##__VA_ARGS__)
#define TIMED_BLOCK(...) TIMED_BLOCK_(__LINE__, ##__VA_ARGS__)
/* Accumulated timing data for one TIMED_BLOCK call site. Each timed_block
   constructor selects one of these by index and fills it in. */
struct debug_record
{
// Sum of __rdtsc() deltas over every timed_block lifetime recorded in this slot
uint64_t cycles;
// Sum of the hit_count values passed to the timed_block constructor
uint64_t hit_count;
// Source file of the call site (the macro passes __FILE__)
const char *filename;
// Enclosing function of the call site (the macro passes __FUNCTION__)
const char *function_name;
// Source line of the call site (the macro passes __LINE__)
uint32_t line_number;
};
/* Table of records indexed by the counter handed to timed_block. Declared
   here with an unknown bound; the definition that fixes its size must be
   provided elsewhere in the program. */
extern debug_record debug_record_array[];
struct timed_block {
// TODO: Thread safety
debug_record *record;
timed_block(int counter, const char *filename, int line_number, const char *function_name, int hit_count = 1)
{
record = debug_record_array + counter;
record->filename = filename;
record->line_number = line_number;
record->function_name = function_name;
record->cycles -= __rdtsc();
record->hit_count += hit_count;
}
~timed_block()
{
record->cycles += __rdtsc();
}
};

View file

@ -5,12 +5,12 @@
struct vec3 {
/* Members */
double x;
double y;
double z;
float x;
float y;
float z;
// Constructor proper. Values default to 0
vec3(double x = 0, double y = 0, double z = 0)
vec3(float x = 0, float y = 0, float z = 0)
{
this->x = x;
this->y = y;
@ -35,7 +35,7 @@ struct vec3 {
}
// Scalar multiplication
vec3& operator*=(const double t)
vec3& operator*=(const float t)
{
x *= t;
y *= t;
@ -44,7 +44,7 @@ struct vec3 {
}
// Division by a scalar t
vec3& operator/=(const double t)
vec3& operator/=(const float t)
{
x /= t;
y /= t;
@ -54,13 +54,13 @@ struct vec3 {
/* Methods */
double length() const
float length() const
{
return sqrt(x*x + y*y + z*z);
}
// Length squared, useful for some calculations
double length_squared() const
float length_squared() const
{
return x*x + y*y + z*z;
}
@ -68,19 +68,19 @@ struct vec3 {
// Get a vec3 with random components in the range [0,1)
inline static vec3 random()
{
return vec3(random_double(), random_double(), random_double());
return vec3(random_float(), random_float(), random_float());
}
// Get a vec3 with random components in the range [min, max)
inline static vec3 random(double min, double max)
inline static vec3 random(float min, float max)
{
return vec3(random_double(min, max), random_double(min, max), random_double(min, max));
return vec3(random_float(min, max), random_float(min, max), random_float(min, max));
}
// Check if all vector components are near zero
bool near_zero() const
{
double s = 1e-8;
float s = 1e-8;
return (fabs(x) < s) && (fabs(y) < s) && (fabs(z) < s);
}
};
@ -117,24 +117,24 @@ inline vec3 operator*(const vec3 &u, const vec3 &v)
}
// Scalar product
inline vec3 operator*(double t,const vec3 &v)
inline vec3 operator*(float t,const vec3 &v)
{
return vec3(t*v.x, t*v.y, t*v.z);
}
inline vec3 operator*(const vec3 &v, double t)
inline vec3 operator*(const vec3 &v, float t)
{
return t * v;
}
// Vector division by scalar, implemented as multiplication by the reciprocal 1/t. Note that t == 0 is not guarded against
inline vec3 operator/(vec3 v, double t)
inline vec3 operator/(vec3 v, float t)
{
return 1/t * v;
}
// Straightforward dot product
inline double dot(const vec3 &u, const vec3 &v)
inline float dot(const vec3 &u, const vec3 &v)
{
return u.x*v.x + u.y*v.y + u.z*v.z;
@ -191,9 +191,9 @@ vec3 reflect(const vec3& v, const vec3 n)
return v - 2*dot(v,n)*n;
}
vec3 refract (const vec3& uv, const vec3& n, double etai_over_etat)
vec3 refract (const vec3& uv, const vec3& n, float etai_over_etat)
{
double cos_theta = fmin(dot(-uv, n), 1.0);
float cos_theta = fmin(dot(-uv, n), 1.0);
vec3 r_out_perp = etai_over_etat * (uv + cos_theta*n);
vec3 r_out_parallel = -sqrt(fabs(1.0 - r_out_perp.length_squared())) * n;
return r_out_perp + r_out_parallel;
@ -203,7 +203,7 @@ vec3 random_in_unit_disk()
{
while (true)
{
auto p = vec3(random_double(-1,1), random_double(-1,1), 0);
auto p = vec3(random_float(-1,1), random_float(-1,1), 0);
if (p.length_squared() >= 1) continue;
return p;
}