From 321c677da24334f7db24a33b5f31676413c3ee4b Mon Sep 17 00:00:00 2001 From: David Date: Sat, 28 Aug 2021 01:04:31 +0200 Subject: [PATCH] More timing functions --- Makefile | 4 +-- camera.hpp | 23 ++++++------- color.hpp | 8 ++--- hittable.hpp | 2 +- hittable_list.hpp | 21 +++++++----- main.cpp | 87 +++++++++++++++++++++++++++++++---------------- material.hpp | 20 +++++------ ray.hpp | 2 +- rtweekend.hpp | 26 +++++--------- sphere.hpp | 43 ++++++++++------------- timer.hpp | 39 +++++++++++++++++++++ vec3.hpp | 38 ++++++++++----------- 12 files changed, 184 insertions(+), 129 deletions(-) create mode 100644 timer.hpp diff --git a/Makefile b/Makefile index 22930b9..5f5fdf7 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ INCLUDE=./include LIBS=-pthread -lm -FLAGS=-Og -g -Wall -Wextra -Wpedantic +FLAGS=-Ofast -march=native -g -Wall -Wextra -Wpedantic raytracer: camera.hpp color.hpp hittable.hpp hittable_list.hpp main.cpp material.hpp ray.hpp rtweekend.hpp sphere.hpp vec3.hpp $(INCLUDE)/Remotery.c $(INCLUDE)/Remotery.h - @g++ $(FLAGS) -I$(INCLUDE) $(LIBS) main.cpp -o raytracer + g++ $(FLAGS) -I$(INCLUDE) $(LIBS) main.cpp -o raytracer image: raytracer @./raytracer > image.ppm diff --git a/camera.hpp b/camera.hpp index 970ffad..cd28486 100644 --- a/camera.hpp +++ b/camera.hpp @@ -10,21 +10,21 @@ struct camera { vec3 horizontal; vec3 vertical; vec3 u,v,w; - double lens_radius; + float lens_radius; /* Constructors */ camera(point3 lookfrom, point3 lookat, vec3 vup, - double vfov, - double aspect_ratio, - double aperture, - double focus_dist) + float vfov, + float aspect_ratio, + float aperture, + float focus_dist) { - double theta = degrees_to_radians(vfov); - double h = tan(theta/2); - double viewport_height = 2.0 * h; - double viewport_width = aspect_ratio * viewport_height; + float theta = degrees_to_radians(vfov); + float h = tan(theta/2); + float viewport_height = 2.0 * h; + float viewport_width = aspect_ratio * viewport_height; w = normalize(lookfrom - lookat); u = normalize(cross(vup,w)); @@ -40,9 +40,8 @@ struct camera { /* Methods */ - ray get_ray(double s, double t) const - { - rmt_ScopedCPUSample(GetRay, RMTSF_Aggregate); + ray get_ray(float s, float t) const + { vec3 rd = lens_radius * random_in_unit_disk(); vec3 offset = u * rd.x + v * rd.y; diff --git a/color.hpp b/color.hpp index ca38d1f..0b667bb 100644 --- a/color.hpp +++ b/color.hpp @@ -9,12 +9,12 @@ /* Writes color components as a space-delimited string of numbers in the range [0,255] */ void write_color(FILE *fp, color c, uint32_t samples_per_pixel) { - double scale = 1.0 / samples_per_pixel; + float scale = 1.0 / samples_per_pixel; // Divide the color by the number of samples - double r = sqrt(c.x * scale); - double g = sqrt(c.y * scale); - double b = sqrt(c.z * scale); + float r = sqrt(c.x * scale); + float g = sqrt(c.y * scale); + float b = sqrt(c.z * scale); /* Write output */ fprintf(fp, diff --git a/hittable.hpp b/hittable.hpp index 6baef66..7edb7c9 100644 --- a/hittable.hpp +++ b/hittable.hpp @@ -6,7 +6,7 @@ /* Virtual class that represents objects who could collide against a ray */ struct hittable { - virtual bool hit(const ray& r, double t_min, double t_max, hit_record& rec) const = 0; + virtual bool hit(const ray& r, float t_min, float t_max, hit_record& rec) const = 0; }; #endif diff --git a/hittable_list.hpp b/hittable_list.hpp index 3a1416b..a9c2dcd 100644 --- a/hittable_list.hpp +++ b/hittable_list.hpp @@ -2,6 +2,7 @@ #define HITTABLE_LIST_H #include "hittable.hpp" +#include "sphere.hpp" #include #include @@ -9,30 +10,34 @@ using std::shared_ptr; using std::make_shared; -struct hittable_list : hittable { +template +struct hittable_list { /* Attributes */ - std::vector> objects; + std::vector objects; /* Constructors */ hittable_list () {} - hittable_list(shared_ptr h) { add(h); } + hittable_list(T object) { add(object); } /* Methods */ void clear() { objects.clear(); } - void add (shared_ptr h) { objects.push_back(h); } + void add (T h) { objects.push_back(h); } - virtual bool hit(const ray& r, double t_min, double t_max, hit_record& rec) const override; + bool hit(const ray& r, float t_min, float t_max, hit_record& rec); }; -bool hittable_list::hit(const ray& r, double t_min, double t_max, hit_record& rec) const +template +bool hittable_list::hit(const ray& r, float t_min, float t_max, hit_record& rec) { rmt_ScopedCPUSample(HittableList_Hit, RMTSF_Aggregate); hit_record temp_rec; bool hit_anything = false; - double closest_so_far = t_max; + float closest_so_far = t_max; - for (const auto& object : objects) + + for (uint32_t i = 0; i < objects.size(); ++i) { + T *object = &objects[i]; if (object->hit(r, t_min, closest_so_far, temp_rec)) { hit_anything = true; diff --git a/main.cpp b/main.cpp index 3eae6ce..8d5c035 100644 --- a/main.cpp +++ b/main.cpp @@ -3,6 +3,9 @@ #include +// Disable profiling +#define RMT_ENABLED 1 + // Lib includes #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" @@ -18,23 +21,23 @@ #include "sphere.hpp" #include "camera.hpp" - color ray_color(const ray& r, const hittable& world, int32_t depth); -double hit_sphere(const point3& center, double radius, const ray& r); -hittable_list random_scene(); +float hit_sphere(const point3& center, float radius, const ray& r); +void print_timers(); +hittable_list random_scene(); -hittable_list random_scene() { - hittable_list world; +hittable_list random_scene() { + hittable_list world; auto ground_material = make_shared(color(0.5, 0.5, 0.5)); - world.add(make_shared(point3(0,-1000,0), 1000, ground_material)); + world.add(sphere(point3(0,-1000,0), 1000, ground_material)); for (int32_t a = -11; a < 11; a++) { for (int32_t b = -11; b < 11; b++) { - double choose_mat = random_double(); - point3 center(a + 0.9*random_double(), 0.2, b + 0.9*random_double()); + float choose_mat = random_float(); + point3 center(a + 0.9*random_float(), 0.2, b + 0.9*random_float()); if ((center - point3(4, 0.2, 0)).length() > 0.9) { @@ -44,39 +47,40 @@ hittable_list random_scene() { // diffuse color albedo = color::random() * color::random(); sphere_material = make_shared(albedo); - world.add(make_shared(center, 0.2, sphere_material)); + world.add(sphere(center, 0.2, sphere_material)); } else if (choose_mat < 0.95) { // metal color albedo = color::random(0.5, 1); - double fuzz = random_double(0, 0.5); + float fuzz = random_float(0, 0.5); sphere_material = make_shared(albedo, fuzz); - world.add(make_shared(center, 0.2, sphere_material)); + world.add(sphere(center, 0.2, sphere_material)); } else { // glass sphere_material = make_shared(1.5); - world.add(make_shared(center, 0.2, sphere_material)); + world.add(sphere(center, 0.2, sphere_material)); } } } } auto material1 = make_shared(1.5); - world.add(make_shared(point3(0, 1, 0), 1.0, material1)); + world.add(sphere(point3(0, 1, 0), 1.0, material1)); auto material2 = make_shared(color(0.4, 0.2, 0.1)); - world.add(make_shared(point3(-4, 1, 0), 1.0, material2)); + world.add(sphere(point3(-4, 1, 0), 1.0, material2)); auto material3 = make_shared(color(0.7, 0.6, 0.5), 0.0); - world.add(make_shared(point3(4, 1, 0), 1.0, material3)); + world.add(sphere(point3(4, 1, 0), 1.0, material3)); return world; } -color ray_color(const ray& r, const hittable& world, int32_t depth) +template +color ray_color(const ray& r, hittable_list& world, int32_t depth) { rmt_ScopedCPUSample(Scatter, RMTSF_Aggregate | RMTSF_Recursive); if (depth <= 0) @@ -102,17 +106,17 @@ color ray_color(const ray& r, const hittable& world, int32_t depth) } } vec3 unit_direction = normalize(r.direction); - double t = 0.5 * (unit_direction.y + 1.0); + float t = 0.5 * (unit_direction.y + 1.0); return (1-t) * color(1,1,1) + t*color(0.5,0.7,1.0); } -double hit_sphere(const point3& center, double radius, const ray& r) +float hit_sphere(const point3& center, float radius, const ray& r) { vec3 oc = r.origin - center; - double a = r.direction.length_squared(); - double half_b = dot(oc, r.direction); - double c = oc.length_squared() - radius*radius; - double discriminant = half_b*half_b - a*c; + float a = r.direction.length_squared(); + float half_b = dot(oc, r.direction); + float c = oc.length_squared() - radius*radius; + float discriminant = half_b*half_b - a*c; if (discriminant < 0) return -1; @@ -130,7 +134,7 @@ int32_t main() // Image - const double aspect_ratio = 3.0 / 2.0; + const float aspect_ratio = 3.0 / 2.0; const int32_t image_width = 1200; const int32_t image_height = (int32_t) (image_width / aspect_ratio); int32_t samples_per_pixel = 500; @@ -141,17 +145,15 @@ int32_t main() samples_per_pixel = strtol(getenv("SPP"), NULL, 10); } - - // World - hittable_list world = random_scene(); + hittable_list world = random_scene(); // Camera point3 lookfrom(13,2,3); point3 lookat(0,0,0); vec3 vup(0,1,0); - double dist_to_focus = 10.0; - double aperture = 0.1; + float dist_to_focus = 10.0; + float aperture = 0.1; camera cam(lookfrom, lookat, vup, 20, aspect_ratio, aperture, dist_to_focus); @@ -163,6 +165,7 @@ int32_t main() { rmt_ScopedCPUSample(OuterLoop, RMTSF_Aggregate); fprintf(stderr, "\rScanlines remaining: %d ", j); + print_timers(); fflush(stderr); for (int32_t i = 0; i < image_width; ++i) { @@ -171,8 +174,8 @@ int32_t main() for (int32_t s = 0; s < samples_per_pixel; ++s) { - double u = ((i + random_double()) / (image_width-1)); - double v = ((j + random_double()) / (image_height-1)); + float u = ((i + random_float()) / (image_width-1)); + float v = ((j + random_float()) / (image_height-1)); ray r = cam.get_ray(u,v); pixel_color += ray_color(r, world, max_depth); } @@ -184,3 +187,27 @@ int32_t main() fprintf(stderr, "\nDone\n"); rmt_DestroyGlobalInstance(rmt); } + +debug_record debug_record_array[__COUNTER__]; + + +void print_timers() +{ + for (uint32_t i = 0; + i < sizeof(debug_record_array) / sizeof(debug_record_array[0]); + ++i) + { + debug_record *record = &debug_record_array[i]; + fprintf(stderr, + "%d: %s:%s:%d; " + "Cycles = %ld; " + "Hit count %ld; " + "Cycles/hit %f; " + "Time %f", + i, record->filename, record->function_name, record->line_number, + record->cycles, + record->hit_count, + (double)record->cycles / record->hit_count, + (double)record->cycles / CLOCKS_PER_SEC); + } +} diff --git a/material.hpp b/material.hpp index 5792e38..0e2e5fd 100644 --- a/material.hpp +++ b/material.hpp @@ -37,9 +37,9 @@ struct lambertian : material { struct metal : material { /* Attributes */ color albedo; - double fuzz; + float fuzz; // Constructor - metal(const color& c, double f) + metal(const color& c, float f) { albedo = c; fuzz = f; @@ -57,17 +57,17 @@ struct metal : material { struct dielectric : material { /* Attributes */ - double ri; // refraction index + float ri; // refraction index // Constructor - dielectric(double refraction_index) { ri = refraction_index; } + dielectric(float refraction_index) { ri = refraction_index; } /* Methods */ // Schlick's approximation of reflectance - static double reflectance(double cosine, double ref_idx) + static float reflectance(float cosine, float ref_idx) { - double r0 = (1-ref_idx) / (1+ref_idx); + float r0 = (1-ref_idx) / (1+ref_idx); r0 = r0*r0; return r0 + (1-r0)*pow((1 - cosine), 5); } @@ -77,17 +77,17 @@ struct dielectric : material virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered) const override { attenuation = color(1,1,1); - double refraction_ratio = rec.front_face ? (1.0/ri) : ri; + float refraction_ratio = rec.front_face ? (1.0/ri) : ri; vec3 unit_direction = normalize(r_in.direction); - double cos_theta = fmin(dot(-unit_direction, rec.normal), 1); - double sin_theta = sqrt(1.0 - cos_theta*cos_theta); + float cos_theta = fmin(dot(-unit_direction, rec.normal), 1); + float sin_theta = sqrt(1.0 - cos_theta*cos_theta); bool cannot_refract = refraction_ratio * sin_theta > 1.0; vec3 direction; - if (cannot_refract || reflectance(cos_theta, refraction_ratio) > random_double()) + if (cannot_refract || reflectance(cos_theta, refraction_ratio) > random_float()) direction = reflect(unit_direction, rec.normal); else direction = refract(unit_direction, rec.normal, refraction_ratio); diff --git a/ray.hpp b/ray.hpp index ac4231e..8ad69eb 100644 --- a/ray.hpp +++ b/ray.hpp @@ -24,7 +24,7 @@ struct ray { } // Returns position after time t - point3 at(double t) const + point3 at(float t) const { return origin + t*direction; } diff --git a/rtweekend.hpp b/rtweekend.hpp index 8468343..410329b 100644 --- a/rtweekend.hpp +++ b/rtweekend.hpp @@ -3,36 +3,28 @@ #include #include - -/* Utility macros */ - -#define TIMED_BLOCK_2(c, flags) rmt_ScopedCPUSample(Counter##c, flags) -#define TIMED_BLOCK_1(c, flags) TIMED_BLOCK_2(c, flags) -#define TIMED_BLOCK(flags) TIMED_BLOCK_1(__COUNTER__, flags) - -// #define TIMED_BLOCK_(counter, flags) rmt_ScopedCPUSample(counter, flags) -// #define TIMED_BLOCK(flags) TIMED_BLOCK_(__COUNTER__, flags) +#include "timer.hpp" /* Utility functions */ -double degrees_to_radians(double d) +float degrees_to_radians(float d) { return d * M_PI / 180; } -/* Returns a double in the range [0,1) */ -inline double random_double() +/* Returns a float in the range [0,1) */ +inline float random_float() { return rand() * (1.0 / RAND_MAX); } -/* Returns a double in the range [min,max) */ -inline double random_double(double min, double max) +/* Returns a float in the range [min,max) */ +inline float random_float(float min, float max) { - return min + (max-min) * random_double(); + return min + (max-min) * random_float(); } /* Clamps a value between [min,max] */ -inline double clamp(double v, double min, double max) +inline float clamp(float v, float min, float max) { return v < min ? min : v > max ? max : v; } @@ -50,7 +42,7 @@ struct hit_record { point3 p; vec3 normal; std::shared_ptr mat_ptr; - double t; + float t; bool front_face; inline void set_face_normal(const ray& r, const vec3& outward_normal) diff --git a/sphere.hpp b/sphere.hpp index c54965a..a913a31 100644 --- a/sphere.hpp +++ b/sphere.hpp @@ -7,11 +7,11 @@ struct sphere : hittable { /* Attributes */ point3 center; - double radius; + float radius; std::shared_ptr mat_ptr; /* Contructor */ - sphere(point3 c, double r, std::shared_ptr m) + sphere(point3 c, float r, std::shared_ptr m) { center = c; radius = r; @@ -19,49 +19,42 @@ struct sphere : hittable { } /* Virtual methods declaration */ - virtual bool hit(const ray& r, double t_min, double t_max, hit_record& rec) const override; + bool hit(const ray& r, float t_min, float t_max, hit_record& rec) const; }; /* Virtual method implementations */ -bool sphere::hit(const ray& r, double t_min, double t_max, hit_record& rec) const +bool sphere::hit(const ray& r, float t_min, float t_max, hit_record& rec) const { - rmt_ScopedCPUSample(Sphere_Hit, RMTSF_Aggregate); - - // Part 1 - + /* NOTE: This function is called too many times (and too fast) for it to be + profiled in a usual way using Remotery. */ + + TIMED_BLOCK(); + vec3 oc = r.origin - center; - double a = r.direction.length_squared(); - double half_b = dot(oc, r.direction); - double c = oc.length_squared() - radius*radius; + float a = r.direction.length_squared(); + float half_b = dot(oc, r.direction); + float c = oc.length_squared() - radius*radius; - - // Part 2 - - double discriminant = half_b*half_b - a*c; + float discriminant = half_b*half_b - a*c; if (discriminant < 0) return false; - double sqrtd = sqrt(discriminant); - + float sqrtd = sqrt(discriminant); // Find the nearest root that lies in the acceptable range - // Part 3 - - double root = (-half_b - sqrtd) / a; + float root = (-half_b - sqrtd) / a; if (root < t_min || t_max < root) { root = (-half_b + sqrtd) / a; if (root < t_min || t_max < root) return false; } - - // Part 4 - + rec.t = root; rec.p = r.at(rec.t); vec3 outward_normal = (rec.p - center) / radius; rec.set_face_normal(r, outward_normal); - rec.mat_ptr = mat_ptr; - + rec.mat_ptr = mat_ptr; + return true; } diff --git a/timer.hpp b/timer.hpp new file mode 100644 index 0000000..ed537cb --- /dev/null +++ b/timer.hpp @@ -0,0 +1,39 @@ +#include +#include "rtweekend.hpp" + + +#define TIMED_BLOCK__(number, ...) timed_block timed_block_##Number(__COUNTER__, __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__) +#define TIMED_BLOCK_(number, ...) TIMED_BLOCK__(number, ##__VA_ARGS__) +#define TIMED_BLOCK(...) TIMED_BLOCK_(__LINE__, ##__VA_ARGS__) + +struct debug_record +{ + uint64_t cycles; + uint64_t hit_count; + const char *filename; + const char *function_name; + + uint32_t line_number; + +}; + +extern debug_record debug_record_array[]; + +struct timed_block { + // TODO: Thread safety + debug_record *record; + + timed_block(int counter, const char *filename, int line_number, const char *function_name, int hit_count = 1) + { + record = debug_record_array + counter; + record->filename = filename; + record->line_number = line_number; + record->function_name = function_name; + record->cycles -= __rdtsc(); + record->hit_count += hit_count; + } + ~timed_block() + { + record->cycles += __rdtsc(); + } +}; diff --git a/vec3.hpp b/vec3.hpp index b7340a1..4138ca2 100644 --- a/vec3.hpp +++ b/vec3.hpp @@ -5,12 +5,12 @@ struct vec3 { /* Members */ - double x; - double y; - double z; + float x; + float y; + float z; // Constructor proper. Values default to 0 - vec3(double x = 0, double y = 0, double z = 0) + vec3(float x = 0, float y = 0, float z = 0) { this->x = x; this->y = y; @@ -35,7 +35,7 @@ struct vec3 { } // Scalar multiplication - vec3& operator*=(const double t) + vec3& operator*=(const float t) { x *= t; y *= t; @@ -44,7 +44,7 @@ struct vec3 { } // Division by a scalar t - vec3& operator/=(const double t) + vec3& operator/=(const float t) { x /= t; y /= t; @@ -54,13 +54,13 @@ struct vec3 { /* Methods */ - double length() const + float length() const { return sqrt(x*x + y*y + z*z); } // Length squared, useful for some calculations - double length_squared() const + float length_squared() const { return x*x + y*y + z*z; } @@ -68,19 +68,19 @@ struct vec3 { // Get a vec3 with random components in the range [0,1) inline static vec3 random() { - return vec3(random_double(), random_double(), random_double()); + return vec3(random_float(), random_float(), random_float()); } // Get a vec3 with random components in the range [min, max) - inline static vec3 random(double min, double max) + inline static vec3 random(float min, float max) { - return vec3(random_double(min, max), random_double(min, max), random_double(min, max)); + return vec3(random_float(min, max), random_float(min, max), random_float(min, max)); } // Check if all vector components are near zero bool near_zero() const { - double s = 1e-8; + float s = 1e-8; return (fabs(x) < s) && (fabs(y) < s) && (fabs(z) < s); } }; @@ -117,24 +117,24 @@ inline vec3 operator*(const vec3 &u, const vec3 &v) } // Scalar product -inline vec3 operator*(double t,const vec3 &v) +inline vec3 operator*(float t,const vec3 &v) { return vec3(t*v.x, t*v.y, t*v.z); } -inline vec3 operator*(const vec3 &v, double t) +inline vec3 operator*(const vec3 &v, float t) { return t * v; } // Vector division by scalar. Note that we redefine it as multiplying by 1/t to avoid division by 0 -inline vec3 operator/(vec3 v, double t) +inline vec3 operator/(vec3 v, float t) { return 1/t * v; } // Straightforward dot product -inline double dot(const vec3 &u, const vec3 &v) +inline float dot(const vec3 &u, const vec3 &v) { return u.x*v.x + u.y*v.y + u.z*v.z; @@ -191,9 +191,9 @@ vec3 reflect(const vec3& v, const vec3 n) return v - 2*dot(v,n)*n; } -vec3 refract (const vec3& uv, const vec3& n, double etai_over_etat) +vec3 refract (const vec3& uv, const vec3& n, float etai_over_etat) { - double cos_theta = fmin(dot(-uv, n), 1.0); + float cos_theta = fmin(dot(-uv, n), 1.0); vec3 r_out_perp = etai_over_etat * (uv + cos_theta*n); vec3 r_out_parallel = -sqrt(fabs(1.0 - r_out_perp.length_squared())) * n; return r_out_perp + r_out_parallel; @@ -203,7 +203,7 @@ vec3 random_in_unit_disk() { while (true) { - auto p = vec3(random_double(-1,1), random_double(-1,1), 0); + auto p = vec3(random_float(-1,1), random_float(-1,1), 0); if (p.length_squared() >= 1) continue; return p; }