Per-thread RNGs and RNG bugfix

This commit is contained in:
David 2021-08-30 18:34:14 +02:00
commit 52806e4457
6 changed files with 70 additions and 39 deletions

View file

@ -2,7 +2,7 @@ INCLUDE=./include
LIBS=-pthread -lm LIBS=-pthread -lm
FLAGS=-Ofast -march=native -g -Wall -Wextra -Wpedantic FLAGS=-Ofast -march=native -g -Wall -Wextra -Wpedantic
raytracer: camera.hpp color.hpp hittable.hpp hittable_list.hpp main.cpp material.hpp ray.hpp rtweekend.hpp sphere.hpp vec3.hpp $(INCLUDE)/Remotery.c $(INCLUDE)/Remotery.h raytracer: camera.hpp color.hpp hittable.hpp hittable_list.hpp main.cpp material.hpp random.h ray.hpp rtweekend.hpp sphere.hpp vec3.hpp $(INCLUDE)/Remotery.c $(INCLUDE)/Remotery.h
g++ $(FLAGS) -I$(INCLUDE) $(LIBS) main.cpp -o raytracer g++ $(FLAGS) -I$(INCLUDE) $(LIBS) main.cpp -o raytracer
make debug: make debug:

View file

@ -40,9 +40,9 @@ struct camera {
/* Methods */ /* Methods */
ray get_ray(float s, float t) const ray get_ray(float s, float t, int32_t thread_id = 0) const
{ {
vec3 rd = lens_radius * random_in_unit_disk(); vec3 rd = lens_radius * random_in_unit_disk(thread_id);
vec3 offset = u * rd.x + v * rd.y; vec3 offset = u * rd.x + v * rd.y;
return ray(origin + offset, lower_left_corner + s*horizontal + t*vertical - origin - offset); return ray(origin + offset, lower_left_corner + s*horizontal + t*vertical - origin - offset);

View file

@ -2,6 +2,7 @@
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <getopt.h> #include <getopt.h>
#include <time.h>
#define RMT_ENABLED 0 #define RMT_ENABLED 0
@ -117,7 +118,7 @@ hittable_list<sphere> random_scene() {
} }
template<typename T> template<typename T>
color ray_color(const ray& r, hittable_list<T>& world, int32_t depth) color ray_color(const ray& r, hittable_list<T>& world, int32_t depth, int32_t thread_id)
{ {
rmt_ScopedCPUSample(Scatter, RMTSF_Aggregate | RMTSF_Recursive); rmt_ScopedCPUSample(Scatter, RMTSF_Aggregate | RMTSF_Recursive);
if (depth <= 0) if (depth <= 0)
@ -131,11 +132,11 @@ color ray_color(const ray& r, hittable_list<T>& world, int32_t depth)
ray scattered; ray scattered;
color attenuation; color attenuation;
rmt_BeginCPUSample(Scatter, RMTSF_Aggregate); rmt_BeginCPUSample(Scatter, RMTSF_Aggregate);
bool visible = rec.mat_ptr->scatter(r, rec, attenuation, scattered); bool visible = rec.mat_ptr->scatter(r, rec, attenuation, scattered, thread_id);
rmt_EndCPUSample(); rmt_EndCPUSample();
if (visible) if (visible)
{ {
return attenuation * ray_color(scattered, world, depth-1); return attenuation * ray_color(scattered, world, depth-1, thread_id);
} }
else else
{ {
@ -163,6 +164,9 @@ float hit_sphere(const point3& center, float radius, const ray& r)
int32_t main(int argc, char *argv[]) int32_t main(int argc, char *argv[])
{ {
/* Argument parsing */ /* Argument parsing */
int32_t c; int32_t c;
bool using_default_output = true; bool using_default_output = true;
@ -225,6 +229,30 @@ int32_t main(int argc, char *argv[])
//indicators::show_console_cursor(false); //indicators::show_console_cursor(false);
// Get the number of logical CPUs
int32_t ncores = sysconf(_SC_NPROCESSORS_ONLN);
// Initialize and seed the random number generators
pcg_table = (pcg32_random_t *) malloc(sizeof(pcg32_random_t) * ncores);
for (int32_t i = 0; i < ncores; ++i)
{
struct timespec ts;
if (timespec_get(&ts, TIME_UTC))
{
// Use higher quality seed
uint64_t seed = (uint64_t)(ts.tv_nsec ^ ts.tv_sec);
pcg_table[i] = { seed, seed };
}
else
{
// Error, use default seed
pcg_table[i] = default_pcg;
}
}
// Image // Image
aspect_ratio = 3.0 / 2.0; aspect_ratio = 3.0 / 2.0;
image_width = 1200; image_width = 1200;
@ -257,7 +285,7 @@ int32_t main(int argc, char *argv[])
// Render // Render
fprintf(output_file_handle, "P3\n%d %d\n255\n", image_width, image_height); fprintf(output_file_handle, "P3\n%d %d\n255\n", image_width, image_height);
int32_t ncores = sysconf(_SC_NPROCESSORS_ONLN); // Get the number of logical CPUs
std::vector<pthread_t> threads; std::vector<pthread_t> threads;
std::vector<thread_args> args; std::vector<thread_args> args;
threads.reserve(ncores); threads.reserve(ncores);
@ -381,10 +409,10 @@ void *raytrace_lines(void *arg)
color pixel_color = color(0,0,0); color pixel_color = color(0,0,0);
for (int32_t s = 0; s < samples_per_pixel; ++s) for (int32_t s = 0; s < samples_per_pixel; ++s)
{ {
float u = ((i + random_float()) / (image_width-1)); float u = ((i + random_float(thread_id)) / (image_width-1));
float v = ((j + random_float()) / (image_height-1)); float v = ((j + random_float(thread_id)) / (image_height-1));
ray r = global_camera->get_ray(u,v); ray r = global_camera->get_ray(u,v, thread_id);
pixel_color += ray_color(r, world, max_depth); pixel_color += ray_color(r, world, max_depth, thread_id);
} }
int32_t index = j * image_width + i; int32_t index = j * image_width + i;
image[index] = pixel_color; image[index] = pixel_color;

View file

@ -4,7 +4,7 @@
#include "rtweekend.hpp" #include "rtweekend.hpp"
struct material { struct material {
virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered) const = 0; virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered, int32_t thread_id = 0) const = 0;
}; };
struct lambertian : material { struct lambertian : material {
@ -15,9 +15,9 @@ struct lambertian : material {
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic ignored "-Wunused-parameter"
virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered) const override virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered, int32_t thread_id = 0) const override
{ {
vec3 scatter_direction = rec.normal + random_unit_vector(); vec3 scatter_direction = rec.normal + random_unit_vector(thread_id);
/* NOTE: it is possible that the random vector we generate is exactly opposite to the normal vector, /* NOTE: it is possible that the random vector we generate is exactly opposite to the normal vector,
in which case it will sum to a near-zero scatter vector and generate degenerate results. in which case it will sum to a near-zero scatter vector and generate degenerate results.
@ -45,10 +45,10 @@ struct metal : material {
fuzz = f; fuzz = f;
}; };
virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered) const override virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered, int32_t thread_id) const override
{ {
vec3 reflected = reflect(normalize(r_in.direction), rec.normal); vec3 reflected = reflect(normalize(r_in.direction), rec.normal);
scattered = ray(rec.p, reflected + fuzz*random_in_unit_sphere()); scattered = ray(rec.p, reflected + fuzz*random_in_unit_sphere(thread_id));
attenuation = albedo; attenuation = albedo;
return (dot(scattered.direction, rec.normal) > 0); return (dot(scattered.direction, rec.normal) > 0);
} }
@ -74,7 +74,7 @@ struct dielectric : material
/* Virtual methods */ /* Virtual methods */
virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered) const override virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered, int32_t thread_id) const override
{ {
attenuation = color(1,1,1); attenuation = color(1,1,1);
float refraction_ratio = rec.front_face ? (1.0/ri) : ri; float refraction_ratio = rec.front_face ? (1.0/ri) : ri;
@ -87,7 +87,7 @@ struct dielectric : material
bool cannot_refract = refraction_ratio * sin_theta > 1.0; bool cannot_refract = refraction_ratio * sin_theta > 1.0;
vec3 direction; vec3 direction;
if (cannot_refract || reflectance(cos_theta, refraction_ratio) > random_float()) if (cannot_refract || reflectance(cos_theta, refraction_ratio) > random_float(thread_id))
direction = reflect(unit_direction, rec.normal); direction = reflect(unit_direction, rec.normal);
else else
direction = refract(unit_direction, rec.normal, refraction_ratio); direction = refract(unit_direction, rec.normal, refraction_ratio);

View file

@ -8,11 +8,8 @@
#include "timer.hpp" #include "timer.hpp"
#include "random.h" #include "random.h"
#define FAST_RANDOM 1 pcg32_random_t *pcg_table;
#ifdef FAST_RANDOM pcg32_random_t default_pcg = { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL };
pcg32_random_t pcg = { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL };
#define rand() pcg32_random_r(&pcg)
#endif
/* Utility functions */ /* Utility functions */
inline float degrees_to_radians(float d) inline float degrees_to_radians(float d)
@ -21,15 +18,21 @@ inline float degrees_to_radians(float d)
} }
/* Returns a float in the range [0,1) */ /* Returns a float in the range [0,1) */
inline float random_float() inline float random_float_()
{ {
return rand() * (1.0 / RAND_MAX); return rand() * (1.0 / RAND_MAX);
} }
/* Returns a float in the range [min,max) */ /* Returns a float in the range [0,1) */
inline float random_float(float min, float max) inline float random_float(int32_t thread_id = 0)
{ {
return min + (max-min) * random_float(); return pcg32_random_r(&pcg_table[thread_id]) * (1.0 / UINT32_MAX);
}
/* Returns a float in the range [min,max) */
inline float random_float(float min, float max, int32_t thread_id = 0)
{
return min + (max-min) * random_float(thread_id);
} }
/* Clamps a value between [min,max] */ /* Clamps a value between [min,max] */

View file

@ -66,15 +66,15 @@ struct vec3 {
} }
// Get a vec3 with random components in the range [0,1) // Get a vec3 with random components in the range [0,1)
inline static vec3 random() inline static vec3 random(int32_t thread_id = 0)
{ {
return vec3(random_float(), random_float(), random_float()); return vec3(random_float(thread_id), random_float(thread_id), random_float(thread_id));
} }
// Get a vec3 with random components in the range [min, max) // Get a vec3 with random components in the range [min, max)
inline static vec3 random(float min, float max) inline static vec3 random(float min, float max, int32_t thread_id = 0)
{ {
return vec3(random_float(min, max), random_float(min, max), random_float(min, max)); return vec3(random_float(min, max, thread_id), random_float(min, max, thread_id), random_float(min, max, thread_id));
} }
// Check if all vector components are near zero // Check if all vector components are near zero
@ -162,12 +162,12 @@ inline vec3 normalize(const vec3 v)
} }
// Returns a vec3 of random components between [-1,1) that is inside a unit sphere // Returns a vec3 of random components between [-1,1) that is inside a unit sphere
vec3 random_in_unit_sphere() vec3 random_in_unit_sphere(int32_t thread_id)
{ {
// Iterate until we find a vector with length < 1 // Iterate until we find a vector with length < 1
while (true) while (true)
{ {
vec3 p = vec3::random(-1,1); vec3 p = vec3::random(-1,1, thread_id);
if (p.length_squared() >= 1) if (p.length_squared() >= 1)
continue; continue;
return p; return p;
@ -175,14 +175,14 @@ vec3 random_in_unit_sphere()
} }
// Returns a normalized version of the above vector // Returns a normalized version of the above vector
vec3 random_unit_vector() vec3 random_unit_vector(int32_t thread_id)
{ {
return normalize(random_in_unit_sphere()); return normalize(random_in_unit_sphere(thread_id));
} }
vec3 random_in_hemisphere(const vec3& normal) vec3 random_in_hemisphere(const vec3& normal, int32_t thread_id)
{ {
vec3 in_unit_sphere = random_in_unit_sphere(); vec3 in_unit_sphere = random_in_unit_sphere(thread_id);
if (dot(in_unit_sphere, normal) > 0.0) if (dot(in_unit_sphere, normal) > 0.0)
return in_unit_sphere; return in_unit_sphere;
@ -204,11 +204,11 @@ vec3 refract(const vec3& uv, const vec3& n, float etai_over_etat)
return r_out_perp + r_out_parallel; return r_out_perp + r_out_parallel;
} }
vec3 random_in_unit_disk() vec3 random_in_unit_disk(int32_t thread_id)
{ {
while (true) while (true)
{ {
auto p = vec3(random_float(-1,1), random_float(-1,1), 0); auto p = vec3(random_float(-1,1,thread_id), random_float(-1,1,thread_id), 0);
if (p.length_squared() >= 1) continue; if (p.length_squared() >= 1) continue;
return p; return p;
} }