Compare commits

...

4 commits

Author SHA1 Message Date
David
52806e4457 Per-thread RNGs and RNG bugfix 2021-08-30 18:34:14 +02:00
David
a45ae025d6 Add progress bars and argument parsing 2021-08-29 21:24:17 +02:00
David
742ef283e4 Faster random implementation 2021-08-29 16:17:13 +02:00
David
321c677da2 More timing functions 2021-08-28 01:04:31 +02:00
15 changed files with 5163 additions and 170 deletions

6
.gitignore vendored
View file

@ -8,3 +8,9 @@ raytracer
# Actual output image # Actual output image
image.ppm image.ppm
# Profiler data
perf.*
# Core dumps
core

View file

@ -1,12 +1,14 @@
INCLUDE=./include INCLUDE=./include
LIBS=-pthread -lm LIBS=-pthread -lm
FLAGS=-Og -g -Wall -Wextra -Wpedantic FLAGS=-Ofast -march=native -g -Wall -Wextra -Wpedantic
raytracer: camera.hpp color.hpp hittable.hpp hittable_list.hpp main.cpp material.hpp ray.hpp rtweekend.hpp sphere.hpp vec3.hpp $(INCLUDE)/Remotery.c $(INCLUDE)/Remotery.h raytracer: camera.hpp color.hpp hittable.hpp hittable_list.hpp main.cpp material.hpp random.h ray.hpp rtweekend.hpp sphere.hpp vec3.hpp $(INCLUDE)/Remotery.c $(INCLUDE)/Remotery.h
@g++ $(FLAGS) -I$(INCLUDE) $(LIBS) main.cpp -o raytracer g++ $(FLAGS) -I$(INCLUDE) $(LIBS) main.cpp -o raytracer
make debug:
image: raytracer image: raytracer
@./raytracer > image.ppm @./raytracer -o image.ppm
@if [ $$TERM = "xterm-kitty" ]; then\ @if [ $$TERM = "xterm-kitty" ]; then\
kitty icat image.ppm;\ kitty icat image.ppm;\
fi fi

View file

@ -10,21 +10,21 @@ struct camera {
vec3 horizontal; vec3 horizontal;
vec3 vertical; vec3 vertical;
vec3 u,v,w; vec3 u,v,w;
double lens_radius; float lens_radius;
/* Constructors */ /* Constructors */
camera(point3 lookfrom, camera(point3 lookfrom,
point3 lookat, point3 lookat,
vec3 vup, vec3 vup,
double vfov, float vfov,
double aspect_ratio, float aspect_ratio,
double aperture, float aperture,
double focus_dist) float focus_dist)
{ {
double theta = degrees_to_radians(vfov); float theta = degrees_to_radians(vfov);
double h = tan(theta/2); float h = tan(theta/2);
double viewport_height = 2.0 * h; float viewport_height = 2.0 * h;
double viewport_width = aspect_ratio * viewport_height; float viewport_width = aspect_ratio * viewport_height;
w = normalize(lookfrom - lookat); w = normalize(lookfrom - lookat);
u = normalize(cross(vup,w)); u = normalize(cross(vup,w));
@ -40,10 +40,9 @@ struct camera {
/* Methods */ /* Methods */
ray get_ray(double s, double t) const ray get_ray(float s, float t, int32_t thread_id = 0) const
{ {
rmt_ScopedCPUSample(GetRay, RMTSF_Aggregate); vec3 rd = lens_radius * random_in_unit_disk(thread_id);
vec3 rd = lens_radius * random_in_unit_disk();
vec3 offset = u * rd.x + v * rd.y; vec3 offset = u * rd.x + v * rd.y;
return ray(origin + offset, lower_left_corner + s*horizontal + t*vertical - origin - offset); return ray(origin + offset, lower_left_corner + s*horizontal + t*vertical - origin - offset);

View file

@ -9,12 +9,12 @@
/* Writes color components as a space-delimited string of numbers in the range [0,255] */ /* Writes color components as a space-delimited string of numbers in the range [0,255] */
void write_color(FILE *fp, color c, uint32_t samples_per_pixel) void write_color(FILE *fp, color c, uint32_t samples_per_pixel)
{ {
double scale = 1.0 / samples_per_pixel; float scale = 1.0 / samples_per_pixel;
// Divide the color by the number of samples // Divide the color by the number of samples
double r = sqrt(c.x * scale); float r = sqrt(c.x * scale);
double g = sqrt(c.y * scale); float g = sqrt(c.y * scale);
double b = sqrt(c.z * scale); float b = sqrt(c.z * scale);
/* Write output */ /* Write output */
fprintf(fp, fprintf(fp,
@ -24,4 +24,12 @@ void write_color(FILE *fp, color c, uint32_t samples_per_pixel)
(uint8_t) (255 * clamp(b, 0, 1))); (uint8_t) (255 * clamp(b, 0, 1)));
} }
/* Writes `n` pixels from `image` to `fp`, one write_color() call per pixel,
   walking the buffer backwards from element n-1 down to element 0.
   `samples_per_pixel` is forwarded unchanged to write_color().
   Nothing is written when n is 0.
   NOTE(review): a purely linear reverse walk reverses the pixel order inside
   each row as well as the row order. If the buffer is stored row-major with
   columns left-to-right, the output would be mirrored horizontally - confirm
   against the code that fills `image`. */
void write_image(color *image, uint64_t n, FILE *fp, uint32_t samples_per_pixel)
{
    // Signed index so the countdown can step past zero and terminate.
    int64_t idx = n - 1;
    while (idx >= 0)
    {
        write_color(fp, image[idx], samples_per_pixel);
        --idx;
    }
}
#endif #endif

View file

@ -6,7 +6,7 @@
/* Virtual class that represents objects who could collide against a ray */ /* Virtual class that represents objects who could collide against a ray */
struct hittable { struct hittable {
virtual bool hit(const ray& r, double t_min, double t_max, hit_record& rec) const = 0; virtual bool hit(const ray& r, float t_min, float t_max, hit_record& rec) const = 0;
}; };
#endif #endif

View file

@ -2,6 +2,7 @@
#define HITTABLE_LIST_H #define HITTABLE_LIST_H
#include "hittable.hpp" #include "hittable.hpp"
#include "sphere.hpp"
#include <memory> #include <memory>
#include <vector> #include <vector>
@ -9,30 +10,34 @@
using std::shared_ptr; using std::shared_ptr;
using std::make_shared; using std::make_shared;
struct hittable_list : hittable { template <typename T = sphere>
struct hittable_list {
/* Attributes */ /* Attributes */
std::vector<shared_ptr<hittable>> objects; std::vector<T> objects;
/* Constructors */ /* Constructors */
hittable_list () {} hittable_list () {}
hittable_list(shared_ptr<hittable> h) { add(h); } hittable_list(T object) { add(object); }
/* Methods */ /* Methods */
void clear() { objects.clear(); } void clear() { objects.clear(); }
void add (shared_ptr<hittable> h) { objects.push_back(h); } void add (T h) { objects.push_back(h); }
virtual bool hit(const ray& r, double t_min, double t_max, hit_record& rec) const override; bool hit(const ray& r, float t_min, float t_max, hit_record& rec);
}; };
bool hittable_list::hit(const ray& r, double t_min, double t_max, hit_record& rec) const template <typename T>
bool hittable_list<T>::hit(const ray& r, float t_min, float t_max, hit_record& rec)
{ {
rmt_ScopedCPUSample(HittableList_Hit, RMTSF_Aggregate); rmt_ScopedCPUSample(HittableList_Hit, RMTSF_Aggregate);
hit_record temp_rec; hit_record temp_rec;
bool hit_anything = false; bool hit_anything = false;
double closest_so_far = t_max; float closest_so_far = t_max;
for (const auto& object : objects) uint32_t s = objects.size();
for (uint32_t i = 0; i < s; ++i)
{ {
T *object = &objects[i];
if (object->hit(r, t_min, closest_so_far, temp_rec)) if (object->hit(r, t_min, closest_so_far, temp_rec))
{ {
hit_anything = true; hit_anything = true;

4649
include/indicators.hpp Normal file

File diff suppressed because it is too large Load diff

378
main.cpp
View file

@ -1,7 +1,10 @@
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <getopt.h>
#include <time.h>
#define RMT_ENABLED 0
// Lib includes // Lib includes
#pragma GCC diagnostic push #pragma GCC diagnostic push
@ -11,6 +14,8 @@
#include <Remotery.c> #include <Remotery.c>
#pragma GCC diagnostic pop #pragma GCC diagnostic pop
#include <indicators.hpp>
// Internal includes // Internal includes
#include "rtweekend.hpp" #include "rtweekend.hpp"
#include "color.hpp" #include "color.hpp"
@ -18,23 +23,59 @@
#include "sphere.hpp" #include "sphere.hpp"
#include "camera.hpp" #include "camera.hpp"
#ifdef DEBUG
#define print_timers() print_timers_()
#else
#define print_timers()
#endif
// Threading structs
/* Work description handed to one worker thread through the void* argument
   of raytrace_lines(). */
struct thread_args
{
int32_t thread_id; // Index of the worker; selects its progress bar and is passed to the random helpers
int32_t start; // First scanline rendered by this worker (inclusive)
int32_t end; // One past the last scanline rendered by this worker (exclusive)
};
// Function signatures
color ray_color(const ray& r, const hittable& world, int32_t depth); color ray_color(const ray& r, const hittable& world, int32_t depth);
double hit_sphere(const point3& center, double radius, const ray& r); float hit_sphere(const point3& center, float radius, const ray& r);
hittable_list random_scene(); void *raytrace_lines(void *arg);
hittable_list<sphere> random_scene();
hittable_list random_scene() { // Global vars
hittable_list world; indicators::DynamicProgress<indicators::BlockProgressBar> progress_bars;
// Output path used by main() when no -o/--output option is given
const char *default_file = "image.ppm";
// Output stream; opened in main(), receives the PPM header and pixel data, closed before main() returns
FILE *output_file_handle;
// Image
// All of the values below are assigned in main() before the worker threads start.
float aspect_ratio;
int32_t image_width;
int32_t image_height;
int32_t samples_per_pixel; // May be overridden through the SPP environment variable
int32_t max_depth; // Recursion budget passed to ray_color()
color *image; // image_width * image_height accumulated pixel sums, indexed as row * image_width + column
uint64_t bytes_per_line; // sizeof(color) * image_width
uint64_t bytes_per_pixel; // sizeof(color)
// World
hittable_list<sphere> world; // Scene shared by every worker thread
camera *global_camera; // Points at the camera object created inside main()
hittable_list<sphere> random_scene() {
hittable_list<sphere> world;
auto ground_material = make_shared<lambertian>(color(0.5, 0.5, 0.5)); auto ground_material = make_shared<lambertian>(color(0.5, 0.5, 0.5));
world.add(make_shared<sphere>(point3(0,-1000,0), 1000, ground_material)); world.add(sphere(point3(0,-1000,0), 1000, ground_material));
for (int32_t a = -11; a < 11; a++) for (int32_t a = -11; a < 11; a++)
{ {
for (int32_t b = -11; b < 11; b++) for (int32_t b = -11; b < 11; b++)
{ {
double choose_mat = random_double(); float choose_mat = random_float();
point3 center(a + 0.9*random_double(), 0.2, b + 0.9*random_double()); point3 center(a + 0.9*random_float(), 0.2, b + 0.9*random_float());
if ((center - point3(4, 0.2, 0)).length() > 0.9) if ((center - point3(4, 0.2, 0)).length() > 0.9)
{ {
@ -44,39 +85,40 @@ hittable_list random_scene() {
// diffuse // diffuse
color albedo = color::random() * color::random(); color albedo = color::random() * color::random();
sphere_material = make_shared<lambertian>(albedo); sphere_material = make_shared<lambertian>(albedo);
world.add(make_shared<sphere>(center, 0.2, sphere_material)); world.add(sphere(center, 0.2, sphere_material));
} }
else if (choose_mat < 0.95) else if (choose_mat < 0.95)
{ {
// metal // metal
color albedo = color::random(0.5, 1); color albedo = color::random(0.5, 1);
double fuzz = random_double(0, 0.5); float fuzz = random_float(0, 0.5);
sphere_material = make_shared<metal>(albedo, fuzz); sphere_material = make_shared<metal>(albedo, fuzz);
world.add(make_shared<sphere>(center, 0.2, sphere_material)); world.add(sphere(center, 0.2, sphere_material));
} }
else else
{ {
// glass // glass
sphere_material = make_shared<dielectric>(1.5); sphere_material = make_shared<dielectric>(1.5);
world.add(make_shared<sphere>(center, 0.2, sphere_material)); world.add(sphere(center, 0.2, sphere_material));
} }
} }
} }
} }
auto material1 = make_shared<dielectric>(1.5); auto material1 = make_shared<dielectric>(1.5);
world.add(make_shared<sphere>(point3(0, 1, 0), 1.0, material1)); world.add(sphere(point3(0, 1, 0), 1.0, material1));
auto material2 = make_shared<lambertian>(color(0.4, 0.2, 0.1)); auto material2 = make_shared<lambertian>(color(0.4, 0.2, 0.1));
world.add(make_shared<sphere>(point3(-4, 1, 0), 1.0, material2)); world.add(sphere(point3(-4, 1, 0), 1.0, material2));
auto material3 = make_shared<metal>(color(0.7, 0.6, 0.5), 0.0); auto material3 = make_shared<metal>(color(0.7, 0.6, 0.5), 0.0);
world.add(make_shared<sphere>(point3(4, 1, 0), 1.0, material3)); world.add(sphere(point3(4, 1, 0), 1.0, material3));
return world; return world;
} }
color ray_color(const ray& r, const hittable& world, int32_t depth) template<typename T>
color ray_color(const ray& r, hittable_list<T>& world, int32_t depth, int32_t thread_id)
{ {
rmt_ScopedCPUSample(Scatter, RMTSF_Aggregate | RMTSF_Recursive); rmt_ScopedCPUSample(Scatter, RMTSF_Aggregate | RMTSF_Recursive);
if (depth <= 0) if (depth <= 0)
@ -90,11 +132,11 @@ color ray_color(const ray& r, const hittable& world, int32_t depth)
ray scattered; ray scattered;
color attenuation; color attenuation;
rmt_BeginCPUSample(Scatter, RMTSF_Aggregate); rmt_BeginCPUSample(Scatter, RMTSF_Aggregate);
bool visible = rec.mat_ptr->scatter(r, rec, attenuation, scattered); bool visible = rec.mat_ptr->scatter(r, rec, attenuation, scattered, thread_id);
rmt_EndCPUSample(); rmt_EndCPUSample();
if (visible) if (visible)
{ {
return attenuation * ray_color(scattered, world, depth-1); return attenuation * ray_color(scattered, world, depth-1, thread_id);
} }
else else
{ {
@ -102,85 +144,305 @@ color ray_color(const ray& r, const hittable& world, int32_t depth)
} }
} }
vec3 unit_direction = normalize(r.direction); vec3 unit_direction = normalize(r.direction);
double t = 0.5 * (unit_direction.y + 1.0); float t = 0.5 * (unit_direction.y + 1.0);
return (1-t) * color(1,1,1) + t*color(0.5,0.7,1.0); return (1-t) * color(1,1,1) + t*color(0.5,0.7,1.0);
} }
double hit_sphere(const point3& center, double radius, const ray& r) float hit_sphere(const point3& center, float radius, const ray& r)
{ {
vec3 oc = r.origin - center; vec3 oc = r.origin - center;
double a = r.direction.length_squared(); float a = r.direction.length_squared();
double half_b = dot(oc, r.direction); float half_b = dot(oc, r.direction);
double c = oc.length_squared() - radius*radius; float c = oc.length_squared() - radius*radius;
double discriminant = half_b*half_b - a*c; float discriminant = half_b*half_b - a*c;
if (discriminant < 0) if (discriminant < 0)
return -1; return -1;
else else
return (-half_b - sqrt(discriminant)) / a; return (-half_b - sqrt(discriminant)) / a;
} }
int32_t main() int32_t main(int argc, char *argv[])
{ {
/* Argument parsing */
int32_t c;
bool using_default_output = true;
while (1)
{
static struct option long_options[] =
{
{"output", required_argument, 0, 'o'},
{0, 0, 0, 0}
};
/* getopt_long stores the option index here. */
int option_index = 0;
c = getopt_long (argc, argv, "o:",
long_options, &option_index);
/* Detect the end of the options. */
if (c == -1)
break;
switch (c)
{
case 0:
/* If this option set a flag, do nothing else now. */
if (long_options[option_index].flag != 0)
break;
printf ("option %s", long_options[option_index].name);
if (optarg)
printf (" with arg %s", optarg);
printf ("\n");
break;
case 'o':
using_default_output = false;
output_file_handle = fopen(optarg, "w");
break;
case '?':
/* getopt_long already printed an error message. */
break;
default:
abort();
}
}
if (using_default_output)
{
output_file_handle = fopen(default_file, "w");
}
/* Profiling library initialization */ /* Profiling library initialization */
Remotery *rmt; Remotery *rmt;
if (RMT_ERROR_NONE != rmt_CreateGlobalInstance(&rmt)) if (RMT_ERROR_NONE != rmt_CreateGlobalInstance(&rmt))
{ {
fprintf(stderr, "Error starting Remotery\n"); fprintf(stderr, "Error starting Remotery\n");
} }
//indicators::show_console_cursor(false);
// Get the number of logical CPUs
int32_t ncores = sysconf(_SC_NPROCESSORS_ONLN);
// Initialize and seed the random number generators
pcg_table = (pcg32_random_t *) malloc(sizeof(pcg32_random_t) * ncores);
for (int32_t i = 0; i < ncores; ++i)
{
struct timespec ts;
if (timespec_get(&ts, TIME_UTC))
{
// Use higher quality seed
uint64_t seed = (uint64_t)(ts.tv_nsec ^ ts.tv_sec);
pcg_table[i] = { seed, seed };
}
else
{
// Error, use default seed
pcg_table[i] = default_pcg;
}
}
// Image // Image
const double aspect_ratio = 3.0 / 2.0; aspect_ratio = 3.0 / 2.0;
const int32_t image_width = 1200; image_width = 1200;
const int32_t image_height = (int32_t) (image_width / aspect_ratio); image_height = (int32_t) (image_width / aspect_ratio);
int32_t samples_per_pixel = 500; samples_per_pixel = 500;
const int32_t max_depth = 50; max_depth = 50;
image = (color*) malloc(image_width * image_height * sizeof(color));
bytes_per_line = sizeof(color) * image_width;
bytes_per_pixel = sizeof(color);
if (getenv("SPP")) if (getenv("SPP"))
{ {
samples_per_pixel = strtol(getenv("SPP"), NULL, 10); samples_per_pixel = strtol(getenv("SPP"), NULL, 10);
} }
// World // World
hittable_list world = random_scene(); world = random_scene();
// Camera // Camera
point3 lookfrom(13,2,3); point3 lookfrom(13,2,3);
point3 lookat(0,0,0); point3 lookat(0,0,0);
vec3 vup(0,1,0); vec3 vup(0,1,0);
double dist_to_focus = 10.0; float dist_to_focus = 10.0;
double aperture = 0.1; float aperture = 0.1;
camera cam(lookfrom, lookat, vup, 20, aspect_ratio, aperture, dist_to_focus);
camera cam = camera(lookfrom, lookat, vup, 20, aspect_ratio, aperture, dist_to_focus);
global_camera = &cam;
// Render // Render
printf("P3\n%d %d\n255\n", image_width, image_height); fprintf(output_file_handle, "P3\n%d %d\n255\n", image_width, image_height);
std::vector<pthread_t> threads;
std::vector<thread_args> args;
threads.reserve(ncores);
args.reserve(ncores);
std::vector<indicators::BlockProgressBar*> bar_memory;
bar_memory.reserve(ncores);
for (int32_t i = 0; i < ncores; ++i)
{
bar_memory[i] = new indicators::BlockProgressBar{indicators::option::BarWidth{50},
indicators::option::ForegroundColor{indicators::Color::white},
indicators::option::ShowElapsedTime{true},
indicators::option::ShowRemainingTime{true},
indicators::option::PrefixText{"Thread #" + std::to_string(i)}
};
progress_bars.push_back(*bar_memory[i]);
int32_t start;
int32_t end;
// Divide work among cores
start = image_height/ncores * i;
end = image_height/ncores * (i+1);
// Make sure we complete the whole picture even if the work is not perfectly divisible
if (i == ncores)
end = image_height;
args[i].start = start;
args[i].end = end;
args[i].thread_id = i;
// TODO: Check for errors
pthread_create(&threads[i], NULL, raytrace_lines, &args[i]);
}
for (int32_t i = 0; i < ncores; ++i)
{
switch (pthread_join(threads[i], NULL))
{
case EDEADLK:
fprintf(stderr, "A deadlock was detected (e.g., two threads tried to join with each other); or thread specifies the calling thread.\n");
break;
case EINVAL:
fprintf(stderr, "thread is not a joinable thread OR\n"
"Another thread is already waiting to join with this thread.\n");
break;
case ESRCH:
fprintf(stderr, "No thread with the ID thread could be found.\n");
break;
default:
break;
}
}
write_image(image, image_width*image_height, output_file_handle, samples_per_pixel);
/* Obsolete non-threaded implementation */
// for (int32_t j = image_height - 1; j >= 0; --j)
// {
// rmt_ScopedCPUSample(OuterLoop, RMTSF_Aggregate);
// fprintf(stderr, "\rScanlines remaining: %d ", j);
// print_timers();
// fflush(stderr);
for (int32_t j = image_height - 1; j >= 0; --j) // for (int32_t i = 0; i < image_width; ++i)
{ // {
rmt_ScopedCPUSample(OuterLoop, RMTSF_Aggregate); // rmt_ScopedCPUSample(InnerLoop, RMTSF_Aggregate);
fprintf(stderr, "\rScanlines remaining: %d ", j); // color pixel_color = color(0,0,0);
fflush(stderr);
for (int32_t i = 0; i < image_width; ++i)
{
rmt_ScopedCPUSample(InnerLoop, RMTSF_Aggregate);
color pixel_color = color(0,0,0);
for (int32_t s = 0; s < samples_per_pixel; ++s) // for (int32_t s = 0; s < samples_per_pixel; ++s)
{ // {
double u = ((i + random_double()) / (image_width-1)); // float u = ((i + random_float()) / (image_width-1));
double v = ((j + random_double()) / (image_height-1)); // float v = ((j + random_float()) / (image_height-1));
ray r = cam.get_ray(u,v); // ray r = cam.get_ray(u,v);
pixel_color += ray_color(r, world, max_depth); // pixel_color += ray_color(r, world, max_depth);
} // }
write_color(stdout, pixel_color, samples_per_pixel); // write_color(output_file_handle, pixel_color, samples_per_pixel);
} // }
} // }
fprintf(stderr, "\nDone\n"); fprintf(stderr, "\nDone\n");
rmt_DestroyGlobalInstance(rmt); rmt_DestroyGlobalInstance(rmt);
free(image);
fclose(output_file_handle);
//indicators::show_console_cursor(true);
} }
void *raytrace_lines(void *arg)
{
thread_args arguments = *((thread_args*)arg);
int32_t start = arguments.start;
int32_t end = arguments.end;
int32_t thread_id = arguments.thread_id;
for (int32_t j = end - 1; j >= start; --j)
{
int32_t lines_expected = end-start;
int32_t lines_completed = end-j;
progress_bars[thread_id].set_option(indicators::option::PostfixText{std::to_string(lines_completed) + "/" + std::to_string(lines_expected)});
progress_bars[thread_id].set_progress(((float)lines_completed/lines_expected)*100);
rmt_ScopedCPUSample(OuterLoop, RMTSF_Aggregate);
for (int32_t i = 0; i < image_width; ++i)
{
color pixel_color = color(0,0,0);
for (int32_t s = 0; s < samples_per_pixel; ++s)
{
float u = ((i + random_float(thread_id)) / (image_width-1));
float v = ((j + random_float(thread_id)) / (image_height-1));
ray r = global_camera->get_ray(u,v, thread_id);
pixel_color += ray_color(r, world, max_depth, thread_id);
}
int32_t index = j * image_width + i;
image[index] = pixel_color;
}
}
return nullptr;
}
#ifdef DEBUG
/* Storage for the TIMED_BLOCK records. The array is sized with __COUNTER__,
   so this definition has to stay after every TIMED_BLOCK use in the
   translation unit. */
debug_record debug_record_array[__COUNTER__];

/* Prints every timing record to stderr: index, source location, accumulated
   cycles, hit count, cycles per hit and a time estimate.
   NOTE(review): the "Time" column divides __rdtsc() ticks by CLOCKS_PER_SEC,
   which is the unit of clock(), not of the timestamp counter - confirm that
   this is the intended conversion. */
void print_timers_()
{
    const uint32_t record_count = sizeof(debug_record_array) / sizeof(debug_record_array[0]);
    for (uint32_t i = 0; i < record_count; ++i)
    {
        const debug_record *record = &debug_record_array[i];
        // Records that were never hit would otherwise divide by zero.
        const double cycles_per_hit = record->hit_count
            ? (double)record->cycles / record->hit_count
            : 0.0;
        // The 64-bit counters are cast to unsigned long long so the arguments
        // match %llu on every platform; the 32-bit values are printed with %u.
        fprintf(stderr,
                "%u: %s:%s:%u; "
                "Cycles = %llu; "
                "Hit count %llu; "
                "Cycles/hit %f; "
                "Time %f",
                i, record->filename, record->function_name, record->line_number,
                (unsigned long long)record->cycles,
                (unsigned long long)record->hit_count,
                cycles_per_hit,
                (double)record->cycles / CLOCKS_PER_SEC);
    }
}
#endif

View file

@ -4,7 +4,7 @@
#include "rtweekend.hpp" #include "rtweekend.hpp"
struct material { struct material {
virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered) const = 0; virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered, int32_t thread_id = 0) const = 0;
}; };
struct lambertian : material { struct lambertian : material {
@ -15,9 +15,9 @@ struct lambertian : material {
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic ignored "-Wunused-parameter"
virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered) const override virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered, int32_t thread_id = 0) const override
{ {
vec3 scatter_direction = rec.normal + random_unit_vector(); vec3 scatter_direction = rec.normal + random_unit_vector(thread_id);
/* NOTE: it is possible that the random vector we generate is exactly opposite to the normal vector, /* NOTE: it is possible that the random vector we generate is exactly opposite to the normal vector,
in which case it will sum to a near-zero scatter vector and generate degenerate results. in which case it will sum to a near-zero scatter vector and generate degenerate results.
@ -37,18 +37,18 @@ struct lambertian : material {
struct metal : material { struct metal : material {
/* Attributes */ /* Attributes */
color albedo; color albedo;
double fuzz; float fuzz;
// Constructor // Constructor
metal(const color& c, double f) metal(const color& c, float f)
{ {
albedo = c; albedo = c;
fuzz = f; fuzz = f;
}; };
virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered) const override virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered, int32_t thread_id) const override
{ {
vec3 reflected = reflect(normalize(r_in.direction), rec.normal); vec3 reflected = reflect(normalize(r_in.direction), rec.normal);
scattered = ray(rec.p, reflected + fuzz*random_in_unit_sphere()); scattered = ray(rec.p, reflected + fuzz*random_in_unit_sphere(thread_id));
attenuation = albedo; attenuation = albedo;
return (dot(scattered.direction, rec.normal) > 0); return (dot(scattered.direction, rec.normal) > 0);
} }
@ -57,37 +57,37 @@ struct metal : material {
struct dielectric : material struct dielectric : material
{ {
/* Attributes */ /* Attributes */
double ri; // refraction index float ri; // refraction index
// Constructor // Constructor
dielectric(double refraction_index) { ri = refraction_index; } dielectric(float refraction_index) { ri = refraction_index; }
/* Methods */ /* Methods */
// Schlick's approximation of reflectance // Schlick's approximation of reflectance
static double reflectance(double cosine, double ref_idx) static float reflectance(float cosine, float ref_idx)
{ {
double r0 = (1-ref_idx) / (1+ref_idx); float r0 = (1-ref_idx) / (1+ref_idx);
r0 = r0*r0; r0 = r0*r0;
return r0 + (1-r0)*pow((1 - cosine), 5); return r0 + (1-r0)*pow((1 - cosine), 5);
} }
/* Virtual methods */ /* Virtual methods */
virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered) const override virtual bool scatter(const ray& r_in, const hit_record& rec, color& attenuation, ray& scattered, int32_t thread_id) const override
{ {
attenuation = color(1,1,1); attenuation = color(1,1,1);
double refraction_ratio = rec.front_face ? (1.0/ri) : ri; float refraction_ratio = rec.front_face ? (1.0/ri) : ri;
vec3 unit_direction = normalize(r_in.direction); vec3 unit_direction = normalize(r_in.direction);
double cos_theta = fmin(dot(-unit_direction, rec.normal), 1); float cos_theta = fmin(dot(-unit_direction, rec.normal), 1);
double sin_theta = sqrt(1.0 - cos_theta*cos_theta); float sin_theta = sqrt(1.0 - cos_theta*cos_theta);
bool cannot_refract = refraction_ratio * sin_theta > 1.0; bool cannot_refract = refraction_ratio * sin_theta > 1.0;
vec3 direction; vec3 direction;
if (cannot_refract || reflectance(cos_theta, refraction_ratio) > random_double()) if (cannot_refract || reflectance(cos_theta, refraction_ratio) > random_float(thread_id))
direction = reflect(unit_direction, rec.normal); direction = reflect(unit_direction, rec.normal);
else else
direction = refract(unit_direction, rec.normal, refraction_ratio); direction = refract(unit_direction, rec.normal, refraction_ratio);

15
random.h Normal file
View file

@ -0,0 +1,15 @@
// *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / pcg-random.org
// Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)
/* Generator state: `state` is the 64-bit LCG state, `inc` selects the stream
   (its lowest bit is forced to 1 when used, so only odd increments occur). */
typedef struct { uint64_t state; uint64_t inc; } pcg32_random_t;

/* Advances `rng` by one step and returns 32 pseudo-random bits.
   The output is derived from the state *before* the step (xorshift of the
   high bits followed by a rotation chosen by the top five bits).
   Declared inline because the definition lives in a header: without it,
   including this file from two translation units is a multiple-definition
   link error. */
inline uint32_t pcg32_random_r(pcg32_random_t* rng)
{
    uint64_t oldstate = rng->state;
    // Advance internal state
    rng->state = oldstate * 6364136223846793005ULL + (rng->inc|1);
    // Calculate output function (XSH RR), uses old state for max ILP
    uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u;
    uint32_t rot = oldstate >> 59u;
    // 32-bit rotate right by `rot`; the mask keeps the left shift in range when rot is 0.
    return (xorshifted >> rot) | (xorshifted << ((-rot) & 31));
}

View file

@ -24,7 +24,7 @@ struct ray {
} }
// Returns position after time t // Returns position after time t
point3 at(double t) const point3 at(float t) const
{ {
return origin + t*direction; return origin + t*direction;
} }

View file

@ -3,36 +3,40 @@
#include <math.h> #include <math.h>
#include <memory> #include <memory>
#include <pthread.h>
#include <unistd.h>
#include "timer.hpp"
#include "random.h"
/* Utility macros */ pcg32_random_t *pcg_table;
pcg32_random_t default_pcg = { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL };
#define TIMED_BLOCK_2(c, flags) rmt_ScopedCPUSample(Counter##c, flags)
#define TIMED_BLOCK_1(c, flags) TIMED_BLOCK_2(c, flags)
#define TIMED_BLOCK(flags) TIMED_BLOCK_1(__COUNTER__, flags)
// #define TIMED_BLOCK_(counter, flags) rmt_ScopedCPUSample(counter, flags)
// #define TIMED_BLOCK(flags) TIMED_BLOCK_(__COUNTER__, flags)
/* Utility functions */ /* Utility functions */
double degrees_to_radians(double d) inline float degrees_to_radians(float d)
{ {
return d * M_PI / 180; return d * M_PI / 180;
} }
/* Returns a double in the range [0,1) */ /* Returns a float in the range [0,1) */
inline double random_double() inline float random_float_()
{ {
return rand() * (1.0 / RAND_MAX); return rand() * (1.0 / RAND_MAX);
} }
/* Returns a double in the range [min,max) */ /* Returns a float in the range [0,1) */
inline double random_double(double min, double max) inline float random_float(int32_t thread_id = 0)
{ {
return min + (max-min) * random_double(); return pcg32_random_r(&pcg_table[thread_id]) * (1.0 / UINT32_MAX);
}
/* Returns a float in the range [min,max) */
inline float random_float(float min, float max, int32_t thread_id = 0)
{
return min + (max-min) * random_float(thread_id);
} }
/* Clamps a value between [min,max] */ /* Clamps a value between [min,max] */
inline double clamp(double v, double min, double max) inline float clamp(float v, float min, float max)
{ {
return v < min ? min : v > max ? max : v; return v < min ? min : v > max ? max : v;
} }
@ -50,7 +54,7 @@ struct hit_record {
point3 p; point3 p;
vec3 normal; vec3 normal;
std::shared_ptr<material> mat_ptr; std::shared_ptr<material> mat_ptr;
double t; float t;
bool front_face; bool front_face;
inline void set_face_normal(const ray& r, const vec3& outward_normal) inline void set_face_normal(const ray& r, const vec3& outward_normal)

View file

@ -7,11 +7,11 @@
struct sphere : hittable { struct sphere : hittable {
/* Attributes */ /* Attributes */
point3 center; point3 center;
double radius; float radius;
std::shared_ptr<material> mat_ptr; std::shared_ptr<material> mat_ptr;
/* Contructor */ /* Contructor */
sphere(point3 c, double r, std::shared_ptr<material> m) sphere(point3 c, float r, std::shared_ptr<material> m)
{ {
center = c; center = c;
radius = r; radius = r;
@ -19,49 +19,42 @@ struct sphere : hittable {
} }
/* Virtual methods declaration */ /* Virtual methods declaration */
virtual bool hit(const ray& r, double t_min, double t_max, hit_record& rec) const override; bool hit(const ray& r, float t_min, float t_max, hit_record& rec) const;
}; };
/* Virtual method implementations */ /* Virtual method implementations */
bool sphere::hit(const ray& r, double t_min, double t_max, hit_record& rec) const bool sphere::hit(const ray& r, float t_min, float t_max, hit_record& rec) const
{ {
rmt_ScopedCPUSample(Sphere_Hit, RMTSF_Aggregate); /* NOTE: This function is called too many times (and too fast) for it to be
profiled in a usual way using Remotery. */
// Part 1
TIMED_BLOCK();
vec3 oc = r.origin - center; vec3 oc = r.origin - center;
double a = r.direction.length_squared(); float a = r.direction.length_squared();
double half_b = dot(oc, r.direction); float half_b = dot(oc, r.direction);
double c = oc.length_squared() - radius*radius; float c = oc.length_squared() - radius*radius;
float discriminant = half_b*half_b - a*c;
// Part 2
double discriminant = half_b*half_b - a*c;
if (discriminant < 0) if (discriminant < 0)
return false; return false;
double sqrtd = sqrt(discriminant); float sqrtd = sqrt(discriminant);
// Find the nearest root that lies in the acceptable range // Find the nearest root that lies in the acceptable range
// Part 3 float root = (-half_b - sqrtd) / a;
double root = (-half_b - sqrtd) / a;
if (root < t_min || t_max < root) if (root < t_min || t_max < root)
{ {
root = (-half_b + sqrtd) / a; root = (-half_b + sqrtd) / a;
if (root < t_min || t_max < root) if (root < t_min || t_max < root)
return false; return false;
} }
// Part 4
rec.t = root; rec.t = root;
rec.p = r.at(rec.t); rec.p = r.at(rec.t);
vec3 outward_normal = (rec.p - center) / radius; vec3 outward_normal = (rec.p - center) / radius;
rec.set_face_normal(r, outward_normal); rec.set_face_normal(r, outward_normal);
rec.mat_ptr = mat_ptr; rec.mat_ptr = mat_ptr;
return true; return true;
} }

45
timer.hpp Normal file
View file

@ -0,0 +1,45 @@
#include <x86intrin.h>
#include "rtweekend.hpp"
#ifdef DEBUG
/* TIMED_BLOCK(...) declares a scoped timed_block object that measures the
   enclosing scope. The optional argument is forwarded to the timed_block
   constructor (its hit count).
   The two helper levels exist so that __LINE__ is expanded *before* it is
   pasted into the variable name, giving timed_block_<line>. The paste has to
   use the macro parameter `number`; any other spelling is pasted literally
   and makes every expansion declare the same identifier, so two TIMED_BLOCKs
   in one scope would not compile. */
#define TIMED_BLOCK__(number, ...) timed_block timed_block_##number(__COUNTER__, __FILE__, __LINE__, __FUNCTION__, ##__VA_ARGS__)
#define TIMED_BLOCK_(number, ...) TIMED_BLOCK__(number, ##__VA_ARGS__)
#define TIMED_BLOCK(...) TIMED_BLOCK_(__LINE__, ##__VA_ARGS__)
#else
/* Timing disabled: TIMED_BLOCK expands to nothing. */
#define TIMED_BLOCK(...)
#endif
#ifdef DEBUG
/* Accumulated timing data for one TIMED_BLOCK site. */
struct debug_record
{
uint64_t cycles; // Sum of __rdtsc() differences over all completed timings of this site
uint64_t hit_count; // Sum of the hit_count values passed by the timed_block constructor
const char *filename; // Source location of the site, refreshed on every hit
const char *function_name;
uint32_t line_number;
};
/* Defined in another file; one slot per TIMED_BLOCK site, indexed by the
   counter value passed to the timed_block constructor. */
extern debug_record debug_record_array[];
/* Scope timer: the constructor starts a measurement for the record selected
   by `counter`, the destructor finishes it. */
struct timed_block {
// TODO: Thread safety
// (the record is updated with plain, unsynchronized read-modify-write operations)
debug_record *record;
timed_block(int counter, const char *filename, int line_number, const char *function_name, int hit_count = 1)
{
record = debug_record_array + counter;
record->filename = filename;
record->line_number = line_number;
record->function_name = function_name;
// Subtract the start timestamp now and add the end timestamp in the
// destructor: the net effect is cycles += (end - start).
record->cycles -= __rdtsc();
record->hit_count += hit_count;
}
~timed_block()
{
record->cycles += __rdtsc();
}
};
#endif

View file

@ -5,12 +5,12 @@
struct vec3 { struct vec3 {
/* Members */ /* Members */
double x; float x;
double y; float y;
double z; float z;
// Constructor proper. Values default to 0 // Constructor proper. Values default to 0
vec3(double x = 0, double y = 0, double z = 0) vec3(float x = 0, float y = 0, float z = 0)
{ {
this->x = x; this->x = x;
this->y = y; this->y = y;
@ -35,7 +35,7 @@ struct vec3 {
} }
// Scalar multiplication // Scalar multiplication
vec3& operator*=(const double t) vec3& operator*=(const float t)
{ {
x *= t; x *= t;
y *= t; y *= t;
@ -44,7 +44,7 @@ struct vec3 {
} }
// Division by a scalar t // Division by a scalar t
vec3& operator/=(const double t) vec3& operator/=(const float t)
{ {
x /= t; x /= t;
y /= t; y /= t;
@ -54,33 +54,33 @@ struct vec3 {
/* Methods */ /* Methods */
double length() const float length() const
{ {
return sqrt(x*x + y*y + z*z); return sqrt(x*x + y*y + z*z);
} }
// Length squared, useful for some calculations // Length squared, useful for some calculations
double length_squared() const float length_squared() const
{ {
return x*x + y*y + z*z; return x*x + y*y + z*z;
} }
// Get a vec3 with random components in the range [0,1) // Get a vec3 with random components in the range [0,1)
inline static vec3 random() inline static vec3 random(int32_t thread_id = 0)
{ {
return vec3(random_double(), random_double(), random_double()); return vec3(random_float(thread_id), random_float(thread_id), random_float(thread_id));
} }
// Get a vec3 with random components in the range [min, max) // Get a vec3 with random components in the range [min, max)
inline static vec3 random(double min, double max) inline static vec3 random(float min, float max, int32_t thread_id = 0)
{ {
return vec3(random_double(min, max), random_double(min, max), random_double(min, max)); return vec3(random_float(min, max, thread_id), random_float(min, max, thread_id), random_float(min, max, thread_id));
} }
// Check if all vector components are near zero // Check if all vector components are near zero
bool near_zero() const bool near_zero() const
{ {
double s = 1e-8; float s = 1e-8;
return (fabs(x) < s) && (fabs(y) < s) && (fabs(z) < s); return (fabs(x) < s) && (fabs(y) < s) && (fabs(z) < s);
} }
}; };
@ -92,6 +92,11 @@ typedef vec3 color;
/* More overloads */ /* More overloads */
inline bool operator==(const vec3 &u, const vec3 &v)
{
return u.x == v.x && u.y == v.y && u.z == v.z;
}
// Straightforward vector sum // Straightforward vector sum
inline vec3 operator+(const vec3 &u, const vec3 &v) inline vec3 operator+(const vec3 &u, const vec3 &v)
{ {
@ -117,24 +122,24 @@ inline vec3 operator*(const vec3 &u, const vec3 &v)
} }
// Scalar product // Scalar product
inline vec3 operator*(double t,const vec3 &v) inline vec3 operator*(float t,const vec3 &v)
{ {
return vec3(t*v.x, t*v.y, t*v.z); return vec3(t*v.x, t*v.y, t*v.z);
} }
inline vec3 operator*(const vec3 &v, double t) inline vec3 operator*(const vec3 &v, float t)
{ {
return t * v; return t * v;
} }
// Vector division by scalar. Note that we redefine it as multiplying by 1/t to avoid division by 0 // Vector division by scalar. Note that we redefine it as multiplying by 1/t to avoid division by 0
inline vec3 operator/(vec3 v, double t) inline vec3 operator/(vec3 v, float t)
{ {
return 1/t * v; return 1/t * v;
} }
// Straightforward dot product // Straightforward dot product
inline double dot(const vec3 &u, const vec3 &v) inline float dot(const vec3 &u, const vec3 &v)
{ {
return u.x*v.x + u.y*v.y + u.z*v.z; return u.x*v.x + u.y*v.y + u.z*v.z;
@ -157,12 +162,12 @@ inline vec3 normalize(const vec3 v)
} }
// Returns a vec3 of random components between [-1,1) that is inside a unit sphere // Returns a vec3 of random components between [-1,1) that is inside a unit sphere
vec3 random_in_unit_sphere() vec3 random_in_unit_sphere(int32_t thread_id)
{ {
// Iterate until we find a vector with length < 1 // Iterate until we find a vector with length < 1
while (true) while (true)
{ {
vec3 p = vec3::random(-1,1); vec3 p = vec3::random(-1,1, thread_id);
if (p.length_squared() >= 1) if (p.length_squared() >= 1)
continue; continue;
return p; return p;
@ -170,14 +175,14 @@ vec3 random_in_unit_sphere()
} }
// Returns a normalized version of the above vector // Returns a normalized version of the above vector
vec3 random_unit_vector() vec3 random_unit_vector(int32_t thread_id)
{ {
return normalize(random_in_unit_sphere()); return normalize(random_in_unit_sphere(thread_id));
} }
vec3 random_in_hemisphere(const vec3& normal) vec3 random_in_hemisphere(const vec3& normal, int32_t thread_id)
{ {
vec3 in_unit_sphere = random_in_unit_sphere(); vec3 in_unit_sphere = random_in_unit_sphere(thread_id);
if (dot(in_unit_sphere, normal) > 0.0) if (dot(in_unit_sphere, normal) > 0.0)
return in_unit_sphere; return in_unit_sphere;
@ -191,19 +196,19 @@ vec3 reflect(const vec3& v, const vec3 n)
return v - 2*dot(v,n)*n; return v - 2*dot(v,n)*n;
} }
vec3 refract (const vec3& uv, const vec3& n, double etai_over_etat) vec3 refract(const vec3& uv, const vec3& n, float etai_over_etat)
{ {
double cos_theta = fmin(dot(-uv, n), 1.0); float cos_theta = fmin(dot(-uv, n), 1.0);
vec3 r_out_perp = etai_over_etat * (uv + cos_theta*n); vec3 r_out_perp = etai_over_etat * (uv + cos_theta*n);
vec3 r_out_parallel = -sqrt(fabs(1.0 - r_out_perp.length_squared())) * n; vec3 r_out_parallel = -sqrt(fabs(1.0 - r_out_perp.length_squared())) * n;
return r_out_perp + r_out_parallel; return r_out_perp + r_out_parallel;
} }
vec3 random_in_unit_disk() vec3 random_in_unit_disk(int32_t thread_id)
{ {
while (true) while (true)
{ {
auto p = vec3(random_double(-1,1), random_double(-1,1), 0); auto p = vec3(random_float(-1,1,thread_id), random_float(-1,1,thread_id), 0);
if (p.length_squared() >= 1) continue; if (p.length_squared() >= 1) continue;
return p; return p;
} }