448 lines
12 KiB
C++
448 lines
12 KiB
C++
#include <errno.h>
#include <getopt.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <memory>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
#define RMT_ENABLED 0
|
|
|
|
// Lib includes
|
|
#pragma GCC diagnostic push
|
|
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
|
#pragma GCC diagnostic ignored "-Wunused-variable"
|
|
#pragma GCC diagnostic ignored "-Wsign-compare"
|
|
#include <Remotery.c>
|
|
#pragma GCC diagnostic pop
|
|
|
|
#include <indicators.hpp>
|
|
|
|
// Internal includes
|
|
#include "rtweekend.hpp"
|
|
#include "color.hpp"
|
|
#include "hittable_list.hpp"
|
|
#include "sphere.hpp"
|
|
#include "camera.hpp"
|
|
|
|
// print_timers() forwards to print_timers_() in DEBUG builds and expands to
// nothing in every other build, so call sites need no #ifdef of their own.
#if defined(DEBUG)
#define print_timers() print_timers_()
#else
#define print_timers()
#endif
|
|
|
|
// Threading structs
|
|
// Work description handed to raytrace_lines() through pthread_create().
// The worker renders the scanlines j with start <= j < end.
struct thread_args
{
    int32_t thread_id;  // index of the worker; also selects its progress bar
    int32_t start;      // first scanline of the band (inclusive)
    int32_t end;        // one past the last scanline of the band (exclusive)
};
|
|
|
|
// Function signatures
|
|
|
|
color ray_color(const ray& r, const hittable& world, int32_t depth);
|
|
float hit_sphere(const point3& center, float radius, const ray& r);
|
|
void *raytrace_lines(void *arg);
|
|
hittable_list<sphere> random_scene();
|
|
|
|
// Global vars
|
|
indicators::DynamicProgress<indicators::BlockProgressBar> progress_bars;
|
|
const char *default_file = "image.ppm";
|
|
FILE *output_file_handle;
|
|
|
|
// Image
|
|
float aspect_ratio;
|
|
int32_t image_width;
|
|
int32_t image_height;
|
|
int32_t samples_per_pixel;
|
|
int32_t max_depth;
|
|
|
|
color *image;
|
|
uint64_t bytes_per_line;
|
|
uint64_t bytes_per_pixel;
|
|
|
|
// World
|
|
hittable_list<sphere> world;
|
|
camera *global_camera;
|
|
|
|
// Builds the scene: one very large ground sphere, a 22x22 grid of small
// spheres with randomly chosen materials, and three large feature spheres.
hittable_list<sphere> random_scene() {
    hittable_list<sphere> scene;

    auto ground_material = make_shared<lambertian>(color(0.5, 0.5, 0.5));
    scene.add(sphere(point3(0,-1000,0), 1000, ground_material));

    for (int32_t a = -11; a < 11; a++)
    {
        for (int32_t b = -11; b < 11; b++)
        {
            float choose_mat = random_float();
            point3 center(a + 0.9*random_float(), 0.2, b + 0.9*random_float());

            // Keep the area around (4, 0.2, 0) free of small spheres.
            if (!((center - point3(4, 0.2, 0)).length() > 0.9))
            {
                continue;
            }

            // Pick the material: 80% diffuse, 15% metal, 5% glass.
            shared_ptr<material> sphere_material;
            if (choose_mat < 0.8)
            {
                // diffuse
                color albedo = color::random() * color::random();
                sphere_material = make_shared<lambertian>(albedo);
            }
            else if (choose_mat < 0.95)
            {
                // metal
                color albedo = color::random(0.5, 1);
                float fuzz = random_float(0, 0.5);
                sphere_material = make_shared<metal>(albedo, fuzz);
            }
            else
            {
                // glass
                sphere_material = make_shared<dielectric>(1.5);
            }

            scene.add(sphere(center, 0.2, sphere_material));
        }
    }

    // The three large spheres: glass, diffuse and polished metal.
    auto material1 = make_shared<dielectric>(1.5);
    scene.add(sphere(point3(0, 1, 0), 1.0, material1));

    auto material2 = make_shared<lambertian>(color(0.4, 0.2, 0.1));
    scene.add(sphere(point3(-4, 1, 0), 1.0, material2));

    auto material3 = make_shared<metal>(color(0.7, 0.6, 0.5), 0.0);
    scene.add(sphere(point3(4, 1, 0), 1.0, material3));

    return scene;
}
|
|
|
|
template<typename T>
|
|
color ray_color(const ray& r, hittable_list<T>& world, int32_t depth, int32_t thread_id)
|
|
{
|
|
rmt_ScopedCPUSample(Scatter, RMTSF_Aggregate | RMTSF_Recursive);
|
|
if (depth <= 0)
|
|
{
|
|
return color(0,0,0);
|
|
}
|
|
|
|
hit_record rec;
|
|
if (world.hit(r, 0.001, INFINITY, rec))
|
|
{
|
|
ray scattered;
|
|
color attenuation;
|
|
rmt_BeginCPUSample(Scatter, RMTSF_Aggregate);
|
|
bool visible = rec.mat_ptr->scatter(r, rec, attenuation, scattered, thread_id);
|
|
rmt_EndCPUSample();
|
|
if (visible)
|
|
{
|
|
return attenuation * ray_color(scattered, world, depth-1, thread_id);
|
|
}
|
|
else
|
|
{
|
|
return color(0,0,0);
|
|
}
|
|
}
|
|
vec3 unit_direction = normalize(r.direction);
|
|
float t = 0.5 * (unit_direction.y + 1.0);
|
|
return (1-t) * color(1,1,1) + t*color(0.5,0.7,1.0);
|
|
}
|
|
|
|
float hit_sphere(const point3& center, float radius, const ray& r)
|
|
{
|
|
vec3 oc = r.origin - center;
|
|
float a = r.direction.length_squared();
|
|
float half_b = dot(oc, r.direction);
|
|
float c = oc.length_squared() - radius*radius;
|
|
float discriminant = half_b*half_b - a*c;
|
|
|
|
if (discriminant < 0)
|
|
return -1;
|
|
else
|
|
return (-half_b - sqrt(discriminant)) / a;
|
|
}
|
|
int32_t main(int argc, char *argv[])
|
|
{
|
|
|
|
|
|
|
|
|
|
/* Argument parsing */
|
|
int32_t c;
|
|
bool using_default_output = true;
|
|
|
|
while (1)
|
|
{
|
|
static struct option long_options[] =
|
|
{
|
|
{"output", required_argument, 0, 'o'},
|
|
{0, 0, 0, 0}
|
|
};
|
|
/* getopt_long stores the option index here. */
|
|
int option_index = 0;
|
|
|
|
c = getopt_long (argc, argv, "o:",
|
|
long_options, &option_index);
|
|
|
|
/* Detect the end of the options. */
|
|
if (c == -1)
|
|
break;
|
|
|
|
switch (c)
|
|
{
|
|
case 0:
|
|
/* If this option set a flag, do nothing else now. */
|
|
if (long_options[option_index].flag != 0)
|
|
break;
|
|
printf ("option %s", long_options[option_index].name);
|
|
if (optarg)
|
|
printf (" with arg %s", optarg);
|
|
printf ("\n");
|
|
break;
|
|
|
|
case 'o':
|
|
using_default_output = false;
|
|
output_file_handle = fopen(optarg, "w");
|
|
break;
|
|
|
|
case '?':
|
|
/* getopt_long already printed an error message. */
|
|
break;
|
|
|
|
default:
|
|
abort();
|
|
}
|
|
}
|
|
|
|
if (using_default_output)
|
|
{
|
|
output_file_handle = fopen(default_file, "w");
|
|
}
|
|
|
|
|
|
/* Profiling library initialization */
|
|
Remotery *rmt;
|
|
if (RMT_ERROR_NONE != rmt_CreateGlobalInstance(&rmt))
|
|
{
|
|
fprintf(stderr, "Error starting Remotery\n");
|
|
}
|
|
|
|
//indicators::show_console_cursor(false);
|
|
|
|
|
|
// Get the number of logical CPUs
|
|
int32_t ncores = sysconf(_SC_NPROCESSORS_ONLN);
|
|
// Initialize and seed the random number generators
|
|
pcg_table = (pcg32_random_t *) malloc(sizeof(pcg32_random_t) * ncores);
|
|
for (int32_t i = 0; i < ncores; ++i)
|
|
{
|
|
struct timespec ts;
|
|
if (timespec_get(&ts, TIME_UTC))
|
|
{
|
|
// Use higher quality seed
|
|
uint64_t seed = (uint64_t)(ts.tv_nsec ^ ts.tv_sec);
|
|
pcg_table[i] = { seed, seed };
|
|
}
|
|
else
|
|
{
|
|
// Error, use default seed
|
|
pcg_table[i] = default_pcg;
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
// Image
|
|
aspect_ratio = 3.0 / 2.0;
|
|
image_width = 1200;
|
|
image_height = (int32_t) (image_width / aspect_ratio);
|
|
samples_per_pixel = 500;
|
|
max_depth = 50;
|
|
|
|
image = (color*) malloc(image_width * image_height * sizeof(color));
|
|
bytes_per_line = sizeof(color) * image_width;
|
|
bytes_per_pixel = sizeof(color);
|
|
|
|
if (getenv("SPP"))
|
|
{
|
|
samples_per_pixel = strtol(getenv("SPP"), NULL, 10);
|
|
}
|
|
|
|
// World
|
|
world = random_scene();
|
|
|
|
// Camera
|
|
point3 lookfrom(13,2,3);
|
|
point3 lookat(0,0,0);
|
|
vec3 vup(0,1,0);
|
|
float dist_to_focus = 10.0;
|
|
float aperture = 0.1;
|
|
|
|
camera cam = camera(lookfrom, lookat, vup, 20, aspect_ratio, aperture, dist_to_focus);
|
|
global_camera = &cam;
|
|
|
|
// Render
|
|
fprintf(output_file_handle, "P3\n%d %d\n255\n", image_width, image_height);
|
|
|
|
|
|
std::vector<pthread_t> threads;
|
|
std::vector<thread_args> args;
|
|
threads.reserve(ncores);
|
|
args.reserve(ncores);
|
|
|
|
std::vector<indicators::BlockProgressBar*> bar_memory;
|
|
bar_memory.reserve(ncores);
|
|
|
|
for (int32_t i = 0; i < ncores; ++i)
|
|
{
|
|
|
|
bar_memory[i] = new indicators::BlockProgressBar{indicators::option::BarWidth{50},
|
|
indicators::option::ForegroundColor{indicators::Color::white},
|
|
indicators::option::ShowElapsedTime{true},
|
|
indicators::option::ShowRemainingTime{true},
|
|
indicators::option::PrefixText{"Thread #" + std::to_string(i)}
|
|
};
|
|
|
|
progress_bars.push_back(*bar_memory[i]);
|
|
|
|
int32_t start;
|
|
int32_t end;
|
|
|
|
// Divide work among cores
|
|
start = image_height/ncores * i;
|
|
end = image_height/ncores * (i+1);
|
|
|
|
// Make sure we complete the whole picture even if the work is not perfectly divisible
|
|
if (i == ncores)
|
|
end = image_height;
|
|
|
|
args[i].start = start;
|
|
args[i].end = end;
|
|
args[i].thread_id = i;
|
|
|
|
// TODO: Check for errors
|
|
pthread_create(&threads[i], NULL, raytrace_lines, &args[i]);
|
|
|
|
}
|
|
|
|
for (int32_t i = 0; i < ncores; ++i)
|
|
{
|
|
switch (pthread_join(threads[i], NULL))
|
|
{
|
|
case EDEADLK:
|
|
fprintf(stderr, "A deadlock was detected (e.g., two threads tried to join with each other); or thread specifies the calling thread.\n");
|
|
break;
|
|
|
|
case EINVAL:
|
|
fprintf(stderr, "thread is not a joinable thread OR\n"
|
|
"Another thread is already waiting to join with this thread.\n");
|
|
break;
|
|
|
|
case ESRCH:
|
|
fprintf(stderr, "No thread with the ID thread could be found.\n");
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
write_image(image, image_width*image_height, output_file_handle, samples_per_pixel);
|
|
|
|
|
|
|
|
/* Obsolete non-threaded implementation */
|
|
|
|
// for (int32_t j = image_height - 1; j >= 0; --j)
|
|
// {
|
|
// rmt_ScopedCPUSample(OuterLoop, RMTSF_Aggregate);
|
|
// fprintf(stderr, "\rScanlines remaining: %d ", j);
|
|
// print_timers();
|
|
// fflush(stderr);
|
|
|
|
|
|
|
|
// for (int32_t i = 0; i < image_width; ++i)
|
|
// {
|
|
// rmt_ScopedCPUSample(InnerLoop, RMTSF_Aggregate);
|
|
// color pixel_color = color(0,0,0);
|
|
|
|
// for (int32_t s = 0; s < samples_per_pixel; ++s)
|
|
// {
|
|
// float u = ((i + random_float()) / (image_width-1));
|
|
// float v = ((j + random_float()) / (image_height-1));
|
|
// ray r = cam.get_ray(u,v);
|
|
// pixel_color += ray_color(r, world, max_depth);
|
|
// }
|
|
|
|
// write_color(output_file_handle, pixel_color, samples_per_pixel);
|
|
// }
|
|
// }
|
|
|
|
fprintf(stderr, "\nDone\n");
|
|
rmt_DestroyGlobalInstance(rmt);
|
|
free(image);
|
|
fclose(output_file_handle);
|
|
//indicators::show_console_cursor(true);
|
|
}
|
|
|
|
void *raytrace_lines(void *arg)
|
|
{
|
|
|
|
thread_args arguments = *((thread_args*)arg);
|
|
|
|
int32_t start = arguments.start;
|
|
int32_t end = arguments.end;
|
|
int32_t thread_id = arguments.thread_id;
|
|
|
|
for (int32_t j = end - 1; j >= start; --j)
|
|
{
|
|
|
|
int32_t lines_expected = end-start;
|
|
int32_t lines_completed = end-j;
|
|
progress_bars[thread_id].set_option(indicators::option::PostfixText{std::to_string(lines_completed) + "/" + std::to_string(lines_expected)});
|
|
|
|
progress_bars[thread_id].set_progress(((float)lines_completed/lines_expected)*100);
|
|
rmt_ScopedCPUSample(OuterLoop, RMTSF_Aggregate);
|
|
for (int32_t i = 0; i < image_width; ++i)
|
|
{
|
|
color pixel_color = color(0,0,0);
|
|
for (int32_t s = 0; s < samples_per_pixel; ++s)
|
|
{
|
|
float u = ((i + random_float(thread_id)) / (image_width-1));
|
|
float v = ((j + random_float(thread_id)) / (image_height-1));
|
|
ray r = global_camera->get_ray(u,v, thread_id);
|
|
pixel_color += ray_color(r, world, max_depth, thread_id);
|
|
}
|
|
int32_t index = j * image_width + i;
|
|
image[index] = pixel_color;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
debug_record debug_record_array[__COUNTER__];
|
|
|
|
void print_timers_()
|
|
{
|
|
for (uint32_t i = 0;
|
|
i < sizeof(debug_record_array) / sizeof(debug_record_array[0]);
|
|
++i)
|
|
{
|
|
debug_record *record = &debug_record_array[i];
|
|
fprintf(stderr,
|
|
"%d: %s:%s:%d; "
|
|
"Cycles = %ld; "
|
|
"Hit count %ld; "
|
|
"Cycles/hit %f; "
|
|
"Time %f",
|
|
i, record->filename, record->function_name, record->line_number,
|
|
record->cycles,
|
|
record->hit_count,
|
|
(double)record->cycles / record->hit_count,
|
|
(double)record->cycles / CLOCKS_PER_SEC);
|
|
}
|
|
}
|
|
|
|
#endif
|