#include #include #include #include #define RMT_ENABLED 0 // Lib includes #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #pragma GCC diagnostic ignored "-Wunused-variable" #pragma GCC diagnostic ignored "-Wsign-compare" #include #pragma GCC diagnostic pop #include // Internal includes #include "rtweekend.hpp" #include "color.hpp" #include "hittable_list.hpp" #include "sphere.hpp" #include "camera.hpp" #ifdef DEBUG #define print_timers() print_timers_() #else #define print_timers() #endif // Threading structs struct thread_args { int32_t thread_id; int32_t start; int32_t end; }; // Function signatures color ray_color(const ray& r, const hittable& world, int32_t depth); float hit_sphere(const point3& center, float radius, const ray& r); void *raytrace_lines(void *arg); hittable_list random_scene(); // Global vars indicators::DynamicProgress progress_bars; const char *default_file = "image.ppm"; FILE *output_file_handle; // Image float aspect_ratio; int32_t image_width; int32_t image_height; int32_t samples_per_pixel; int32_t max_depth; color *image; uint64_t bytes_per_line; uint64_t bytes_per_pixel; // World hittable_list world; camera *global_camera; hittable_list random_scene() { hittable_list world; auto ground_material = make_shared(color(0.5, 0.5, 0.5)); world.add(sphere(point3(0,-1000,0), 1000, ground_material)); for (int32_t a = -11; a < 11; a++) { for (int32_t b = -11; b < 11; b++) { float choose_mat = random_float(); point3 center(a + 0.9*random_float(), 0.2, b + 0.9*random_float()); if ((center - point3(4, 0.2, 0)).length() > 0.9) { shared_ptr sphere_material; if (choose_mat < 0.8) { // diffuse color albedo = color::random() * color::random(); sphere_material = make_shared(albedo); world.add(sphere(center, 0.2, sphere_material)); } else if (choose_mat < 0.95) { // metal color albedo = color::random(0.5, 1); float fuzz = random_float(0, 0.5); sphere_material = make_shared(albedo, fuzz); world.add(sphere(center, 0.2, sphere_material)); } else { // glass sphere_material = make_shared(1.5); world.add(sphere(center, 0.2, sphere_material)); } } } } auto material1 = make_shared(1.5); world.add(sphere(point3(0, 1, 0), 1.0, material1)); auto material2 = make_shared(color(0.4, 0.2, 0.1)); world.add(sphere(point3(-4, 1, 0), 1.0, material2)); auto material3 = make_shared(color(0.7, 0.6, 0.5), 0.0); world.add(sphere(point3(4, 1, 0), 1.0, material3)); return world; } template color ray_color(const ray& r, hittable_list& world, int32_t depth) { rmt_ScopedCPUSample(Scatter, RMTSF_Aggregate | RMTSF_Recursive); if (depth <= 0) { return color(0,0,0); } hit_record rec; if (world.hit(r, 0.001, INFINITY, rec)) { ray scattered; color attenuation; rmt_BeginCPUSample(Scatter, RMTSF_Aggregate); bool visible = rec.mat_ptr->scatter(r, rec, attenuation, scattered); rmt_EndCPUSample(); if (visible) { return attenuation * ray_color(scattered, world, depth-1); } else { return color(0,0,0); } } vec3 unit_direction = normalize(r.direction); float t = 0.5 * (unit_direction.y + 1.0); return (1-t) * color(1,1,1) + t*color(0.5,0.7,1.0); } float hit_sphere(const point3& center, float radius, const ray& r) { vec3 oc = r.origin - center; float a = r.direction.length_squared(); float half_b = dot(oc, r.direction); float c = oc.length_squared() - radius*radius; float discriminant = half_b*half_b - a*c; if (discriminant < 0) return -1; else return (-half_b - sqrt(discriminant)) / a; } int32_t main(int argc, char *argv[]) { /* Argument parsing */ int32_t c; bool using_default_output = true; while (1) { static struct option long_options[] = { {"output", required_argument, 0, 'o'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; c = getopt_long (argc, argv, "o:", long_options, &option_index); /* Detect the end of the options. */ if (c == -1) break; switch (c) { case 0: /* If this option set a flag, do nothing else now. */ if (long_options[option_index].flag != 0) break; printf ("option %s", long_options[option_index].name); if (optarg) printf (" with arg %s", optarg); printf ("\n"); break; case 'o': using_default_output = false; output_file_handle = fopen(optarg, "w"); break; case '?': /* getopt_long already printed an error message. */ break; default: abort(); } } if (using_default_output) { output_file_handle = fopen(default_file, "w"); } /* Profiling library initialization */ Remotery *rmt; if (RMT_ERROR_NONE != rmt_CreateGlobalInstance(&rmt)) { fprintf(stderr, "Error starting Remotery\n"); } //indicators::show_console_cursor(false); // Image aspect_ratio = 3.0 / 2.0; image_width = 1200; image_height = (int32_t) (image_width / aspect_ratio); samples_per_pixel = 500; max_depth = 50; image = (color*) malloc(image_width * image_height * sizeof(color)); bytes_per_line = sizeof(color) * image_width; bytes_per_pixel = sizeof(color); if (getenv("SPP")) { samples_per_pixel = strtol(getenv("SPP"), NULL, 10); } // World world = random_scene(); // Camera point3 lookfrom(13,2,3); point3 lookat(0,0,0); vec3 vup(0,1,0); float dist_to_focus = 10.0; float aperture = 0.1; camera cam = camera(lookfrom, lookat, vup, 20, aspect_ratio, aperture, dist_to_focus); global_camera = &cam; // Render fprintf(output_file_handle, "P3\n%d %d\n255\n", image_width, image_height); int32_t ncores = sysconf(_SC_NPROCESSORS_ONLN); // Get the number of logical CPUs std::vector threads; std::vector args; threads.reserve(ncores); args.reserve(ncores); std::vector bar_memory; bar_memory.reserve(ncores); for (int32_t i = 0; i < ncores; ++i) { bar_memory[i] = new indicators::BlockProgressBar{indicators::option::BarWidth{50}, indicators::option::ForegroundColor{indicators::Color::white}, indicators::option::ShowElapsedTime{true}, indicators::option::ShowRemainingTime{true}, indicators::option::PrefixText{"Thread #" + std::to_string(i)} }; progress_bars.push_back(*bar_memory[i]); int32_t start; int32_t end; // Divide work among cores start = image_height/ncores * i; end = image_height/ncores * (i+1); // Make sure we complete the whole picture even if the work is not perfectly divisible if (i == ncores) end = image_height; args[i].start = start; args[i].end = end; args[i].thread_id = i; // TODO: Check for errors pthread_create(&threads[i], NULL, raytrace_lines, &args[i]); } for (int32_t i = 0; i < ncores; ++i) { switch (pthread_join(threads[i], NULL)) { case EDEADLK: fprintf(stderr, "A deadlock was detected (e.g., two threads tried to join with each other); or thread specifies the calling thread.\n"); break; case EINVAL: fprintf(stderr, "thread is not a joinable thread OR\n" "Another thread is already waiting to join with this thread.\n"); break; case ESRCH: fprintf(stderr, "No thread with the ID thread could be found.\n"); break; default: break; } } write_image(image, image_width*image_height, output_file_handle, samples_per_pixel); /* Obsolete non-threaded implementation */ // for (int32_t j = image_height - 1; j >= 0; --j) // { // rmt_ScopedCPUSample(OuterLoop, RMTSF_Aggregate); // fprintf(stderr, "\rScanlines remaining: %d ", j); // print_timers(); // fflush(stderr); // for (int32_t i = 0; i < image_width; ++i) // { // rmt_ScopedCPUSample(InnerLoop, RMTSF_Aggregate); // color pixel_color = color(0,0,0); // for (int32_t s = 0; s < samples_per_pixel; ++s) // { // float u = ((i + random_float()) / (image_width-1)); // float v = ((j + random_float()) / (image_height-1)); // ray r = cam.get_ray(u,v); // pixel_color += ray_color(r, world, max_depth); // } // write_color(output_file_handle, pixel_color, samples_per_pixel); // } // } fprintf(stderr, "\nDone\n"); rmt_DestroyGlobalInstance(rmt); free(image); fclose(output_file_handle); //indicators::show_console_cursor(true); } void *raytrace_lines(void *arg) { thread_args arguments = *((thread_args*)arg); int32_t start = arguments.start; int32_t end = arguments.end; int32_t thread_id = arguments.thread_id; for (int32_t j = end - 1; j >= start; --j) { int32_t lines_expected = end-start; int32_t lines_completed = end-j; progress_bars[thread_id].set_option(indicators::option::PostfixText{std::to_string(lines_completed) + "/" + std::to_string(lines_expected)}); progress_bars[thread_id].set_progress(((float)lines_completed/lines_expected)*100); rmt_ScopedCPUSample(OuterLoop, RMTSF_Aggregate); for (int32_t i = 0; i < image_width; ++i) { color pixel_color = color(0,0,0); for (int32_t s = 0; s < samples_per_pixel; ++s) { float u = ((i + random_float()) / (image_width-1)); float v = ((j + random_float()) / (image_height-1)); ray r = global_camera->get_ray(u,v); pixel_color += ray_color(r, world, max_depth); } int32_t index = j * image_width + i; image[index] = pixel_color; } } return nullptr; } #ifdef DEBUG debug_record debug_record_array[__COUNTER__]; void print_timers_() { for (uint32_t i = 0; i < sizeof(debug_record_array) / sizeof(debug_record_array[0]); ++i) { debug_record *record = &debug_record_array[i]; fprintf(stderr, "%d: %s:%s:%d; " "Cycles = %ld; " "Hit count %ld; " "Cycles/hit %f; " "Time %f", i, record->filename, record->function_name, record->line_number, record->cycles, record->hit_count, (double)record->cycles / record->hit_count, (double)record->cycles / CLOCKS_PER_SEC); } } #endif