Cache block positions at startup
Description
In our last change, we were uploading all block positions at once into a vertex buffer. This drastically cut down on the number of glDraw calls we needed to make.
However, we're spending a lot of time compiling this list of positions each frame, even though they aren't changing. The flame graph shows that we're spending most of our CPU time collecting these into a Rust vector.
With this change, we upload the block positions once at startup and cache the number of blocks to render. Each frame we just set the camera position and instruct OpenGL to render the same number of blocks. I'm able to comfortably render 1000x1000 = 1 million blocks per frame at 60 FPS.
The new flame graph is interesting as well. A large portion is dedicated
to setting up the window and OpenGL config (which does not take a
noticeable amount of time), and the rest is a smattering of OpenGL calls
in the event loop. I suspect this means that our application isn't
spending much time on the CPU, probably because we've offloaded all of
the work to the GPU. Each time we call `present`, we're waiting
for the monitor to refresh before doing anything else (if VSync is enabled).
Commands
git clone git@github.com:atsheehan/iridium
cd iridium
git checkout 2e62a8266cacb3d1c045aaa63b152789714edc06
cargo run --release
Code Changes
Modified analysis/flamegraph.svg (GitHub)
Modified src/main.rs (GitHub)
@@ -24,7 +24,8 @@2424 let event_loop = EventLoop::new().unwrap();
2525 let mut renderer = Renderer::new(&event_loop, options.windowed);
2626
27- let mut world = World::new(400, 400);
27+ let mut world = World::new(1000, 1000);
28+ renderer.update_block_cache(world.block_positions());
2829
2930 let mut last_instant = Instant::now();
3031 let mut fps_counter = FrameCounter::new(last_instant);
@@ -117,7 +118,7 @@117118 renderer.set_camera(world.camera());
118119 renderer.clear();
119120
120- renderer.draw_cubes(world.block_positions());
121+ renderer.draw_cubes();
121122
122123 renderer.present();
123124 fps_counter.finish_frame(current_instant);
@@ -24,7 +24,8 @@24 let event_loop = EventLoop::new().unwrap();
25 let mut renderer = Renderer::new(&event_loop, options.windowed);
26
27- let mut world = World::new(400, 400);
28
29 let mut last_instant = Instant::now();
30 let mut fps_counter = FrameCounter::new(last_instant);
@@ -117,7 +118,7 @@117 renderer.set_camera(world.camera());
118 renderer.clear();
119
120- renderer.draw_cubes(world.block_positions());
121
122 renderer.present();
123 fps_counter.finish_frame(current_instant);
@@ -24,7 +24,8 @@24 let event_loop = EventLoop::new().unwrap();
25 let mut renderer = Renderer::new(&event_loop, options.windowed);
26
27+ let mut world = World::new(1000, 1000);
28+ renderer.update_block_cache(world.block_positions());
29
30 let mut last_instant = Instant::now();
31 let mut fps_counter = FrameCounter::new(last_instant);
@@ -117,7 +118,7 @@118 renderer.set_camera(world.camera());
119 renderer.clear();
120
121+ renderer.draw_cubes();
122
123 renderer.present();
124 fps_counter.finish_frame(current_instant);
Modified src/render.rs (GitHub)
@@ -34,6 +34,8 @@3434 cube_program: Program,
3535 cube_vertex_array_id: GLuint,
3636 cube_texture_id: GLuint,
37+ position_array_buffer_id: GLuint,
38+ cube_count: usize,
3739 }
3840
3941 impl Renderer {
@@ -94,14 +96,15 @@9496 cube_vertex_array_id
9597 };
9698
97- unsafe {
99+ let position_array_buffer_id = unsafe {
98100 let mut position_array_id = 0;
99101 gl::GenBuffers(1, &mut position_array_id);
100102 gl::BindBuffer(gl::ARRAY_BUFFER, position_array_id);
101103 gl::EnableVertexAttribArray(0);
102104 gl::VertexAttribPointer(0, 3, gl::FLOAT, gl::FALSE, 0, std::ptr::null());
103105 gl::VertexAttribDivisor(0, 1);
104- }
106+ position_array_id
107+ };
105108
106109 let cube_texture_id = unsafe {
107110 let mut cube_texture_id = 0;
@@ -154,6 +157,8 @@154157 cube_program,
155158 cube_vertex_array_id,
156159 cube_texture_id,
160+ position_array_buffer_id,
161+ cube_count: 0,
157162 }
158163 }
159164
@@ -167,26 +172,30 @@167172 }
168173 }
169174
170- pub(crate) fn draw_cubes(&mut self, positions: impl Iterator<Item = Vec3>) {
175+ pub(crate) fn draw_cubes(&mut self) {
176+ unsafe {
177+ gl::UseProgram(self.cube_program.gl_id());
178+ gl::BindVertexArray(self.cube_vertex_array_id);
179+ gl::BindTexture(gl::TEXTURE_2D, self.cube_texture_id);
180+ gl::DrawArraysInstanced(gl::TRIANGLES, 0, 36, self.cube_count as GLint);
181+ }
182+ }
183+
184+ pub(crate) fn update_block_cache(&mut self, positions: impl Iterator<Item = Vec3>) {
171185 let position_buffer: Vec<f32> = positions
172186 .flat_map(|position| [position.x(), position.y(), position.z()])
173187 .collect();
174188
175- let num_instances = position_buffer.len() / 3;
189+ self.cube_count = position_buffer.len() / 3;
176190
177191 unsafe {
178- gl::UseProgram(self.cube_program.gl_id());
179- gl::BindVertexArray(self.cube_vertex_array_id);
180- gl::BindTexture(gl::TEXTURE_2D, self.cube_texture_id);
181-
192+ gl::BindBuffer(gl::ARRAY_BUFFER, self.position_array_buffer_id);
182193 gl::BufferData(
183194 gl::ARRAY_BUFFER,
184- (std::mem::size_of::<f32>() * 3 * num_instances) as isize,
195+ (std::mem::size_of::<f32>() * 3 * self.cube_count) as isize,
185196 position_buffer.as_ptr() as *const c_void,
186197 gl::STATIC_DRAW,
187198 );
188-
189- gl::DrawArraysInstanced(gl::TRIANGLES, 0, 36, num_instances as GLint);
190199 }
191200 }
192201
@@ -34,6 +34,8 @@34 cube_program: Program,
35 cube_vertex_array_id: GLuint,
36 cube_texture_id: GLuint,
37 }
38
39 impl Renderer {
@@ -94,14 +96,15 @@94 cube_vertex_array_id
95 };
96
97- unsafe {
98 let mut position_array_id = 0;
99 gl::GenBuffers(1, &mut position_array_id);
100 gl::BindBuffer(gl::ARRAY_BUFFER, position_array_id);
101 gl::EnableVertexAttribArray(0);
102 gl::VertexAttribPointer(0, 3, gl::FLOAT, gl::FALSE, 0, std::ptr::null());
103 gl::VertexAttribDivisor(0, 1);
104- }
105
106 let cube_texture_id = unsafe {
107 let mut cube_texture_id = 0;
@@ -154,6 +157,8 @@154 cube_program,
155 cube_vertex_array_id,
156 cube_texture_id,
157 }
158 }
159
@@ -167,26 +172,30 @@167 }
168 }
169
170- pub(crate) fn draw_cubes(&mut self, positions: impl Iterator<Item = Vec3>) {
171 let position_buffer: Vec<f32> = positions
172 .flat_map(|position| [position.x(), position.y(), position.z()])
173 .collect();
174
175- let num_instances = position_buffer.len() / 3;
176
177 unsafe {
178- gl::UseProgram(self.cube_program.gl_id());
179- gl::BindVertexArray(self.cube_vertex_array_id);
180- gl::BindTexture(gl::TEXTURE_2D, self.cube_texture_id);
181-
182 gl::BufferData(
183 gl::ARRAY_BUFFER,
184- (std::mem::size_of::<f32>() * 3 * num_instances) as isize,
185 position_buffer.as_ptr() as *const c_void,
186 gl::STATIC_DRAW,
187 );
188-
189- gl::DrawArraysInstanced(gl::TRIANGLES, 0, 36, num_instances as GLint);
190 }
191 }
192
@@ -34,6 +34,8 @@34 cube_program: Program,
35 cube_vertex_array_id: GLuint,
36 cube_texture_id: GLuint,
37+ position_array_buffer_id: GLuint,
38+ cube_count: usize,
39 }
40
41 impl Renderer {
@@ -94,14 +96,15 @@96 cube_vertex_array_id
97 };
98
99+ let position_array_buffer_id = unsafe {
100 let mut position_array_id = 0;
101 gl::GenBuffers(1, &mut position_array_id);
102 gl::BindBuffer(gl::ARRAY_BUFFER, position_array_id);
103 gl::EnableVertexAttribArray(0);
104 gl::VertexAttribPointer(0, 3, gl::FLOAT, gl::FALSE, 0, std::ptr::null());
105 gl::VertexAttribDivisor(0, 1);
106+ position_array_id
107+ };
108
109 let cube_texture_id = unsafe {
110 let mut cube_texture_id = 0;
@@ -154,6 +157,8 @@157 cube_program,
158 cube_vertex_array_id,
159 cube_texture_id,
160+ position_array_buffer_id,
161+ cube_count: 0,
162 }
163 }
164
@@ -167,26 +172,30 @@172 }
173 }
174
175+ pub(crate) fn draw_cubes(&mut self) {
176+ unsafe {
177+ gl::UseProgram(self.cube_program.gl_id());
178+ gl::BindVertexArray(self.cube_vertex_array_id);
179+ gl::BindTexture(gl::TEXTURE_2D, self.cube_texture_id);
180+ gl::DrawArraysInstanced(gl::TRIANGLES, 0, 36, self.cube_count as GLint);
181+ }
182+ }
183+
184+ pub(crate) fn update_block_cache(&mut self, positions: impl Iterator<Item = Vec3>) {
185 let position_buffer: Vec<f32> = positions
186 .flat_map(|position| [position.x(), position.y(), position.z()])
187 .collect();
188
189+ self.cube_count = position_buffer.len() / 3;
190
191 unsafe {
192+ gl::BindBuffer(gl::ARRAY_BUFFER, self.position_array_buffer_id);
193 gl::BufferData(
194 gl::ARRAY_BUFFER,
195+ (std::mem::size_of::<f32>() * 3 * self.cube_count) as isize,
196 position_buffer.as_ptr() as *const c_void,
197 gl::STATIC_DRAW,
198 );
199 }
200 }
201