Hi,
I'm running many iterations of a compute shader and reading / writing values to memory using imageLoad / imageStore.
A call to glMemoryBarrier after every iteration should ensure that memory accesses are synchronized between compute dispatches. This works fine for pixel shaders, but not for compute shaders.
Shader:
#version 430

// One work group covers an 8x8 tile of the image.
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;

// Image that every invocation reads and writes in place.
layout(binding = 0, rgba32f) uniform image2DRect data;

// Adds 1 to all four channels of the texel owned by this invocation.
void main()
{
    // gl_GlobalInvocationID == gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID,
    // so the texel coordinate stays correct if local_size_x/y are changed.
    ivec2 c = ivec2(gl_GlobalInvocationID.xy);
    vec4 v = imageLoad(data, c);
    imageStore(data, c, v + vec4(1));
}
Program:
float* data = new float[16 * 16 * 4]; memset(data, 0, 16 * 16 * 4 * sizeof(float)); glGenTextures(1, &image); glBindTexture(GL_TEXTURE_RECTANGLE, image); glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA32F, 16, 16, 0, GL_RGBA, GL_FLOAT, data); glBindTexture(GL_TEXTURE_RECTANGLE, 0); glUseProgram(program); glBindImageTexture(0, image, 0, false, 0, GL_READ_WRITE, GL_RGBA32F); for (int i = 0; i < 99; ++i) { glDispatchCompute(2, 2, 1); glMemoryBarrier(GL_ALL_BARRIER_BITS); } glBindTexture(GL_TEXTURE_RECTANGLE, image); glGetTexImage(GL_TEXTURE_RECTANGLE, 0, GL_RGBA, GL_FLOAT, data); for (int y = 0; y < 16; ++y) { for (int x = 0; x < 16; ++x) { int i = y * 16 * 4 + x * 4; cout << data[i] << " "; } cout << endl; }
Results (texel values):
Expected: 99
On HD 5770: 99
On HD 7850: 24
On HD 7970: 13
When using glFlush() every iteration: 99 (all cards)
Tested with Catalyst 13.6 beta / 13.8 beta