diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h
index 17c47301985..fead4fd277d 100644
--- a/hw/xbox/nv2a/nv2a_int.h
+++ b/hw/xbox/nv2a/nv2a_int.h
@@ -263,6 +263,7 @@ typedef struct PGRAPHState {
         GLuint display_size_loc;
         GLuint line_offset_loc;
         GLuint tex_loc;
+        GLuint vga_framebuffer_tex;
         GLuint pvideo_tex;
         GLint pvideo_enable_loc;
         GLint pvideo_tex_loc;
diff --git a/hw/xbox/nv2a/pgraph.c b/hw/xbox/nv2a/pgraph.c
index 657aba7027d..0713276a177 100644
--- a/hw/xbox/nv2a/pgraph.c
+++ b/hw/xbox/nv2a/pgraph.c
@@ -5008,7 +5008,7 @@ static void pgraph_init_display_renderer(NV2AState *d)
         "{\n"
         "    vec2 texCoord = gl_FragCoord.xy/display_size;\n"
         "    float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
-        "    texCoord.y = 1 + rel*(texCoord.y - 1);"
+        "    texCoord.y = 1 + rel*(texCoord.y - 1);\n"
         "    out_Color.rgba = texture(tex, texCoord);\n"
         "    if (pvideo_enable) {\n"
         "        vec2 screenCoord = gl_FragCoord.xy - 0.5;\n"
@@ -5041,6 +5041,7 @@ static void pgraph_init_display_renderer(NV2AState *d)
     glBindBuffer(GL_ARRAY_BUFFER, pg->disp_rndr.vbo);
     glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW);
     glGenFramebuffers(1, &pg->disp_rndr.fbo);
+    glGenTextures(1, &pg->disp_rndr.vga_framebuffer_tex);
     glGenTextures(1, &pg->disp_rndr.pvideo_tex);
     assert(glGetError() == GL_NO_ERROR);
 }
@@ -5146,6 +5147,147 @@ static void pgraph_render_display_pvideo_overlay(NV2AState *d)
                 out_x, out_y, out_width, out_height);
 }
 
+// Copies one row of `width` pixels from `in` to `out`, repeating every pixel
+// `factor` times horizontally.
+static void surface_copy_expand_row(uint8_t *out, uint8_t *in,
+                                    unsigned int width,
+                                    unsigned int bytes_per_pixel,
+                                    unsigned int factor)
+{
+    if (bytes_per_pixel == 4) {
+        for (unsigned int x = 0; x < width; x++) {
+            for (unsigned int i = 0; i < factor; i++) {
+                *(uint32_t *)out = *(uint32_t *)in;
+                out += bytes_per_pixel;
+            }
+            in += bytes_per_pixel;
+        }
+    } else if (bytes_per_pixel == 2) {
+        for (unsigned int x = 0; x < width; x++) {
+            for (unsigned int i = 0; i < factor; i++) {
+                *(uint16_t *)out = *(uint16_t *)in;
+                out += bytes_per_pixel;
+            }
+            in += bytes_per_pixel;
+        }
+    } else {
+        for (unsigned int x = 0; x < width; x++) {
+            for (unsigned int i = 0; i < factor; i++) {
+                memcpy(out, in, bytes_per_pixel);
+                out += bytes_per_pixel;
+            }
+            in += bytes_per_pixel;
+        }
+    }
+}
+
+// Expands a tightly packed width x height image by `factor` in both
+// dimensions: each source row is expanded horizontally once, then the
+// expanded row is copied until `factor` output rows have been written.
+static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width,
+                                unsigned int height,
+                                unsigned int bytes_per_pixel,
+                                unsigned int factor)
+{
+    size_t out_pitch = width * bytes_per_pixel * factor;
+
+    for (unsigned int y = 0; y < height; y++) {
+        surface_copy_expand_row(out, in, width, bytes_per_pixel, factor);
+        uint8_t *row_in = out;
+        for (unsigned int i = 1; i < factor; i++) {
+            out += out_pitch;
+            memcpy(out, row_in, out_pitch);
+        }
+        in += width * bytes_per_pixel;
+        out += out_pitch;
+    }
+}
+
+// Uploads the pixel data at the given VRAM address into the currently bound
+// texture.
+//
+// surface_width, surface_height and pitch describe the unscaled image as it
+// is laid out in VRAM. size is the number of bytes allocated for the
+// temporary linear copy when swizzle is set. Rows are flipped vertically and,
+// if pg->surface_scale_factor is greater than 1, expanded by that factor
+// before being handed to glTexImage2D().
+static void upload_vram_to_bound_texture(NV2AState *d,
+                                         hwaddr vram_addr,
+                                         bool swizzle,
+                                         unsigned int surface_width,
+                                         unsigned int surface_height,
+                                         unsigned int pitch,
+                                         size_t size,
+                                         const SurfaceFormatInfo *fmt)
+{
+    PGRAPHState *pg = &d->pgraph;
+    uint8_t *data = d->vram_ptr;
+    uint8_t *buf = data + vram_addr;
+
+    if (swizzle) {
+        buf = (uint8_t*)g_malloc(size);
+        unswizzle_rect(data + vram_addr,
+                       surface_width, surface_height,
+                       buf,
+                       pitch,
+                       fmt->bytes_per_pixel);
+    }
+
+    /* FIXME: Replace this flip/scaling */
+
+    // This is VRAM so we can't do this inplace!
+    unsigned int compact_pitch = surface_width * fmt->bytes_per_pixel;
+    uint8_t *flipped_buf = (uint8_t *)g_malloc(surface_height * compact_pitch);
+    unsigned int irow;
+    for (irow = 0; irow < surface_height; irow++) {
+        memcpy(&flipped_buf[compact_pitch * (surface_height - irow - 1)],
+               &buf[pitch * irow],
+               compact_pitch);
+    }
+
+    uint8_t *gl_read_buf = flipped_buf;
+    unsigned int width = surface_width;
+    unsigned int height = surface_height;
+
+    if (pg->surface_scale_factor > 1) {
+        pgraph_apply_scaling_factor(pg, &width, &height);
+        pg->scale_buf = (uint8_t *)g_realloc(
+            pg->scale_buf, width * height * fmt->bytes_per_pixel);
+        gl_read_buf = pg->scale_buf;
+        uint8_t *out = gl_read_buf, *in = flipped_buf;
+        surface_copy_expand(out, in, surface_width, surface_height,
+                            fmt->bytes_per_pixel,
+                            d->pgraph.surface_scale_factor);
+    }
+
+    glTexImage2D(GL_TEXTURE_2D, 0, fmt->gl_internal_format, width,
+                 height, 0, fmt->gl_format, fmt->gl_type,
+                 gl_read_buf);
+    g_free(flipped_buf);
+    if (swizzle) {
+        g_free(buf);
+    }
+}
+
+// Calls pgraph_download_surface_data_if_dirty() on every surface whose VRAM
+// range overlaps the byte range [start, end]. Both `start` and `end` are
+// inclusive addresses.
+static void pgraph_download_overlapping_surfaces(NV2AState *d,
+                                                 hwaddr start,
+                                                 hwaddr end)
+{
+    SurfaceBinding *surface;
+    QTAILQ_FOREACH (surface, &d->pgraph.surfaces, entry) {
+        hwaddr surf_vram_end = surface->vram_addr + surface->size - 1;
+        // Both ranges are inclusive, so sharing a single byte is an overlap.
+        bool overlapping = surface->vram_addr <= end &&
+                           start <= surf_vram_end;
+        if (overlapping) {
+            pgraph_download_surface_data_if_dirty(d, surface);
+        }
+    }
+}
+
 static void pgraph_render_display(NV2AState *d, SurfaceBinding *surface)
 {
     struct PGRAPHState *pg = &d->pgraph;
@@ -5154,8 +5296,6 @@ static void pgraph_render_display(NV2AState *d, SurfaceBinding *surface)
     uint32_t pline_offset, pstart_addr, pline_compare;
     d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height);
     d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
-    int line_offset = surface->pitch / pline_offset;
-
     /* Adjust viewport height for interlaced mode, used only in 1080i */
     if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
         height *= 2;
@@ -5163,16 +5303,61 @@ static void pgraph_render_display(NV2AState *d, SurfaceBinding *surface)
 
+    // Guest surfaces and the framebuffer in VRAM use the native VGA mode;
+    // only host GL textures are scaled, so remember the unscaled size.
+    const unsigned int vga_width = width;
+    const unsigned int vga_height = height;
+
     pgraph_apply_scaling_factor(pg, &width, &height);
 
+    int line_offset = 1;
+    const SurfaceFormatInfo *format =
+        &kelvin_surface_color_format_map[NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8];
+    // Scan out straight from the surface only if it matches the VGA mode
+    // exactly (pline_offset is a divisor below, so it must be non-zero);
+    // otherwise fall back to uploading the framebuffer from VRAM.
+    if (surface
+        && surface->color
+        && pline_offset != 0
+        && surface->width == vga_width
+        && surface->height == vga_height
+        && surface->pitch == (vga_width * format->bytes_per_pixel)) {
+
+        line_offset = surface->pitch / pline_offset;
+        format = &surface->fmt;
+    } else {
+        surface = NULL;
+        // TODO: Check VGA dirty bitmap and skip upload if possible.
+        nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD);
+        unsigned int pitch = vga_width * format->bytes_per_pixel;
+        hwaddr framebuffer = d->pcrtc.start + pline_offset;
+        size_t length = (size_t)pitch * vga_height;
+        hwaddr framebuffer_end = framebuffer + length - 1;
+        pgraph_download_overlapping_surfaces(d, framebuffer, framebuffer_end);
+
+        glBindTexture(GL_TEXTURE_2D, pg->disp_rndr.vga_framebuffer_tex);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
+        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+
+        upload_vram_to_bound_texture(d,
+                                     framebuffer,
+                                     false,
+                                     vga_width,
+                                     vga_height,
+                                     pitch,
+                                     length,
+                                     format);
+        assert(glGetError() == GL_NO_ERROR);
+    }
+
+    bool recreate = width != pg->gl_display_buffer_width
+                    || height != pg->gl_display_buffer_height
+                    || format->gl_internal_format != pg->gl_display_buffer_internal_format
+                    || format->gl_format != pg->gl_display_buffer_format
+                    || format->gl_type != pg->gl_display_buffer_type;
+
     glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.disp_rndr.fbo);
     glActiveTexture(GL_TEXTURE0);
     glBindTexture(GL_TEXTURE_2D, pg->gl_display_buffer);
-    bool recreate = (
-        surface->fmt.gl_internal_format != pg->gl_display_buffer_internal_format
-        || width != pg->gl_display_buffer_width
-        || height != pg->gl_display_buffer_height
-        || surface->fmt.gl_format != pg->gl_display_buffer_format
-        || surface->fmt.gl_type != pg->gl_display_buffer_type
-        );
 
     if (recreate) {
         /* XXX: There's apparently a bug in some Intel OpenGL drivers for
@@ -5186,11 +5371,11 @@ static void pgraph_render_display(NV2AState *d, SurfaceBinding *surface)
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-        pg->gl_display_buffer_internal_format = surface->fmt.gl_internal_format;
         pg->gl_display_buffer_width = width;
         pg->gl_display_buffer_height = height;
-        pg->gl_display_buffer_format = surface->fmt.gl_format;
-        pg->gl_display_buffer_type = surface->fmt.gl_type;
+        pg->gl_display_buffer_internal_format = format->gl_internal_format;
+        pg->gl_display_buffer_format = format->gl_format;
+        pg->gl_display_buffer_type = format->gl_type;
         glTexImage2D(GL_TEXTURE_2D, 0,
             pg->gl_display_buffer_internal_format,
             pg->gl_display_buffer_width,
@@ -5207,13 +5392,18 @@ static void pgraph_render_display(NV2AState *d, SurfaceBinding *surface)
     glDrawBuffers(1, DrawBuffers);
     assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
 
-    glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
+    if (surface) {
+        glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
+    } else {
+        glBindTexture(GL_TEXTURE_2D, pg->disp_rndr.vga_framebuffer_tex);
+    }
+
     glBindVertexArray(pg->disp_rndr.vao);
     glBindBuffer(GL_ARRAY_BUFFER, pg->disp_rndr.vbo);
     glUseProgram(pg->disp_rndr.prog);
-    glProgramUniform1i(pg->disp_rndr.prog, pg->disp_rndr.tex_loc, 0);
-    glUniform2f(d->pgraph.disp_rndr.display_size_loc, width, height);
-    glUniform1f(d->pgraph.disp_rndr.line_offset_loc, line_offset);
+    glUniform1i(pg->disp_rndr.tex_loc, 0);
+    glUniform2f(pg->disp_rndr.display_size_loc, width, height);
+    glUniform1f(pg->disp_rndr.line_offset_loc, line_offset);
     pgraph_render_display_pvideo_overlay(d);
 
     glViewport(0, 0, width, height);
@@ -5236,16 +5426,18 @@ void pgraph_gl_sync(NV2AState *d)
 {
     uint32_t pline_offset, pstart_addr, pline_compare;
     d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
-    SurfaceBinding *surface = pgraph_surface_get_within(d, d->pcrtc.start + pline_offset);
-    if (surface == NULL) {
+    hwaddr framebuffer = d->pcrtc.start + pline_offset;
+    if (!framebuffer) {
         qemu_event_set(&d->pgraph.gl_sync_complete);
         return;
     }
+    SurfaceBinding *surface = pgraph_surface_get_within(d, framebuffer);
+    if (surface) {
+        /* FIXME: Sanity check surface dimensions */
 
-    /* FIXME: Sanity check surface dimensions */
-
-    /* Wait for queued commands to complete */
-    pgraph_upload_surface_data(d, surface, !tcg_enabled());
+        /* Wait for queued commands to complete */
+        pgraph_upload_surface_data(d, surface, !tcg_enabled());
+    }
     pgraph_gl_fence();
     assert(glGetError() == GL_NO_ERROR);
 
@@ -5276,21 +5468,24 @@ int nv2a_get_framebuffer_surface(void)
     // FIXME: Possible race condition with pgraph, consider lock
     uint32_t pline_offset, pstart_addr, pline_compare;
     d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
-    SurfaceBinding *surface = pgraph_surface_get_within(d, d->pcrtc.start + pline_offset);
-    if (surface == NULL || !surface->color) {
+    const hwaddr framebuffer = d->pcrtc.start + pline_offset;
+    if (!framebuffer) {
         qemu_mutex_unlock(&d->pfifo.lock);
         return 0;
     }
 
-    assert(surface->color);
-    assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0);
-    assert(surface->fmt.gl_format == GL_RGBA
-        || surface->fmt.gl_format == GL_RGB
-        || surface->fmt.gl_format == GL_BGR
-        || surface->fmt.gl_format == GL_BGRA
+    SurfaceBinding *surface = pgraph_surface_get_within(d, framebuffer);
+    if (surface && surface->color) {
+        assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0);
+        assert(surface->fmt.gl_format == GL_RGBA
+            || surface->fmt.gl_format == GL_RGB
+            || surface->fmt.gl_format == GL_BGR
+            || surface->fmt.gl_format == GL_BGRA
         );
 
-    surface->frame_time = pg->frame_time;
+        surface->frame_time = pg->frame_time;
+    }
+
     qemu_event_reset(&d->pgraph.gl_sync_complete);
     qatomic_set(&pg->gl_sync_pending, true);
     pfifo_kick(d);
@@ -5709,58 +5904,6 @@ void pgraph_download_dirty_surfaces(NV2AState *d)
     qemu_event_set(&d->pgraph.dirty_surfaces_download_complete);
 }
 
-
-static void surface_copy_expand_row(uint8_t *out, uint8_t *in,
-                                    unsigned int width,
-                                    unsigned int bytes_per_pixel,
-                                    unsigned int factor)
-{
-    if (bytes_per_pixel == 4) {
-        for (unsigned int x = 0; x < width; x++) {
-            for (unsigned int i = 0; i < factor; i++) {
-                *(uint32_t *)out = *(uint32_t *)in;
-                out += bytes_per_pixel;
-            }
-            in += bytes_per_pixel;
-        }
-    } else if (bytes_per_pixel == 2) {
-        for (unsigned int x = 0; x < width; x++) {
-            for (unsigned int i = 0; i < factor; i++) {
-                *(uint16_t *)out = *(uint16_t *)in;
-                out += bytes_per_pixel;
-            }
-            in += bytes_per_pixel;
-        }
-    } else {
-        for (unsigned int x = 0; x < width; x++) {
-            for (unsigned int i = 0; i < factor; i++) {
-                memcpy(out, in, bytes_per_pixel);
-                out += bytes_per_pixel;
-            }
-            in += bytes_per_pixel;
-        }
-    }
-}
-
-static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width,
-                                unsigned int height,
-                                unsigned int bytes_per_pixel,
-                                unsigned int factor)
-{
-    size_t out_pitch = width * bytes_per_pixel * factor;
-
-    for (unsigned int y = 0; y < height; y++) {
-        surface_copy_expand_row(out, in, width, bytes_per_pixel, factor);
-        uint8_t *row_in = out;
-        for (unsigned int i = 1; i < factor; i++) {
-            out += out_pitch;
-            memcpy(out, row_in, out_pitch);
-        }
-        in += width * bytes_per_pixel;
-        out += out_pitch;
-    }
-}
-
 static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
                                        bool force)
 {
@@ -5793,53 +5936,15 @@ static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface,
     glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
                            GL_TEXTURE_2D, 0, 0);
 
-    uint8_t *data = d->vram_ptr;
-    uint8_t *buf = data + surface->vram_addr;
-
-    if (surface->swizzle) {
-        buf = (uint8_t*)g_malloc(surface->size);
-        unswizzle_rect(data + surface->vram_addr,
-                       surface->width, surface->height,
-                       buf,
-                       surface->pitch,
-                       surface->fmt.bytes_per_pixel);
-    }
-
-    /* FIXME: Replace this flip/scaling */
-
-    // This is VRAM so we can't do this inplace!
-    uint8_t *flipped_buf = (uint8_t *)g_malloc(
-        surface->height * surface->width * surface->fmt.bytes_per_pixel);
-    unsigned int irow;
-    for (irow = 0; irow < surface->height; irow++) {
-        memcpy(&flipped_buf[surface->width * (surface->height - irow - 1)
-                            * surface->fmt.bytes_per_pixel],
-               &buf[surface->pitch * irow],
-               surface->width * surface->fmt.bytes_per_pixel);
-    }
-
-    uint8_t *gl_read_buf = flipped_buf;
-    unsigned int width = surface->width, height = surface->height;
-
-    if (pg->surface_scale_factor > 1) {
-        pgraph_apply_scaling_factor(pg, &width, &height);
-        pg->scale_buf = (uint8_t *)g_realloc(
-            pg->scale_buf, width * height * surface->fmt.bytes_per_pixel);
-        gl_read_buf = pg->scale_buf;
-        uint8_t *out = gl_read_buf, *in = flipped_buf;
-        surface_copy_expand(out, in, surface->width, surface->height,
-                            surface->fmt.bytes_per_pixel,
-                            d->pgraph.surface_scale_factor);
-    }
-
     glBindTexture(GL_TEXTURE_2D, surface->gl_buffer);
-    glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width,
-                 height, 0, surface->fmt.gl_format, surface->fmt.gl_type,
-                 gl_read_buf);
-    g_free(flipped_buf);
-    if (surface->swizzle) {
-        g_free(buf);
-    }
+    upload_vram_to_bound_texture(d,
+                                 surface->vram_addr,
+                                 surface->swizzle,
+                                 surface->width,
+                                 surface->height,
+                                 surface->pitch,
+                                 surface->size,
+                                 &surface->fmt);
 
     // Rebind previous framebuffer binding
     glBindTexture(GL_TEXTURE_2D, last_texture_binding);
@@ -6623,14 +6728,7 @@ static void pgraph_bind_textures(NV2AState *d)
 
             // Writeback any surfaces which this texture may index
             hwaddr tex_vram_end = texture_vram_offset + length - 1;
-            QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) {
-                hwaddr surf_vram_end = surface->vram_addr + surface->size - 1;
-                bool overlapping = !(surface->vram_addr >= tex_vram_end
-                                     || texture_vram_offset >= surf_vram_end);
-                if (overlapping) {
-                    pgraph_download_surface_data_if_dirty(d, surface);
-                }
-            }
+            pgraph_download_overlapping_surfaces(d, texture_vram_offset, tex_vram_end);
         }
 
         TextureKey key;