From 161cea492089597e4172e5cc304d12265b6c7761 Mon Sep 17 00:00:00 2001
From: Kieron Morris
Date: Sat, 29 Jan 2022 01:39:45 +0000
Subject: [PATCH] SSE MOVUPS/128bit Memcpy + Fixed Doublebuffer

---
 src/driver/video/doublebuffer.pas | 27 +++++++++++++++++++--------
 src/driver/video/video.pas        | 18 ++++++++++++------
 src/include/util.pas              | 11 +++++++++++
 3 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/src/driver/video/doublebuffer.pas b/src/driver/video/doublebuffer.pas
index 2fa942bd..dfff1206 100644
--- a/src/driver/video/doublebuffer.pas
+++ b/src/driver/video/doublebuffer.pas
@@ -22,7 +22,7 @@ unit doublebuffer;
 interface
 
 uses
-    lmemorymanager, tracer, videotypes;
+    lmemorymanager, tracer, videotypes, serial, util;
 
 //Init the driver, and register with the video interface in a state ready for execution.
 procedure init(Register : FRegisterDriver);
@@ -54,21 +54,32 @@ end;
 
 procedure Flush(FrontBuffer : PVideoBuffer; BackBuffer : PVideoBuffer);
 var
-    X,Y : uint32;
+    idx : uint32;
     Back,Front : PuInt64;
+    BufferSize : uint32;
+
+const
+    COPY_WIDTH = 64;
 
 begin
-    tracer.push_trace('doublebuffer.Flush.enter');
+    //tracer.push_trace('doublebuffer.Flush.enter');
     if not(BackBuffer^.Initialized) then exit;
     if ((FrontBuffer^.Width > BackBuffer^.Width) or (FrontBuffer^.Height > BackBuffer^.Height)) then exit;
     Back:= PUint64(BackBuffer^.Location);
     Front:= PuInt64(FrontBuffer^.Location);
-    for X:=0 to (BackBuffer^.Width-1) div 2 do begin
-        for Y:=0 to (BackBuffer^.Height-1) div 2 do begin
-            Front[(Y * BackBuffer^.Width) + X]:= Back[(Y * BackBuffer^.Width) + X];
-        end;
+    //Number of COPY_WIDTH-bit words in the back buffer (any remainder bits are not copied).
+    BufferSize:= ( BackBuffer^.Width * BackBuffer^.Height * BackBuffer^.BitsPerPixel ) div COPY_WIDTH;
+    //Nothing to copy; guards the 'BufferSize - 1' upper bound against going below zero.
+    if BufferSize = 0 then exit;
+    for idx:=0 to BufferSize - 1 do begin
+        Front[idx]:= Back[idx];
+        // -- TODO: Get SSE working here for 128bit copies --
+        // (args are source, dest; each call moves 16 bytes, so count in 128bit units)
+        // __SSE_128_memcpy(uint32(Back), uint32(Front));
+        // Front:= PUint64(uint32(Front) + 16);
+        // Back:= PUint64(uint32(Back) + 16);
     end;
-    tracer.push_trace('doublebuffer.Flush.exit');
+    //tracer.push_trace('doublebuffer.Flush.exit');
 end;
 
 function enable(VideoInterface : PVideoInterface) : boolean;
diff --git a/src/driver/video/video.pas b/src/driver/video/video.pas
index 62fdce87..1c9b6f46 100644
--- a/src/driver/video/video.pas
+++ b/src/driver/video/video.pas
@@ -51,19 +51,25 @@ end;
 
 Procedure basicFFlush(FrontBuffer : PVideoBuffer; BackBuffer : PVideoBuffer);
 var
-    x, y : uint32;
+    idx : uint32;
     Back,Front : puint32;
+    BufferSize : uint32;
+
+const
+    COPY_WIDTH = 32;
 
 begin
-    tracer.push_trace('video.basicFFlush.enter');
+    //tracer.push_trace('video.basicFFlush.enter');
     If not(FrontBuffer^.Initialized and BackBuffer^.Initialized) then exit;
     if (BackBuffer^.Width > FrontBuffer^.Width) or (BackBuffer^.Height > FrontBuffer^.Height) then exit;
     Back:= puint32(BackBuffer^.Location);
     Front:= puint32(FrontBuffer^.Location);
-    for x:=0 to BackBuffer^.Width-1 do begin
-        for y:=0 to BackBuffer^.Height-1 do begin
-            Front[(Y * BackBuffer^.Width) + X]:= Back[(Y * BackBuffer^.Width) + X];
-        end;
+    //Number of COPY_WIDTH-bit words in the back buffer (any remainder bits are not copied).
+    BufferSize:= ( BackBuffer^.Width * BackBuffer^.Height * BackBuffer^.BitsPerPixel ) div COPY_WIDTH;
+    //Nothing to copy; guards the 'BufferSize - 1' upper bound against going below zero.
+    if BufferSize = 0 then exit;
+    for idx:=0 to BufferSize - 1 do begin
+        Front[idx]:= Back[idx];
     end;
 end;
 
diff --git a/src/include/util.pas b/src/include/util.pas
index 39093f8f..ac648a11 100644
--- a/src/include/util.pas
+++ b/src/include/util.pas
@@ -49,6 +49,7 @@ procedure io_wait;
 
 procedure memset(location : uint32; value : uint8; size : uint32);
 procedure memcpy(source : uint32; dest : uint32; size : uint32);
+procedure __SSE_128_memcpy(source : uint32; dest : uint32);
 
 procedure printmemory(source : uint32; length : uint32; col : uint32; delim : PChar; offset_row : boolean);
 procedure printmemoryWND(source : uint32; length : uint32; col : uint32; delim : PChar; offset_row : boolean; WND : HWND);
@@ -127,6 +128,16 @@ begin
     div6432:= (r0 SHL 32) OR r4;
 end;
 
+//Copy one 128bit (16 byte) block from 'source' to 'dest' through XMM0.
+//MOVUPS is the unaligned move, so neither address needs 16 byte alignment.
+procedure __SSE_128_memcpy(source : uint32; dest : uint32); assembler;
+asm
+    MOV EAX, Source
+    MOV ECX, Dest
+    MOVUPS XMM0, [EAX]
+    MOVUPS [ECX], XMM0
+end;
+
 function switchendian16(b : uint16) : uint16;
 begin
     switchendian16:= ((b AND $FF00) SHR 8) OR ((b AND $00FF) SHL 8);