; The following routines put/get images directly to/from video memory.
; The mouse MUST be hidden while using these functions!

.data
align 4
; Jump table for gd_get: one handler address per video access type.
; gd_get adds _v_acctype to the table address without scaling it, so
; _v_acctype is used as a byte offset into this table.
gd_get_types dd offset gd_get_modex,offset gd_get_vesa,offset gd_get_vesax
             dd offset gd_get_linear,offset gd_get_linearx

.code
align 4
;-----------------------------------------------------------------------
; gd_get - copy a rectangle of video memory into a caller buffer.
;
; In:   buf          = base address of the destination buffer
;       x1,y1,x2,y2  = rectangle; not referenced directly here, presumably
;                      consumed by the g_setup macro (defined elsewhere)
;                      - TODO confirm
; Out:  eax = 0 on the normal exit (done)
;       eax = ERROR on the niv exit.  niv is not referenced in the visible
;             code; presumably g_setup jumps there when the rectangle is
;             not in view - TODO confirm
; All other general registers are preserved (pushad/popad).
;
; The work is done by one of five handlers selected through gd_get_types
; using _v_acctype as a byte offset.
;
; NOTE(review): g_setup, copyECX and callp are macros defined elsewhere.
; From the way their results are used below, g_setup appears to fill xl,
; yl and saddr, store the per-line skips in its 2nd/3rd arguments and
; leave the screen/buffer offsets in its 4th/5th arguments; copyECX
; appears to copy ECX bytes from [ESI] to [EDI], advancing both.
;-----------------------------------------------------------------------
gd_get proc,buf:dword,x1:dword,y1:dword,x2:dword,y2:dword
  local dadd:dword      ; added to EDI (buffer side) after every line
  local sadd:dword      ; added to ESI (screen side) after every line
  local saddr:dword     ; screen address of the rectangle
  local xl:dword,yl:dword ; line length and line count used by the loops
;ModeX locals
  local _pc:byte,_xl2c:byte ; planes left / planes that still get an extra column
  local dstart:dword    ; buffer offset at which the current plane starts
  local _rdpl:byte      ; plane NUMBER (0..3) written to Read Map Select

  pushad

  ; dispatch on the video access type (byte offset into the table)
  mov eax,_v_acctype
  jmp dptr[eax+offset gd_get_types]

;--- ModeX (planar): one pass per plane, 4 passes in total -------------
gd_get_modex::
  g_setup _v_bpsl,sadd,dadd,esi,edi,,,1  ;Not Fast!  No shifting at all!

  ;adjust esi
  add saddr,esi
  mov dstart,edi

  ; Low two bits of the screen address give the first plane.
  mov ecx,saddr
  and cl,3
  mov _rdpl,cl          ; plane number, this is what the hardware wants
  mov al,1
  shl al,cl
  mov modex_plane_rd,al ; global keeps its original one-hot form
  shr saddr,2           ; byte address inside a plane
  shr sadd,2
  mov eax,xl
  mov ebx,eax
  and ebx,3
  mov _xl2c,bl  ; xl2 counter: the first (xl and 3) planes hold one more column
  .if bl   ;sadd was rounded down by the shift above; correct it for xl/4 columns
    inc sadd
  .endif
  add dadd,ebx  ;adjust dadd
  shr eax,2
  mov ebx,eax   ; XL/4
  mov _pc,4     ; Plane counter
  mov dx,vga_GC_INDEX
  mov al,04h    ; index 04h = Read Map Select
  ; FIX: Read Map Select takes a 2-bit plane number, not a one-hot mask.
  ; The old code sent modex_plane_rd (1,2,4,8), which selects planes
  ; 1,2,0,0 instead of 0,1,2,3.
  mov ah,_rdpl
  out dx,ax     ; enable reads to 1st plane
  .if _xl2c
    inc ebx       ;inc x len
    dec sadd      ;dec src additions
    sub dadd,4    ;dec dest additions
  .endif
mx_l2:
  test ebx,ebx   ;this can happen when xl=3 or less
  jz mx_z1

  mov esi,_v_linear
  add esi,saddr
  mov edi,dstart
  add edi,buf

  mov eax,yl     ; eax = lines left in this plane pass
mx_l1:
  mov ecx,ebx    ; ecx = columns to read on this line
@@:
  movsb          ; one byte from the plane ...
  add edi,3      ; ... lands on every 4th byte of the buffer
  dec ecx
  jnz @b
  add esi,sadd
  add edi,dadd
  dec eax
  jnz mx_l1

mx_z1:
  ; Once the planes holding the extra column are done, fall back to
  ; xl/4 columns and undo the matching skip adjustments.
  .if _xl2c
    dec _xl2c
    .if zero?
      dec ebx
      inc sadd
      add dadd,4
    .endif
  .endif

  dec _pc
  jz done
  inc dstart     ; next plane starts one byte further into the buffer
  mov ah,modex_plane_rd
  shl ah,1
  inc _rdpl
  .if ah==16     ; wrapped past plane 3: restart at plane 0, next address
    mov ah,1
    mov _rdpl,0
    inc saddr
  .endif
  mov modex_plane_rd,ah
  mov al,04h
  mov ah,_rdpl   ; plane number for Read Map Select (see FIX above)
  out dx,ax     ; enable reads to plane
  jmp mx_l2

;--- VESA banked: video memory is seen through a 64K window ------------
gd_get_vesa::
  g_setup _v_bpsl,sadd,dadd,esi,edi,1    ;Fast!

  add saddr,esi
  mov esi,_v_linear

  add edi,buf
  mov eax,saddr
  mov edx,eax
  and edx,0ffffh   ; edx = offset inside the bank
  shr eax,16       ; eax = bank number
  add esi,edx
  .if al!=vesa_current_bank
    callp vesa_setbank,al
  .endif
  mov eax,64*1024 ;when this reaches 0 then we must switch to next bank
  sub eax,edx
  mov edx,eax      ; edx = bytes left in the current bank
  ;copy a scan line
  mov ebx,yl
@@:
  mov ecx,xl
  .if edx<=ecx
    ; line crosses the bank boundary: copy in two pieces
    mov ecx,edx  ;use rest of bank
    copyECX
    mov esi,_v_linear
    mov ecx,xl
    sub ecx,edx  ; relies on copyECX leaving edx untouched
    mov edx,64*1024
    sub edx,ecx
    call vesa_nextbank
    copyECX      ;copy rest of scan line
  .else
    sub edx,ecx
    copyECX      ;copy entire scan line
  .endif
  add edi,dadd
  ;Add sadd to ESI
  .if edx<=sadd
    ; the skip itself crosses into the next bank
    call vesa_nextbank
    mov esi,_v_linear
    mov eax,sadd
    sub eax,edx
    add esi,eax
    mov edx,64*1024
    sub edx,eax
  .else
    sub edx,sadd
    add esi,sadd
  .endif
  dec ebx
  jnz @b
  jmp done

;--- VESA banked, as above but _v_xbpsl is added to the line skip ------
gd_get_vesax::
  g_setup _v_bpsl,sadd,dadd,esi,edi,1    ;Fast!

  add saddr,esi
  mov esi,_v_linear

  mov eax,_v_xbpsl
  add sadd,eax

  add edi,buf
  mov eax,saddr
  mov edx,eax
  and edx,0ffffh   ; edx = offset inside the bank
  shr eax,16       ; eax = bank number
  add esi,edx
  .if al!=vesa_current_bank
    callp vesa_setbank,al
  .endif
  mov eax,64*1024 ;when this reaches 0 then we must switch to next bank
  sub eax,edx
  mov edx,eax      ; edx = bytes left in the current bank
  ;copy a scan line
  mov ebx,yl
@@:
  mov ecx,xl
  .if edx<=ecx
    ; line crosses the bank boundary: copy in two pieces
    mov ecx,edx  ;use rest of bank
    copyECX
    mov esi,_v_linear
    mov ecx,xl
    sub ecx,edx  ; relies on copyECX leaving edx untouched
    mov edx,64*1024
    sub edx,ecx
    call vesa_nextbank
    copyECX      ;copy rest of scan line
  .else
    sub edx,ecx
    copyECX      ;copy entire scan line
  .endif
  add edi,dadd
  ;Add sadd to ESI
  .if edx<=sadd
    ; the skip itself crosses into the next bank
    call vesa_nextbank
    mov esi,_v_linear
    mov eax,sadd
    sub eax,edx
    add esi,eax
    mov edx,64*1024
    sub edx,eax
  .else
    sub edx,sadd
    add esi,sadd
  .endif
  dec ebx
  jnz @b
  jmp done

;--- Linear frame buffer, _v_xbpsl added to the line skip --------------
gd_get_linearx::

  g_setup _v_bpsl,sadd,dadd,esi,edi,1    ;Fast!

  add edi,buf
  add esi,_v_linear
  add esi,saddr

  mov eax,_v_xbpsl
  add sadd,eax

  ;time to copy image
@@:
  mov ecx,xl
  copyECX
  add esi,sadd
  add edi,dadd
  dec yl
  jnz @b
  jmp done

;--- Linear frame buffer -----------------------------------------------
gd_get_linear::

  g_setup _v_bpsl,sadd,dadd,esi,edi,1    ;Fast!

  add edi,buf
  add esi,_v_linear
  add esi,saddr

  ;time to copy image
@@:
  mov ecx,xl
  copyECX
  add esi,sadd
  add edi,dadd
  dec yl
  jnz @b

done:
  popad
  xor eax,eax    ; success
  ret

niv: ;not in view
  popad
  mov eax,ERROR
  ret
gd_get endp

.data
align 4
; Jump table for gd_put: one handler address per video access type.
; gd_put adds _v_acctype to the table address without scaling it, so
; _v_acctype is used as a byte offset into this table.
gd_put_types dd offset gd_put_modex,offset gd_put_vesa,offset gd_put_vesax
             dd offset gd_put_linear,offset gd_put_linearx

.code
align 4
;-----------------------------------------------------------------------
; gd_put - copy a rectangle from a caller buffer into video memory.
;
; In:   buf          = base address of the source buffer
;       x1,y1,x2,y2  = rectangle; not referenced directly here, presumably
;                      consumed by the g_setup macro (defined elsewhere)
;                      - TODO confirm
; Out:  eax = 0 on the normal exit (done)
;       eax = ERROR on the niv exit.  niv is not referenced in the visible
;             code; presumably g_setup jumps there when the rectangle is
;             not in view - TODO confirm
; All other general registers are preserved (pushad/popad).
;
; The work is done by one of five handlers selected through gd_put_types
; using _v_acctype as a byte offset.
;
; NOTE(review): g_setup, copyECX and callp are macros defined elsewhere.
; From the way their results are used below, g_setup appears to fill xl,
; yl and saddr, store the per-line skips in its 2nd/3rd arguments and
; leave the screen/buffer offsets in its 4th/5th arguments; copyECX
; appears to copy ECX bytes from [ESI] to [EDI], advancing both.
;-----------------------------------------------------------------------
gd_put proc,buf:dword,x1:dword,y1:dword,x2:dword,y2:dword
  ; dadd is added to EDI (screen side) after every line
  local dadd:dword
  ; sadd is added to ESI (buffer side) after every line
  local sadd:dword
  ; saddr is the SCREEN address here (EDI is added to it below)
  local saddr:dword
  local xl:dword,yl:dword
;ModeX locals
  ; _pc = planes left, _xl2c = planes that still get an extra column
  local _pc:byte,_xl2c:byte
  ; sstart = buffer offset at which the current plane starts
  local sstart:dword

  pushad

  ; dispatch on the video access type (byte offset into the table)
  mov eax,_v_acctype
  jmp dptr[eax+offset gd_put_types]

;--- ModeX (planar): one pass per plane, 4 passes in total -------------
gd_put_modex::
  g_setup _v_bpsl,dadd,sadd,edi,esi,,,1  ;Not Fast!  No shifting at all!

  ;adjust edi
  add saddr,edi
  mov sstart,esi

  ; Low two bits of the screen address give the first plane; the map
  ; mask written below is the one-hot value 1 shl plane.
  mov ecx,saddr
  and cl,3
  mov al,1
  shl al,cl
  mov modex_plane_wr,al
  shr saddr,2   ; byte address inside a plane
  shr dadd,2
  mov eax,xl
  mov ebx,eax
  and ebx,3
  mov _xl2c,bl  ; the first (xl and 3) planes hold one more column
  .if bl        ; dadd was rounded down by the shift above; correct it
    inc dadd
  .endif
  add sadd,ebx
  shr eax,2
  mov ebx,eax   ; XL/4
  mov _pc,4     ; Plane counter
  mov dx,vga_SC_INDEX ; port used by the out instructions below
  mov al,02h
  mov ah,modex_plane_wr
  out dx,ax     ; enable writes to 1st plane

  .if _xl2c
    ; this plane has one extra column: longer run, shorter skips
    inc ebx
    dec dadd
    sub sadd,4
  .endif
mx_l2:
  test ebx,ebx   ;this can happen when xl=3 or less
  jz mx_z1

  mov edi,_v_linear
  add edi,saddr
  mov esi,sstart
  add esi,buf

  mov eax,yl     ; eax = lines left in this plane pass
mx_l1:
  mov ecx,ebx    ; ecx = columns to write on this line
@@:
  movsb          ; one byte to the plane ...
  add esi,3      ; ... taken from every 4th byte of the buffer
  dec ecx
  jnz @b
  add esi,sadd
  add edi,dadd
  dec eax
  jnz mx_l1

mx_z1:
  ; Once the planes holding the extra column are done, fall back to
  ; xl/4 columns and undo the matching skip adjustments.
  .if _xl2c
    dec _xl2c
    .if zero?
      dec ebx
      inc dadd
      add sadd,4
    .endif
  .endif

  dec _pc
  jz done
  inc sstart     ; next plane starts one byte further into the buffer
  mov ah,modex_plane_wr
  shl ah,1
  .if ah==16     ; wrapped past plane 3: restart at plane 0, next address
    mov ah,1
    inc saddr
  .endif
  mov modex_plane_wr,ah
  mov al,02h
  out dx,ax     ; enable writes to plane
  jmp mx_l2

;--- VESA banked: video memory is seen through a 64K window ------------
gd_put_vesa::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1    ;Fast!

  add saddr,edi
  mov edi,_v_linear

  add esi,buf
  mov eax,saddr
  mov edx,eax
  and edx,0ffffh   ; edx = offset inside the bank
  shr eax,16       ; eax = bank number
  add edi,edx
  .if al!=vesa_current_bank
    callp vesa_setbank,al
  .endif
  mov eax,64*1024 ;when this reaches 0 then we must switch to next bank
  sub eax,edx
  mov edx,eax      ; edx = bytes left in the current bank
  ;copy a scan line
  mov ebx,yl
@@:
  mov ecx,xl
  .if edx<=ecx
    ; line crosses the bank boundary: copy in two pieces
    mov ecx,edx  ;use rest of bank
    copyECX
    mov edi,_v_linear
    mov ecx,xl
    sub ecx,edx  ; relies on copyECX leaving edx untouched
    mov edx,64*1024
    sub edx,ecx
    call vesa_nextbank
    copyECX      ;copy rest of scan line
  .else
    sub edx,ecx
    copyECX      ;copy entire scan line
  .endif
  add esi,sadd
  ;Add dadd to EDI
  .if edx<=dadd
    ; the skip itself crosses into the next bank
    call vesa_nextbank
    mov edi,_v_linear
    mov eax,dadd
    sub eax,edx
    add edi,eax
    mov edx,64*1024
    sub edx,eax
  .else
    sub edx,dadd
    add edi,dadd
  .endif
  dec ebx
  jnz @b
  jmp done

;--- VESA banked, as above but _v_xbpsl is added to the line skip ------
gd_put_vesax::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1    ;Fast!

  add saddr,edi
  mov edi,_v_linear

  mov eax,_v_xbpsl
  add dadd,eax

  add esi,buf
  mov eax,saddr
  mov edx,eax
  and edx,0ffffh   ; edx = offset inside the bank
  shr eax,16       ; eax = bank number
  add edi,edx
  .if al!=vesa_current_bank
    callp vesa_setbank,al
  .endif
  mov eax,64*1024 ;when this reaches 0 then we must switch to next bank
  sub eax,edx
  mov edx,eax      ; edx = bytes left in the current bank
  ;copy a scan line
  mov ebx,yl
@@:
  mov ecx,xl
  .if edx<=ecx
    ; line crosses the bank boundary: copy in two pieces
    mov ecx,edx  ;use rest of bank
    copyECX
    mov edi,_v_linear
    mov ecx,xl
    sub ecx,edx  ; relies on copyECX leaving edx untouched
    mov edx,64*1024
    sub edx,ecx
    call vesa_nextbank
    copyECX      ;copy rest of scan line
  .else
    sub edx,ecx
    copyECX      ;copy entire scan line
  .endif
  add esi,sadd
  ;Add dadd to EDI
  .if edx<=dadd
    ; the skip itself crosses into the next bank
    call vesa_nextbank
    mov edi,_v_linear
    mov eax,dadd
    sub eax,edx
    add edi,eax
    mov edx,64*1024
    sub edx,eax
  .else
    sub edx,dadd
    add edi,dadd
  .endif
  dec ebx
  jnz @b
  jmp done

;--- Linear frame buffer, _v_xbpsl added to the line skip --------------
gd_put_linearx::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1    ;Fast!

  add edi,_v_linear
  add edi,saddr
  add esi,buf

  mov eax,_v_xbpsl
  add dadd,eax

  ;time to copy image
line:
  mov ecx,xl
  copyECX
  add esi,sadd
  add edi,dadd
  dec yl
  jnz line
  jmp done

;--- Linear frame buffer -----------------------------------------------
gd_put_linear::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1    ;Fast!

  add edi,_v_linear
  add edi,saddr
  add esi,buf

  ;time to copy image
@@:
  mov ecx,xl
  copyECX
  add esi,sadd
  add edi,dadd
  dec yl
  jnz @b

done:
  popad
  xor eax,eax    ; success
  ret

niv: ;not in view
  popad
  mov eax,ERROR
  ret
gd_put endp

.data
align 4
; Jump table for gd_put0: one handler address per video access type.
; gd_put0 adds _v_acctype to the table address without scaling it, so
; _v_acctype is used as a byte offset into this table.
gd_put0_types dd offset gd_put0_modex,offset gd_put0_vesa,offset gd_put0_vesax
             dd offset gd_put0_linear,offset gd_put0_linearx

.code
align 4
;-----------------------------------------------------------------------
; gd_put0 - copy a rectangle from a caller buffer into video memory,
;           leaving the screen untouched where the source is zero.
;
; In:   buf          = base address of the source buffer
;       x1,y1,x2,y2  = rectangle; not referenced directly here, presumably
;                      consumed by the g_setup macro (defined elsewhere)
;                      - TODO confirm
; Out:  eax = 0 on the normal exit (done)
;       eax = ERROR on the niv exit.  niv is not referenced in the visible
;             code; presumably g_setup jumps there when the rectangle is
;             not in view - TODO confirm
; All other general registers are preserved (pushad/popad).
;
; The zero test is visible in the ModeX handler (per source byte).  The
; other handlers delegate the copy to the copyECX0 macro, which is
; defined elsewhere; presumably it skips zero pixels using the mask
; prepared in EBX - TODO confirm.
;
; NOTE(review): g_setup, copyECX0 and callp are macros defined elsewhere.
; From the way their results are used below, g_setup appears to fill xl,
; yl and saddr, store the per-line skips in its 2nd/3rd arguments, leave
; the screen/buffer offsets in its 4th/5th arguments, and (in the "Fast"
; form) leave a pixel size in CL.
;-----------------------------------------------------------------------
gd_put0 proc,buf:dword,x1:dword,y1:dword,x2:dword,y2:dword
  ; dadd is added to EDI (screen side) after every line
  local dadd:dword
  ; sadd is added to ESI (buffer side) after every line
  local sadd:dword
  ; saddr is the SCREEN address here (EDI is added to it below)
  local saddr:dword
  local xl:dword,yl:dword
;ModeX locals
  ; _pc = planes left, _xl2c = planes that still get an extra column
  local _pc:byte,_xl2c:byte
  ; sstart = buffer offset at which the current plane starts
  local sstart:dword

  pushad

  ; dispatch on the video access type (byte offset into the table)
  mov eax,_v_acctype
  jmp dptr[eax+offset gd_put0_types]

;--- ModeX (planar): one pass per plane, 4 passes in total -------------
gd_put0_modex::
  g_setup _v_bpsl,dadd,sadd,edi,esi,,,1  ;Not Fast!  No shifting at all!

  ;adjust edi
  add saddr,edi
  mov sstart,esi

  ; Low two bits of the screen address give the first plane; the map
  ; mask written below is the one-hot value 1 shl plane.
  mov ecx,saddr
  and cl,3
  mov al,1
  shl al,cl
  mov modex_plane_wr,al
  shr saddr,2   ; byte address inside a plane
  shr dadd,2
  mov eax,xl
  mov ebx,eax
  and ebx,3
  mov _xl2c,bl  ; the first (xl and 3) planes hold one more column
  .if bl        ; dadd was rounded down by the shift above; correct it
    inc dadd
  .endif
  add sadd,ebx
  shr eax,2
  mov ebx,eax   ; XL/4
  mov _pc,4     ; Plane counter
  mov dx,vga_SC_INDEX ; port used by the out instructions below
  mov al,02h
  mov ah,modex_plane_wr
  out dx,ax     ; enable writes to 1st plane

  .if _xl2c
    ; this plane has one extra column: longer run, shorter skips
    inc ebx
    dec dadd
    sub sadd,4
  .endif
mx_l2:
  test ebx,ebx   ;this can happen when xl=3 or less
  jz mx_z1

  mov edi,_v_linear
  add edi,saddr
  mov esi,sstart
  add esi,buf

  mov eax,yl     ; eax = lines left in this plane pass
mx_l1:
  mov ecx,ebx    ; ecx = columns to write on this line
@@:
  ; non-zero source byte is written, zero source byte is skipped;
  ; both paths advance ESI and EDI by one
  .if bptr[esi]
    movsb
  .else
    inc esi
    inc edi
  .endif
  add esi,3      ; source bytes for one plane are 4 apart in the buffer
  dec ecx
  jnz @b
  add esi,sadd
  add edi,dadd
  dec eax
  jnz mx_l1

mx_z1:
  ; Once the planes holding the extra column are done, fall back to
  ; xl/4 columns and undo the matching skip adjustments.
  .if _xl2c
    dec _xl2c
    .if zero?
      dec ebx
      add sadd,4
      inc dadd
    .endif
  .endif

  dec _pc
  jz done
  inc sstart     ; next plane starts one byte further into the buffer
  mov ah,modex_plane_wr
  shl ah,1
  .if ah==16     ; wrapped past plane 3: restart at plane 0, next address
    mov ah,1
    inc saddr
  .endif
  mov modex_plane_wr,ah
  mov al,02h
  out dx,ax     ; enable writes to plane
  jmp mx_l2

;--- VESA banked: video memory is seen through a 64K window ------------
gd_put0_vesa::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1    ;Fast!

  add saddr,edi
  mov edi,_v_linear

  add esi,buf
  mov eax,saddr
  mov edx,eax
  and edx,0ffffh   ; edx = offset inside the bank
  shr eax,16       ; eax = bank number
  add edi,edx
  .if al!=vesa_current_bank
    callp vesa_setbank,al
  .endif
  mov eax,64*1024 ;when this reaches 0 then we must switch to next bank
  sub eax,edx
  mov edx,eax      ; edx = bytes left in the current bank
  ;time to copy image
  ; EBX = mask with CL low bytes set (all ones for any other CL).
  ; NOTE(review): CL is read after the possible vesa_setbank call above,
  ; so this relies on ECX surviving that call - confirm.
  .if cl==1
    mov ebx,0ffh
  .elseif cl==2
    mov ebx,0ffffh
  .elseif cl==3
    mov ebx,0ffffffh
  .else
    mov ebx,-1
  .endif
@@:
  mov ecx,xl
  .if edx<=ecx
    ; line crosses the bank boundary: copy in two pieces
    mov ecx,edx  ;use rest of bank
    copyECX0 _v_bypp,ecx
    mov edi,_v_linear
    mov ecx,xl
    sub ecx,edx  ; relies on copyECX0 leaving edx untouched
    mov edx,64*1024
    sub edx,ecx
    call vesa_nextbank
    copyECX0 _v_bypp,ecx      ;copy rest of scan line
  .else
    sub edx,ecx
    copyECX0 _v_bypp,ecx      ;copy entire scan line
  .endif
  add esi,sadd
  ;Add dadd to EDI
  .if edx<=dadd
    ; the skip itself crosses into the next bank
    call vesa_nextbank
    mov edi,_v_linear
    mov eax,dadd
    sub eax,edx
    add edi,eax
    mov edx,64*1024
    sub edx,eax
  .else
    sub edx,dadd
    add edi,dadd
  .endif
  dec yl
  jnz @b
  jmp done

;--- VESA banked, as above but _v_xbpsl is added to the line skip ------
gd_put0_vesax::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1    ;Fast!

  add saddr,edi
  mov edi,_v_linear

  mov eax,_v_xbpsl
  add dadd,eax

  add esi,buf
  mov eax,saddr
  mov edx,eax
  and edx,0ffffh   ; edx = offset inside the bank
  shr eax,16       ; eax = bank number
  add edi,edx
  .if al!=vesa_current_bank
    callp vesa_setbank,al
  .endif
  mov eax,64*1024 ;when this reaches 0 then we must switch to next bank
  sub eax,edx
  mov edx,eax      ; edx = bytes left in the current bank
  ;time to copy image
  ; EBX = mask with CL low bytes set (all ones for any other CL).
  ; NOTE(review): CL is read after the possible vesa_setbank call above,
  ; so this relies on ECX surviving that call - confirm.
  .if cl==1
    mov ebx,0ffh
  .elseif cl==2
    mov ebx,0ffffh
  .elseif cl==3
    mov ebx,0ffffffh
  .else
    mov ebx,-1
  .endif
@@:
  mov ecx,xl
  .if edx<=ecx
    ; line crosses the bank boundary: copy in two pieces
    mov ecx,edx  ;use rest of bank
    copyECX0 _v_bypp,ecx
    mov edi,_v_linear
    mov ecx,xl
    sub ecx,edx  ; relies on copyECX0 leaving edx untouched
    mov edx,64*1024
    sub edx,ecx
    call vesa_nextbank
    copyECX0 _v_bypp,ecx      ;copy rest of scan line
  .else
    sub edx,ecx
    copyECX0 _v_bypp,ecx      ;copy entire scan line
  .endif
  add esi,sadd
  ;Add dadd to EDI
  .if edx<=dadd
    ; the skip itself crosses into the next bank
    call vesa_nextbank
    mov edi,_v_linear
    mov eax,dadd
    sub eax,edx
    add edi,eax
    mov edx,64*1024
    sub edx,eax
  .else
    sub edx,dadd
    add edi,dadd
  .endif
  dec yl
  jnz @b
  jmp done

;--- Linear frame buffer, _v_xbpsl added to the line skip --------------
gd_put0_linearx::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1    ;Fast!

  add edi,_v_linear
  add edi,saddr
  add esi,buf

  mov eax,_v_xbpsl
  add dadd,eax

  ;time to copy image
  ; EBX = mask with CL low bytes set (all ones for any other CL)
  .if cl==1
    mov ebx,0ffh
  .elseif cl==2
    mov ebx,0ffffh
  .elseif cl==3
    mov ebx,0ffffffh
  .else
    mov ebx,-1
  .endif
  mov edx,ecx    ; keep the g_setup ECX value for copyECX0
_l2:
  mov eax,xl
  copyECX0 edx,eax,1
  add esi,sadd
  add edi,dadd
  dec yl
  jnz _l2
  jmp done

;--- Linear frame buffer -----------------------------------------------
gd_put0_linear::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1    ;Fast!

  add edi,_v_linear
  add edi,saddr
  add esi,buf

  ;time to copy image
  ; EBX = mask with CL low bytes set (all ones for any other CL)
  .if cl==1
    mov ebx,0ffh
  .elseif cl==2
    mov ebx,0ffffh
  .elseif cl==3
    mov ebx,0ffffffh
  .else
    mov ebx,-1
  .endif
  mov edx,ecx    ; keep the g_setup ECX value for copyECX0
_l1:
  mov eax,xl
  copyECX0 edx,eax,1
  add esi,sadd
  add edi,dadd
  dec yl
  jnz _l1

done:
  popad
  xor eax,eax    ; success
  ret

niv: ;not in view
  popad
  mov eax,ERROR
  ret
gd_put0 endp

.data
align 4
; Jump table for gd_putb0: one handler address per video access type.
; gd_putb0 adds _v_acctype to the table address without scaling it, so
; _v_acctype is used as a byte offset into this table.
gd_putb0_types dd offset gd_putb0_modex,offset gd_putb0_vesa,offset gd_putb0_vesax
             dd offset gd_putb0_linear,offset gd_putb0_linearx

.code
align 4
;-----------------------------------------------------------------------
; gd_putb0 - draw a bit-packed (1 bit per pixel) image into video memory
;            in a single colour; clear bits leave the screen untouched.
;
; In:   buf          = base address of the bit-packed source
;       x1,y1,x2,y2  = rectangle; not referenced directly here, presumably
;                      consumed by the g_setup macro (defined elsewhere)
;                      - TODO confirm
;       clr          = colour to write for set bits (the ModeX handler
;                      uses its low byte)
; Out:  eax = 0 on the normal exit (done)
;       eax = ERROR on the niv exit.  niv is not referenced in the visible
;             code; presumably g_setup jumps there when the rectangle is
;             not in view - TODO confirm
; All other general registers are preserved (pushad/popad).
;
; Source position is kept as a byte pointer (ESI) plus a one-hot bit mask
; (BH).  The mask moves left for each pixel and wraps to bit 0 of the
; next byte, i.e. pixels are stored least significant bit first.
;
; The per-pixel test is visible in the ModeX handler.  The other handlers
; delegate it to the copyECXb0 macro, which is defined elsewhere;
; presumably it performs the same test using ESI/BH - TODO confirm.
;
; NOTE(review): g_setup, copyECXb0 and callp are macros defined
; elsewhere.  From the way their results are used below, g_setup appears
; to fill xl, yl and saddr, store the per-line skips in its 2nd/3rd
; arguments and leave the screen/buffer offsets in its 4th/5th arguments,
; with the buffer-side values counted in pixels (bits) here.
;-----------------------------------------------------------------------
gd_putb0 proc,buf:dword,x1:dword,y1:dword,x2:dword,y2:dword,clr:dword
  ; dadd is added to EDI (screen side) after every line
  local dadd:dword
  ; sadd is the buffer-side line skip; split below into whole bytes
  ; (sadd) and leftover bits (saddrot)
  local sadd:dword
  ; saddr is the SCREEN address here (EDI is added to it below)
  local saddr:dword
  local xl:dword,yl:dword
;ModeX locals
  ; _pc = planes left, _xl2c = planes that still get an extra column,
  ; _xl4 = columns per line for the current plane
  local _pc:byte,_xl2c:byte,_xl4:dword
  local saddrot:byte ;src add (rotation optimization)
  ; sstart/sstartrot = source byte offset and bit mask at which the
  ; current plane starts
  local sstart:dword,sstartrot:byte
  pushad

  ; dispatch on the video access type (byte offset into the table)
  mov eax,_v_acctype
  jmp dptr[eax+offset gd_putb0_types]

;--- ModeX (planar): one pass per plane, 4 passes in total -------------
gd_putb0_modex::
  g_setup _v_bpsl,dadd,sadd,edi,esi,,,1  ;Not Fast!  No shifting at all!

  ;adjust edi
  add saddr,edi

  ;Convert ESI => bitpack format
  mov ebx,esi
  and bl,7       ; bl = bit number inside the byte
  shr esi,3      ; esi = byte offset
  mov sstart,esi

  ; bh = 2 ^ bl
  mov bh,1
@@:
  .if bl
    dec bl
    shl bh,1
    jmp @b
  .endif
  mov sstartrot,bh

  ; Low two bits of the screen address give the first plane; the map
  ; mask written below is the one-hot value 1 shl plane.
  mov ecx,saddr
  and cl,3
  mov al,1
  shl al,cl
  mov modex_plane_wr,al
  shr saddr,2    ; byte address inside a plane
  shr dadd,2
  mov eax,xl
  mov ebx,eax
  and ebx,3
  mov _xl2c,bl   ; the first (xl and 3) planes hold one more column
  .if bl         ; dadd was rounded down by the shift above; correct it
    inc dadd
  .endif
  add sadd,ebx
  shr eax,2
  mov _xl4,eax   ; XL/4
  mov _pc,4      ; Plane counter
  mov dx,vga_SC_INDEX ; port used by the out instructions below
  mov al,02h
  mov ah,modex_plane_wr
  out dx,ax     ; enable writes to 1st plane

  ;Convert sadd => bitpack format
  mov eax,sadd
  shr sadd,3     ; whole bytes
  and al,7
  mov saddrot,al ; leftover bits

  .if _xl2c
    ; this plane has one extra column: one more pixel per line, and the
    ; source skip shrinks by 4 bits (borrowing from the byte count)
    inc _xl4
    dec dadd
    sub saddrot,4
    .if carry?
      add saddrot,8
      dec sadd
    .endif
  .endif
mx_l2:
  cmp _xl4,0   ;this can happen when xl=3 or less
  je mx_z1

  mov edi,_v_linear
  add edi,saddr
  mov esi,sstart
  add esi,buf
  mov bh,sstartrot

  mov eax,yl     ; eax = lines left in this plane pass
mx_l1:
  mov ecx,_xl4   ; ecx = columns to write on this line
pixel:
  test [esi],bh  ; source bit set?
  .if !zero?
    mov bl,bptr[clr]
    mov [edi],bl
  .endif
  inc edi
  mov bl,4    ;add 4 to ESI
@@:
  ; advance the source by one bit, wrapping into the next byte
  shl bh,1
  .if zero?
    inc bh
    inc esi
  .endif
  dec bl
  jnz @b
  dec ecx
  jnz pixel

  add esi,sadd
  add edi,dadd

  ; advance the bit mask by saddrot bits; if the rotated mask is below
  ; the old one it wrapped, so step to the next source byte
  mov bl,bh
  mov cl,saddrot
  rol bh,cl
  cmp bh,bl
  .if carry?
    inc esi
  .endif

  dec eax
  jnz mx_l1

mx_z1:
  ; Once the planes holding the extra column are done, fall back to
  ; xl/4 columns and undo the matching skip adjustments.
  .if _xl2c
    dec _xl2c
    .if zero?
      dec _xl4
      inc dadd
      add saddrot,4
      .if saddrot >= 8
        sub saddrot,8
        inc sadd
      .endif
    .endif
  .endif

  dec _pc
  jz done

  shl sstartrot,1   ;inc sstart
  .if zero?
    inc sstartrot
    inc sstart
  .endif
  mov ah,modex_plane_wr
  shl ah,1
  .if ah==16     ; wrapped past plane 3: restart at plane 0, next address
    mov ah,1
    inc saddr
  .endif
  mov modex_plane_wr,ah
  mov al,02h
  out dx,ax     ; enable writes to plane
  jmp mx_l2

;--- VESA banked: video memory is seen through a 64K window ------------
gd_putb0_vesa::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1,1  ;Fast!  Do not shift ESI,SADD

  add saddr,edi
  mov edi,_v_linear

  ;Convert sadd => bitpack format
  mov eax,sadd
  shr sadd,3     ; whole bytes
  and al,7
  mov saddrot,al ; leftover bits

  ;Convert ESI => bitpack format
  mov ebx,esi
  and bl,7       ; bl = bit number inside the byte
  shr esi,3      ; esi = byte offset
  add esi,buf

  ; bh = 2 ^ bl
  mov bh,1
@@:
  .if bl
    dec bl
    shl bh,1
    jmp @b
  .endif
  
  mov eax,saddr
  mov edx,eax
  and edx,0ffffh   ; edx = offset inside the bank
  shr eax,16       ; eax = bank number
  add edi,edx
  .if al!=vesa_current_bank
    callp vesa_setbank,al
  .endif
  mov eax,64*1024 ;when this reaches 0 then we must switch to next bank
  sub eax,edx
  mov edx,eax      ; edx = bytes left in the current bank
  ;copy a scan line
@@:
  mov ecx,xl
  .if edx<=ecx
    ; line crosses the bank boundary: draw in two pieces
    mov ecx,edx  ;use rest of bank
    copyECXb0 _v_bypp,ecx
    mov edi,_v_linear
    mov ecx,xl
    sub ecx,edx  ; relies on copyECXb0 leaving edx untouched
    mov edx,64*1024
    sub edx,ecx
    call vesa_nextbank
    copyECXb0 _v_bypp,ecx      ;copy rest of scan line
  .else
    sub edx,ecx
    copyECXb0 _v_bypp,ecx      ;copy entire scan line
  .endif
  add esi,sadd
  ; advance the bit mask by saddrot bits; if the rotated mask is below
  ; the old one it wrapped, so step to the next source byte
  mov bl,bh
  mov cl,saddrot
  rol bh,cl
  cmp bh,bl
  .if carry?
    inc esi
  .endif
  ;Add dadd to EDI
  .if edx<=dadd
    ; the skip itself crosses into the next bank
    call vesa_nextbank
    mov edi,_v_linear
    mov eax,dadd
    sub eax,edx
    add edi,eax
    mov edx,64*1024
    sub edx,eax
  .else
    sub edx,dadd
    add edi,dadd
  .endif
  dec yl
  jnz @b
  jmp done

;--- VESA banked, as above but _v_xbpsl is added to the line skip ------
gd_putb0_vesax::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1,1  ;Fast!  Do not shift ESI,SADD

  add saddr,edi
  mov edi,_v_linear

  mov eax,_v_xbpsl
  add dadd,eax

  ;Convert sadd => bitpack format
  mov eax,sadd
  shr sadd,3     ; whole bytes
  and al,7
  mov saddrot,al ; leftover bits

  ;Convert ESI => bitpack format
  mov ebx,esi
  and bl,7       ; bl = bit number inside the byte
  shr esi,3      ; esi = byte offset
  add esi,buf

  ; bh = 2 ^ bl
  mov bh,1
@@:
  .if bl
    dec bl
    shl bh,1
    jmp @b
  .endif
  
  mov eax,saddr
  mov edx,eax
  and edx,0ffffh   ; edx = offset inside the bank
  shr eax,16       ; eax = bank number
  add edi,edx
  .if al!=vesa_current_bank
    callp vesa_setbank,al
  .endif
  mov eax,64*1024 ;when this reaches 0 then we must switch to next bank
  sub eax,edx
  mov edx,eax      ; edx = bytes left in the current bank
  ;copy a scan line
@@:
  mov ecx,xl
  .if edx<=ecx
    ; line crosses the bank boundary: draw in two pieces
    mov ecx,edx  ;use rest of bank
    copyECXb0 _v_bypp,ecx
    mov edi,_v_linear
    mov ecx,xl
    sub ecx,edx  ; relies on copyECXb0 leaving edx untouched
    mov edx,64*1024
    sub edx,ecx
    call vesa_nextbank
    copyECXb0 _v_bypp,ecx      ;copy rest of scan line
  .else
    sub edx,ecx
    copyECXb0 _v_bypp,ecx      ;copy entire scan line
  .endif
  add esi,sadd
  ; advance the bit mask by saddrot bits; if the rotated mask is below
  ; the old one it wrapped, so step to the next source byte
  mov bl,bh
  mov cl,saddrot
  rol bh,cl
  cmp bh,bl
  .if carry?
    inc esi
  .endif
  ;Add dadd to EDI
  .if edx<=dadd
    ; the skip itself crosses into the next bank
    call vesa_nextbank
    mov edi,_v_linear
    mov eax,dadd
    sub eax,edx
    add edi,eax
    mov edx,64*1024
    sub edx,eax
  .else
    sub edx,dadd
    add edi,dadd
  .endif
  dec yl
  jnz @b
  jmp done

;--- Linear frame buffer, _v_xbpsl added to the line skip --------------
gd_putb0_linearx::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1,1  ;Fast!  Do not shift ESI,SADD

  add edi,_v_linear
  add edi,saddr

  mov eax,_v_xbpsl
  add dadd,eax

  ;Convert sadd => bitpack format
  mov eax,sadd
  shr sadd,3     ; whole bytes
  and al,7
  mov saddrot,al ; leftover bits

  ;Convert ESI => bitpack format
  mov ebx,esi
  and bl,7       ; bl = bit number inside the byte
  shr esi,3      ; esi = byte offset
  add esi,buf

  ; bh = 2 ^ bl
  mov bh,1
@@:
  .if bl
    dec bl
    shl bh,1
    jmp @b
  .endif

  mov edx,ecx    ; keep the g_setup ECX value; CL is reused per line below
  ;time to copy image
line3:
  mov eax,xl
  copyECXb0 edx,eax,1
  add esi,sadd
  ; advance the bit mask by saddrot bits; if the rotated mask is below
  ; the old one it wrapped, so step to the next source byte
  mov bl,bh
  mov cl,saddrot
  rol bh,cl
  cmp bh,bl
  .if carry?
    inc esi
  .endif
  add edi,dadd
  dec yl
  jnz line3
  jmp done

;--- Linear frame buffer -----------------------------------------------
gd_putb0_linear::
  g_setup _v_bpsl,dadd,sadd,edi,esi,1,1  ;Fast!  Do not shift ESI,SADD

  add edi,_v_linear
  add edi,saddr

  ;Convert sadd => bitpack format
  mov eax,sadd
  shr sadd,3     ; whole bytes
  and al,7
  mov saddrot,al ; leftover bits

  ;Convert ESI => bitpack format
  mov ebx,esi
  and bl,7       ; bl = bit number inside the byte
  shr esi,3      ; esi = byte offset
  add esi,buf

  ; bh = 2 ^ bl
  mov bh,1
@@:
  .if bl
    dec bl
    shl bh,1
    jmp @b
  .endif

  mov edx,ecx    ; keep the g_setup ECX value; CL is reused per line below
  ;time to copy image
line2:
  mov eax,xl
  copyECXb0 edx,eax,1
  add esi,sadd
  ; advance the bit mask by saddrot bits; if the rotated mask is below
  ; the old one it wrapped, so step to the next source byte
  mov bl,bh
  mov cl,saddrot
  rol bh,cl
  cmp bh,bl
  .if carry?
    inc esi
  .endif
  add edi,dadd
  dec yl
  jnz line2

done:
  popad
  xor eax,eax    ; success
  ret

niv: ;not in view
  popad
  mov eax,ERROR
  ret
gd_putb0 endp

