;
; Copyright © 2013 Raspberry Pi Foundation
; Copyright © 2013 RISC OS Open Ltd
;
; Permission to use, copy, modify, distribute, and sell this software and its
; documentation for any purpose is hereby granted without fee, provided that
; the above copyright notice appear in all copies and that both that
; copyright notice and this permission notice appear in supporting
; documentation, and that the name of the copyright holders not be used in
; advertising or publicity pertaining to distribution of the software without
; specific, written prior permission. The copyright holders make no
; representations about the suitability of this software for any purpose. It
; is provided "as is" without express or implied warranty.
;
; THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
; SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
; FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
; SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
; OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
; SOFTWARE.
;
; Debug options
; Each GBLL declares a global logical assembly-time variable (the assembler
; initialises it to {FALSE}); uncomment the matching SETL line to switch the
; option on.
; NOTE(review): none of these switches is tested in the visible part of this
; file - presumably they are consumed by BitBltArmSimdAsm.hdr; confirm there.
GBLL DebugData
;DebugData SETL {TRUE}
GBLL DebugPld
;DebugPld SETL {TRUE}
GBLL VerboseBuild
;VerboseBuild SETL {TRUE}
; Shared header.
; NOTE(review): presumably the source of GenerateFunctions, Read1Word/Read2Words/
; Read4Words, Write1Word/Write2Words/Write4Words and the $wk0..$wk7 work-register
; variables used throughout this file - none of them is defined in this view.
GET BitBltArmSimdAsm.hdr
; Everything assembled below goes into one read-only code area, as ARM-state
; (not Thumb) instructions.
AREA |BitBltArmSimdPixPaint$$Code|, CODE, READONLY
ARM
; ********************************************************************
; PixPaint1_1_32bits: paint one 32-bit word of 1bpp pixels.
;   $src, $dst  - when $dst is the literal text "memory" the src/dst pointer
;                 registers are used; otherwise both name registers and the
;                 result is returned in $src
;   $fixed_skew - forwarded unchanged to Read1Word (memory form only)
; At 1bpp the painted result is simply source OR destination.
; NOTE(review): the TEQ after Read1Word implies the source word arrives in
; $wk0, and Write1Word presumably stores $wk0 - confirm in the header.
MACRO
PixPaint1_1_32bits $src, $dst, $fixed_skew
[ "$dst" = "memory"
; Operate on memory, referenced by src/dst registers
Read1Word src, 0, carry, $fixed_skew, skew, $wk1
ADD dst, dst, #1*4 ; step dst first; this word is now at [dst, #-4]
TEQ $wk0, #0
BEQ %FT01 ; don't read or write dest unless we really have to
LDR $wk1, [dst, #-4]
ORR $wk0, $wk0, $wk1
Write1Word dst, 0
01
|
; Operate in registers, input from $src/$dst, return in $src
ORR $src, $src, $dst
]
MEND
; PixPaint1_1_64bits: paint two 32-bit words of 1bpp pixels in memory.
;   $src        - not referenced in the body; the src pointer register is
;                 used directly
;   $fixed_skew - forwarded unchanged to Read2Words
; The destination is only loaded and rewritten when at least one of the two
; source words is non-zero.
; NOTE(review): the tests below imply Read2Words delivers the source in
; $wk0/$wk1, and Write2Words presumably stores them - confirm in the header.
MACRO
PixPaint1_1_64bits $src, $fixed_skew
Read2Words src, 0, carry, $fixed_skew, skew, $wk2
ADD dst, dst, #2*4
TEQ $wk0, #0
TEQEQ $wk1, #0 ; Z stays set only if both source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
LDMDB dst, {$wk2, $wk3} ; the two destination words just stepped over
ORR $wk0, $wk0, $wk2
ORR $wk1, $wk1, $wk3
Write2Words dst, 0
01
MEND
; PixPaint1_1_128bits_head: first half of the four-word (128-bit) step - only
; fetches the source via Read4Words; PixPaint1_1_128bits_tail does the painting.
;   $src, $intra_preloads - not referenced in the body
;   $fixed_skew           - forwarded unchanged to Read4Words
MACRO
PixPaint1_1_128bits_head $src, $fixed_skew, $intra_preloads
Read4Words src, 0, carry, $fixed_skew, skew, $wk6
MEND
; PixPaint1_1_128bits_tail: second half of the four-word step - ORs the four
; source words in $wk0-$wk3 into the destination.
;   $src - not referenced in the body
; The destination is only loaded and rewritten when at least one source word
; is non-zero.
; NOTE(review): assumes $wk0-$wk3 still hold what PixPaint1_1_128bits_head
; read, and that Write4Words stores $wk0-$wk3 - confirm in the header.
MACRO
PixPaint1_1_128bits_tail $src
ADD dst, dst, #4*4
TEQ $wk0, #0
TEQEQ $wk1, #0
TEQEQ $wk2, #0
TEQEQ $wk3, #0 ; Z stays set only if all four source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
LDMDB dst, {$wk4, $wk5, $wk6, $wk7} ; the four destination words just stepped over
ORR $wk0, $wk0, $wk4
ORR $wk1, $wk1, $wk5
ORR $wk2, $wk2, $wk6
ORR $wk3, $wk3, $wk7
Write4Words dst, 0
01
MEND
;$op GenerateFunctions $src_bpp, $dst_w_bpp, $qualifier, $flags, $prefetch_distance,
; $work_regs, $line_saved_regs, $leading_pixels_reg, $preload_offset_reg, $init, $newline, $cleanup
; Instantiate PixPaint for 1bpp source and 1bpp destination. Matched against
; the parameter list above: empty qualifier, prefetch distance 2, leading
; pixels register "map"; preload offset register, init, newline and cleanup
; are all omitted.
; NOTE(review): GenerateFunctions is not defined in this view; presumably it
; builds the routines out of the PixPaint1_1_* macros - confirm in the header.
PixPaint GenerateFunctions 1, 1,, \
FLAG_DST_READWRITE :OR: FLAG_SPILL_LINE_VARS :OR: FLAG_NO_EXPAND_SKEW :OR: FLAG_PROCESS_PARALLEL :OR: FLAG_NO_PRELOAD_DST, 2, \
"y,stride_d,stride_s,ht_info,map,bitptrs,orig_w,scratch", \
"x,y,stride_d,stride_s,bitptrs", map
; ********************************************************************
; PixPaint2_2_init: load ht with &55555555, i.e. the low bit of every 2-bit
; pixel. The PixPaint2_2_* macros in this file pass ht as their $pattern mask.
; NOTE(review): presumably expanded once per routine through the "init"
; argument given to GenerateFunctions - confirm in the header.
MACRO
PixPaint2_2_init
LDR ht, =&55555555 ; literal-pool load
B %FT00 ; jump over the literal pool that LTORG places here
LTORG
00
MEND
; PixPaint2_2_16pixels: paint one word of sixteen 2bpp pixels, in registers.
; Each non-zero source pixel is kept; each zero source pixel is replaced by
; the destination pixel. Result is returned in $src.
;   $src               - source word in, painted word out
;   $dst               - destination word (not modified)
;   $pattern           - mask with the low bit of every pixel set; callers in
;                        this file pass ht, which PixPaint2_2_init loads with
;                        &55555555
;   $tmp               - scratch register, corrupted
;   $src_known_nonzero - leave empty to include the whole-word-zero shortcut;
;                        pass any text to omit it
; Flags are corrupted in both variants (ORRS).
MACRO
PixPaint2_2_16pixels $src, $dst, $pattern, $tmp, $src_known_nonzero
ORRS $tmp, $src, $src, LSR #1 ; low bit of each pixel = OR of its two bits; Z set iff $src is zero
[ "$src_known_nonzero" = ""
BEQ %FT03
]
AND $tmp, $tmp, $pattern ; discard the odd bits, which mix in the neighbouring pixel
ORR $tmp, $tmp, $tmp, LSL #1 ; widen: both bits set for every non-zero source pixel
BIC $tmp, $dst, $tmp ; destination pixels where the source pixel is zero
ORR $src, $src, $tmp
[ "$src_known_nonzero" = ""
B %FT04
; Whole source word is zero: the result is the destination unchanged
03 MOV $src, $dst
04
]
MEND
; PixPaint2_2_32pixels: paint two words (thirty-two 2bpp pixels), in registers.
; Runs the same mask sequence as PixPaint2_2_16pixels on two words with the
; instructions interleaved, and without any zero-word shortcut.
;   $src0, $src1 - source words in, painted words out
;   $dst0, $dst1 - destination words (not modified)
;   $pattern     - mask with the low bit of every pixel set (callers pass ht)
;   $tmp0, $tmp1 - scratch registers, corrupted
MACRO
PixPaint2_2_32pixels $src0, $src1, $dst0, $dst1, $pattern, $tmp0, $tmp1
ORR $tmp0, $src0, $src0, LSR #1 ; low bit of each pixel = OR of its two bits
ORR $tmp1, $src1, $src1, LSR #1
AND $tmp0, $tmp0, $pattern ; keep only the per-pixel low bit
AND $tmp1, $tmp1, $pattern
ORR $tmp0, $tmp0, $tmp0, LSL #1 ; widen to a full 2-bit mask per non-zero pixel
ORR $tmp1, $tmp1, $tmp1, LSL #1
BIC $tmp0, $dst0, $tmp0 ; destination pixels where the source pixel is zero
BIC $tmp1, $dst1, $tmp1
ORR $src0, $src0, $tmp0
ORR $src1, $src1, $tmp1
MEND
; PixPaint2_2_32bits: paint one 32-bit word of 2bpp pixels.
;   $src, $dst  - when $dst is the literal text "memory" the src/dst pointer
;                 registers are used; otherwise both name registers and the
;                 result is returned in $src
;   $fixed_skew - forwarded unchanged to Read1Word (memory form only)
; Both forms use $wk6 as scratch and ht as the pixel mask.
; NOTE(review): the TEQ after Read1Word implies the source word arrives in
; $wk0, and Write1Word presumably stores $wk0 - confirm in the header.
MACRO
PixPaint2_2_32bits $src, $dst, $fixed_skew
[ "$dst" = "memory"
; Operate on memory, referenced by src/dst registers
Read1Word src, 0, carry, $fixed_skew, skew, $wk1
ADD dst, dst, #1*4
TEQ $wk0, #0
BEQ %FT01 ; don't read or write dest unless we really have to
LDR $wk1, [dst, #-4]
; Source already tested non-zero above, so the helper's own zero check is omitted
PixPaint2_2_16pixels $wk0, $wk1, ht, $wk6, src_known_nonzero
Write1Word dst, 0
01
|
; Operate in registers, input from $src/$dst, return in $src
PixPaint2_2_16pixels $src, $dst, ht, $wk6
]
MEND
; PixPaint2_2_64bits: paint two 32-bit words of 2bpp pixels in memory.
;   $src        - not referenced in the body; the src pointer register is
;                 used directly
;   $fixed_skew - forwarded unchanged to Read2Words
; Uses $wk2/$wk3 for the destination words and $wk6/$wk7 as scratch.
; NOTE(review): the tests below imply Read2Words delivers the source in
; $wk0/$wk1, and Write2Words presumably stores them - confirm in the header.
MACRO
PixPaint2_2_64bits $src, $fixed_skew
Read2Words src, 0, carry, $fixed_skew, skew, $wk2
ADD dst, dst, #2*4
TEQ $wk0, #0
TEQEQ $wk1, #0 ; Z stays set only if both source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
LDMDB dst, {$wk2, $wk3} ; the two destination words just stepped over
PixPaint2_2_32pixels $wk0, $wk1, $wk2, $wk3, ht, $wk6, $wk7
Write2Words dst, 0
01
MEND
; PixPaint2_2_128bits_head: first half of the four-word (128-bit) step - only
; fetches the source via Read4Words; PixPaint2_2_128bits_tail does the painting.
;   $src, $intra_preloads - not referenced in the body
;   $fixed_skew           - forwarded unchanged to Read4Words
MACRO
PixPaint2_2_128bits_head $src, $fixed_skew, $intra_preloads
Read4Words src, 0, carry, $fixed_skew, skew, $wk6
MEND
; PixPaint2_2_128bits_tail: second half of the four-word step - paints the
; four source words in $wk0-$wk3 over the destination, two words at a time.
;   $src - not referenced in the body
; $wk4/$wk5 receive each destination pair; $wk6/$wk7 are scratch.
; NOTE(review): assumes $wk0-$wk3 still hold what PixPaint2_2_128bits_head
; read, and that Write4Words stores $wk0-$wk3 - confirm in the header.
; NOTE(review): ARM-state LDRD needs an even-numbered first register followed
; by the next register up; this relies on how $wk4/$wk5 are allocated.
MACRO
PixPaint2_2_128bits_tail $src
ADD dst, dst, #4*4
TEQ $wk0, #0
TEQEQ $wk1, #0
TEQEQ $wk2, #0
TEQEQ $wk3, #0 ; Z stays set only if all four source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
LDRD $wk4, $wk5, [dst, #-4*4] ; destination words 0 and 1
PixPaint2_2_32pixels $wk0, $wk1, $wk4, $wk5, ht, $wk6, $wk7
LDRD $wk4, $wk5, [dst, #-2*4] ; destination words 2 and 3
PixPaint2_2_32pixels $wk2, $wk3, $wk4, $wk5, ht, $wk6, $wk7
Write4Words dst, 0
01
MEND
;$op GenerateFunctions $src_bpp, $dst_w_bpp, $qualifier, $flags, $prefetch_distance,
; $work_regs, $line_saved_regs, $leading_pixels_reg, $preload_offset_reg, $init, $newline, $cleanup
; Instantiate PixPaint for 2bpp source and 2bpp destination. Matched against
; the parameter list above: empty qualifier, prefetch distance 2, leading
; pixels register "map", no preload offset register, and "init" in the $init
; position; newline and cleanup are omitted.
; NOTE(review): GenerateFunctions is not defined in this view; presumably the
; "init" argument makes it expand PixPaint2_2_init - confirm in the header.
PixPaint GenerateFunctions 2, 2,, \
FLAG_DST_READWRITE :OR: FLAG_SPILL_LINE_VARS :OR: FLAG_NO_EXPAND_SKEW :OR: FLAG_PROCESS_PARALLEL :OR: FLAG_NO_PRELOAD_DST, 2, \
"y,stride_d,stride_s,ht_info,map,bitptrs,orig_w,scratch", \
"x,y,stride_d,stride_s,bitptrs", map,, init
; ********************************************************************
; PixPaint4_4_init: load ht with &11111111, i.e. the low bit of every 4-bit
; pixel. The PixPaint4_4_* macros in this file pass ht as their $pattern mask.
; NOTE(review): presumably expanded once per routine through the "init"
; argument given to GenerateFunctions - confirm in the header.
MACRO
PixPaint4_4_init
LDR ht, =&11111111 ; literal-pool load
B %FT00 ; jump over the literal pool that LTORG places here
LTORG
00
MEND
; PixPaint4_4_8pixels: paint one word of eight 4bpp pixels, in registers.
; Each non-zero source pixel is kept; each zero source pixel is replaced by
; the destination pixel. Result is returned in $src.
;   $src               - source word in, painted word out
;   $dst               - destination word (not modified)
;   $pattern           - mask with the low bit of every pixel set; callers in
;                        this file pass ht, which PixPaint4_4_init loads with
;                        &11111111
;   $tmp               - scratch register, corrupted
;   $src_known_nonzero - leave empty to include the whole-word-zero shortcut;
;                        pass any text to omit it
; Flags are corrupted in both variants (ORRS).
MACRO
PixPaint4_4_8pixels $src, $dst, $pattern, $tmp, $src_known_nonzero
ORRS $tmp, $src, $src, LSR #1 ; fold adjacent bit pairs; Z set iff $src is zero
[ "$src_known_nonzero" = ""
BEQ %FT03
]
ORR $tmp, $tmp, $tmp, LSR #2 ; low bit of each pixel = OR of all four of its bits
AND $tmp, $tmp, $pattern ; discard the other bits, which mix in the neighbouring pixel
ORR $tmp, $tmp, $tmp, LSL #2 ; widen: bits 0 and 2 of each non-zero pixel
ORR $tmp, $tmp, $tmp, LSL #1 ; widen: all four bits of each non-zero pixel
BIC $tmp, $dst, $tmp ; destination pixels where the source pixel is zero
ORR $src, $src, $tmp
[ "$src_known_nonzero" = ""
B %FT04
; Whole source word is zero: the result is the destination unchanged
03 MOV $src, $dst
04
]
MEND
; PixPaint4_4_16pixels: paint two words (sixteen 4bpp pixels), in registers.
; Runs the same mask sequence as PixPaint4_4_8pixels on two words with the
; instructions interleaved, and without any zero-word shortcut.
;   $src0, $src1 - source words in, painted words out
;   $dst0, $dst1 - destination words (not modified)
;   $pattern     - mask with the low bit of every pixel set (callers pass ht)
;   $tmp0, $tmp1 - scratch registers, corrupted
MACRO
PixPaint4_4_16pixels $src0, $src1, $dst0, $dst1, $pattern, $tmp0, $tmp1
ORR $tmp0, $src0, $src0, LSR #1 ; fold adjacent bit pairs
ORR $tmp1, $src1, $src1, LSR #1
ORR $tmp0, $tmp0, $tmp0, LSR #2 ; low bit of each pixel = OR of all four of its bits
ORR $tmp1, $tmp1, $tmp1, LSR #2
AND $tmp0, $tmp0, $pattern ; keep only the per-pixel low bit
AND $tmp1, $tmp1, $pattern
ORR $tmp0, $tmp0, $tmp0, LSL #2 ; widen to bits 0 and 2
ORR $tmp1, $tmp1, $tmp1, LSL #2
ORR $tmp0, $tmp0, $tmp0, LSL #1 ; widen to a full 4-bit mask per non-zero pixel
ORR $tmp1, $tmp1, $tmp1, LSL #1
BIC $tmp0, $dst0, $tmp0 ; destination pixels where the source pixel is zero
BIC $tmp1, $dst1, $tmp1
ORR $src0, $src0, $tmp0
ORR $src1, $src1, $tmp1
MEND
; PixPaint4_4_32bits: paint one 32-bit word of 4bpp pixels.
;   $src, $dst  - when $dst is the literal text "memory" the src/dst pointer
;                 registers are used; otherwise both name registers and the
;                 result is returned in $src
;   $fixed_skew - forwarded unchanged to Read1Word (memory form only)
; Both forms use $wk6 as scratch and ht as the pixel mask.
; NOTE(review): the TEQ after Read1Word implies the source word arrives in
; $wk0, and Write1Word presumably stores $wk0 - confirm in the header.
MACRO
PixPaint4_4_32bits $src, $dst, $fixed_skew
[ "$dst" = "memory"
; Operate on memory, referenced by src/dst registers
Read1Word src, 0, carry, $fixed_skew, skew, $wk1
ADD dst, dst, #1*4
TEQ $wk0, #0
BEQ %FT01 ; don't read or write dest unless we really have to
LDR $wk1, [dst, #-4]
; Source already tested non-zero above, so the helper's own zero check is omitted
PixPaint4_4_8pixels $wk0, $wk1, ht, $wk6, src_known_nonzero
Write1Word dst, 0
01
|
; Operate in registers, input from $src/$dst, return in $src
PixPaint4_4_8pixels $src, $dst, ht, $wk6
]
MEND
; PixPaint4_4_64bits: paint two 32-bit words of 4bpp pixels in memory.
;   $src        - not referenced in the body; the src pointer register is
;                 used directly
;   $fixed_skew - forwarded unchanged to Read2Words
; Uses $wk2/$wk3 for the destination words and $wk6/$wk7 as scratch.
; NOTE(review): the tests below imply Read2Words delivers the source in
; $wk0/$wk1, and Write2Words presumably stores them - confirm in the header.
MACRO
PixPaint4_4_64bits $src, $fixed_skew
Read2Words src, 0, carry, $fixed_skew, skew, $wk2
ADD dst, dst, #2*4
TEQ $wk0, #0
TEQEQ $wk1, #0 ; Z stays set only if both source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
LDMDB dst, {$wk2, $wk3} ; the two destination words just stepped over
PixPaint4_4_16pixels $wk0, $wk1, $wk2, $wk3, ht, $wk6, $wk7
Write2Words dst, 0
01
MEND
; PixPaint4_4_128bits_head: first half of the four-word (128-bit) step - only
; fetches the source via Read4Words; PixPaint4_4_128bits_tail does the painting.
;   $src, $intra_preloads - not referenced in the body
;   $fixed_skew           - forwarded unchanged to Read4Words
MACRO
PixPaint4_4_128bits_head $src, $fixed_skew, $intra_preloads
Read4Words src, 0, carry, $fixed_skew, skew, $wk6
MEND
; PixPaint4_4_128bits_tail: second half of the four-word step - paints the
; four source words in $wk0-$wk3 over the destination, two words at a time.
;   $src - not referenced in the body
; $wk4/$wk5 receive each destination pair; $wk6/$wk7 are scratch.
; NOTE(review): assumes $wk0-$wk3 still hold what PixPaint4_4_128bits_head
; read, and that Write4Words stores $wk0-$wk3 - confirm in the header.
; NOTE(review): ARM-state LDRD needs an even-numbered first register followed
; by the next register up; this relies on how $wk4/$wk5 are allocated.
MACRO
PixPaint4_4_128bits_tail $src
ADD dst, dst, #4*4
TEQ $wk0, #0
TEQEQ $wk1, #0
TEQEQ $wk2, #0
TEQEQ $wk3, #0 ; Z stays set only if all four source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
LDRD $wk4, $wk5, [dst, #-4*4] ; destination words 0 and 1
PixPaint4_4_16pixels $wk0, $wk1, $wk4, $wk5, ht, $wk6, $wk7
LDRD $wk4, $wk5, [dst, #-2*4] ; destination words 2 and 3
PixPaint4_4_16pixels $wk2, $wk3, $wk4, $wk5, ht, $wk6, $wk7
Write4Words dst, 0
01
MEND
;$op GenerateFunctions $src_bpp, $dst_w_bpp, $qualifier, $flags, $prefetch_distance,
; $work_regs, $line_saved_regs, $leading_pixels_reg, $preload_offset_reg, $init, $newline, $cleanup
; Instantiate PixPaint for 4bpp source and 4bpp destination. Matched against
; the parameter list above: empty qualifier, prefetch distance 3 (the other
; depths in this file pass 2), leading pixels register "map", no preload
; offset register, and "init" in the $init position; newline and cleanup are
; omitted.
; NOTE(review): GenerateFunctions is not defined in this view; presumably the
; "init" argument makes it expand PixPaint4_4_init - confirm in the header.
PixPaint GenerateFunctions 4, 4,, \
FLAG_DST_READWRITE :OR: FLAG_SPILL_LINE_VARS :OR: FLAG_NO_EXPAND_SKEW :OR: FLAG_PROCESS_PARALLEL :OR: FLAG_NO_PRELOAD_DST, 3, \
"y,stride_d,stride_s,ht_info,map,bitptrs,orig_w,scratch", \
"x,y,stride_d,stride_s,bitptrs", map,, init
; ********************************************************************
; PixPaint8_8_init: zero ht. The PixPaint8_8_* macros in this file pass ht as
; the $zero operand of PixPaint8_8_4pixels.
; NOTE(review): presumably expanded once per routine through the "init"
; argument given to GenerateFunctions - confirm in the header.
MACRO
PixPaint8_8_init
MOV ht, #0
MEND
; PixPaint8_8_4pixels: paint one word of four 8bpp pixels, in registers.
; Each non-zero source byte is kept; each zero source byte is replaced by the
; corresponding destination byte. Result is returned in $src.
;   $src  - source word in, painted word out
;   $dst  - destination word (not modified)
;   $zero - register that must hold 0 (callers in this file pass ht)
;   $tmp  - receives the discarded USUB8 result; only the GE flags are wanted
; Corrupts the APSR GE flags.
MACRO
PixPaint8_8_4pixels $src, $dst, $zero, $tmp
USUB8 $tmp, $zero, $src ; set GE bit for each zero byte
SEL $src, $dst, $src ; GE set -> take the $dst byte, otherwise keep the $src byte
MEND
; PixPaint8_8_32bits: paint one 32-bit word of 8bpp pixels.
;   $src, $dst  - when $dst is the literal text "memory" the src/dst pointer
;                 registers are used; otherwise both name registers and the
;                 result is returned in $src
;   $fixed_skew - forwarded unchanged to Read1Word (memory form only)
; Both forms use $wk6 as scratch and ht as the zero register.
; NOTE(review): the TEQ after Read1Word implies the source word arrives in
; $wk0, and Write1Word presumably stores $wk0 - confirm in the header.
MACRO
PixPaint8_8_32bits $src, $dst, $fixed_skew
[ "$dst" = "memory"
; Operate on memory, referenced by src/dst registers
Read1Word src, 0, carry, $fixed_skew, skew, $wk1
ADD dst, dst, #1*4
TEQ $wk0, #0
BEQ %FT01 ; don't read or write dest unless we really have to
LDR $wk1, [dst, #-4]
PixPaint8_8_4pixels $wk0, $wk1, ht, $wk6
Write1Word dst, 0
01
|
; Operate in registers, input from $src/$dst, return in $src
PixPaint8_8_4pixels $src, $dst, ht, $wk6
]
MEND
; PixPaint8_8_64bits: paint two 32-bit words of 8bpp pixels in memory.
;   $src        - not referenced in the body; the src pointer register is
;                 used directly
;   $fixed_skew - forwarded unchanged to Read2Words
; Uses $wk2/$wk3 for the destination words and $wk6 as scratch.
; NOTE(review): the tests below imply Read2Words delivers the source in
; $wk0/$wk1, and Write2Words presumably stores them - confirm in the header.
MACRO
PixPaint8_8_64bits $src, $fixed_skew
Read2Words src, 0, carry, $fixed_skew, skew, $wk2
ADD dst, dst, #2*4
TEQ $wk0, #0
TEQEQ $wk1, #0 ; Z stays set only if both source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
LDMDB dst, {$wk2, $wk3} ; the two destination words just stepped over
PixPaint8_8_4pixels $wk0, $wk2, ht, $wk6
PixPaint8_8_4pixels $wk1, $wk3, ht, $wk6
Write2Words dst, 0
01
MEND
; PixPaint8_8_128bits_head: first half of the four-word (128-bit) step - only
; fetches the source via Read4Words; PixPaint8_8_128bits_tail does the painting.
;   $src, $intra_preloads - not referenced in the body
;   $fixed_skew           - forwarded unchanged to Read4Words
MACRO
PixPaint8_8_128bits_head $src, $fixed_skew, $intra_preloads
Read4Words src, 0, carry, $fixed_skew, skew, $wk6
MEND
; PixPaint8_8_128bits_tail: second half of the four-word step - paints the
; four source words in $wk0-$wk3 over the destination, two words at a time.
;   $src - not referenced in the body
; $wk5/$wk6 receive each destination pair; $wk4 is scratch.
; NOTE(review): assumes $wk0-$wk3 still hold what PixPaint8_8_128bits_head
; read, and that Write4Words stores $wk0-$wk3 - confirm in the header.
; NOTE(review): ARM-state LDRD needs an even-numbered first register followed
; by the next register up; this relies on how $wk5/$wk6 are allocated.
MACRO
PixPaint8_8_128bits_tail $src
ADD dst, dst, #4*4
TEQ $wk0, #0
TEQEQ $wk1, #0
TEQEQ $wk2, #0
TEQEQ $wk3, #0 ; Z stays set only if all four source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
LDRD $wk5, $wk6, [dst, #-4*4] ; destination words 0 and 1
PixPaint8_8_4pixels $wk0, $wk5, ht, $wk4
PixPaint8_8_4pixels $wk1, $wk6, ht, $wk4
LDRD $wk5, $wk6, [dst, #-2*4] ; destination words 2 and 3
PixPaint8_8_4pixels $wk2, $wk5, ht, $wk4
PixPaint8_8_4pixels $wk3, $wk6, ht, $wk4
Write4Words dst, 0
01
MEND
;$op GenerateFunctions $src_bpp, $dst_w_bpp, $qualifier, $flags, $prefetch_distance,
; $work_regs, $line_saved_regs, $leading_pixels_reg, $preload_offset_reg, $init, $newline, $cleanup
; Instantiate PixPaint for 8bpp source and 8bpp destination. Matched against
; the parameter list above: empty qualifier, prefetch distance 2, leading
; pixels register "bitptrs", preload offset register "scratch", and "init" in
; the $init position; newline and cleanup are omitted. Unlike the 1/2/4bpp
; instantiations, FLAG_NO_EXPAND_SKEW is not set here.
; NOTE(review): GenerateFunctions is not defined in this view; presumably the
; "init" argument makes it expand PixPaint8_8_init - confirm in the header.
PixPaint GenerateFunctions 8, 8,, \
FLAG_DST_READWRITE :OR: FLAG_SPILL_LINE_VARS :OR: FLAG_PROCESS_PARALLEL :OR: FLAG_NO_PRELOAD_DST, 2, \
"stride_d,stride_s,ht_info,map,bitptrs,skew,orig_w", \
"x,stride_d,stride_s", bitptrs, scratch, init
; ********************************************************************
; PixPaint16_16_init: zero ht. The PixPaint16_16_* macros in this file pass ht
; as the $zero operand of PixPaint16_16_2pixels.
; NOTE(review): presumably expanded once per routine through the "init"
; argument given to GenerateFunctions - confirm in the header.
MACRO
PixPaint16_16_init
MOV ht, #0
MEND
; PixPaint16_16_2pixels: paint one word of two 16bpp pixels, in registers.
; Each non-zero source halfword is kept; each zero source halfword is replaced
; by the corresponding destination halfword. Result is returned in $src.
;   $src  - source word in, painted word out
;   $dst  - destination word (not modified)
;   $zero - register that must hold 0 (callers in this file pass ht)
;   $tmp  - receives the discarded USUB16 result; only the GE flags are wanted
; Corrupts the APSR GE flags.
MACRO
PixPaint16_16_2pixels $src, $dst, $zero, $tmp
USUB16 $tmp, $zero, $src ; set GE bit pair for each zero halfword
SEL $src, $dst, $src ; GE set -> take the $dst bytes, otherwise keep the $src bytes
MEND
; PixPaint16_16_32bits: paint one 32-bit word of 16bpp pixels.
;   $src, $dst  - when $dst is the literal text "memory" the src/dst pointer
;                 registers are used; otherwise both name registers and the
;                 result is returned in $src
;   $fixed_skew - forwarded unchanged to Read1Word (memory form only)
; Both forms use $wk6 as scratch and ht as the zero register.
; NOTE(review): the TEQ after Read1Word implies the source word arrives in
; $wk0, and Write1Word presumably stores $wk0 - confirm in the header.
MACRO
PixPaint16_16_32bits $src, $dst, $fixed_skew
[ "$dst" = "memory"
; Operate on memory, referenced by src/dst registers
Read1Word src, 0, carry, $fixed_skew, skew, $wk1
ADD dst, dst, #1*4
TEQ $wk0, #0
BEQ %FT01 ; don't read or write dest unless we really have to
LDR $wk1, [dst, #-4]
PixPaint16_16_2pixels $wk0, $wk1, ht, $wk6
Write1Word dst, 0
01
|
; Operate in registers, input from $src/$dst, return in $src
PixPaint16_16_2pixels $src, $dst, ht, $wk6
]
MEND
; PixPaint16_16_64bits: paint two 32-bit words of 16bpp pixels in memory.
;   $src        - not referenced in the body; the src pointer register is
;                 used directly
;   $fixed_skew - forwarded unchanged to Read2Words
; Uses $wk2/$wk3 for the destination words and $wk6 as scratch.
; NOTE(review): the tests below imply Read2Words delivers the source in
; $wk0/$wk1, and Write2Words presumably stores them - confirm in the header.
MACRO
PixPaint16_16_64bits $src, $fixed_skew
Read2Words src, 0, carry, $fixed_skew, skew, $wk2
ADD dst, dst, #2*4
TEQ $wk0, #0
TEQEQ $wk1, #0 ; Z stays set only if both source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
LDMDB dst, {$wk2, $wk3} ; the two destination words just stepped over
PixPaint16_16_2pixels $wk0, $wk2, ht, $wk6
PixPaint16_16_2pixels $wk1, $wk3, ht, $wk6
Write2Words dst, 0
01
MEND
; PixPaint16_16_128bits_head: first half of the four-word (128-bit) step - only
; fetches the source via Read4Words; PixPaint16_16_128bits_tail does the
; painting.
;   $src, $intra_preloads - not referenced in the body
;   $fixed_skew           - forwarded unchanged to Read4Words
MACRO
PixPaint16_16_128bits_head $src, $fixed_skew, $intra_preloads
Read4Words src, 0, carry, $fixed_skew, skew, $wk6
MEND
; PixPaint16_16_128bits_tail: second half of the four-word step - paints the
; four source words in $wk0-$wk3 over the destination, two words at a time.
;   $src - not referenced in the body
; $wk5/$wk6 receive each destination pair; $wk4 is scratch.
; NOTE(review): assumes $wk0-$wk3 still hold what PixPaint16_16_128bits_head
; read, and that Write4Words stores $wk0-$wk3 - confirm in the header.
; NOTE(review): ARM-state LDRD needs an even-numbered first register followed
; by the next register up; this relies on how $wk5/$wk6 are allocated.
MACRO
PixPaint16_16_128bits_tail $src
ADD dst, dst, #4*4
TEQ $wk0, #0
TEQEQ $wk1, #0
TEQEQ $wk2, #0
TEQEQ $wk3, #0 ; Z stays set only if all four source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
LDRD $wk5, $wk6, [dst, #-4*4] ; destination words 0 and 1
PixPaint16_16_2pixels $wk0, $wk5, ht, $wk4
PixPaint16_16_2pixels $wk1, $wk6, ht, $wk4
LDRD $wk5, $wk6, [dst, #-2*4] ; destination words 2 and 3
PixPaint16_16_2pixels $wk2, $wk5, ht, $wk4
PixPaint16_16_2pixels $wk3, $wk6, ht, $wk4
Write4Words dst, 0
01
MEND
;$op GenerateFunctions $src_bpp, $dst_w_bpp, $qualifier, $flags, $prefetch_distance,
; $work_regs, $line_saved_regs, $leading_pixels_reg, $preload_offset_reg, $init, $newline, $cleanup
; Instantiate PixPaint for 16bpp source and 16bpp destination. Matched against
; the parameter list above: empty qualifier, prefetch distance 2, leading
; pixels register "bitptrs", preload offset register "scratch", and "init" in
; the $init position; newline and cleanup are omitted.
; NOTE(review): GenerateFunctions is not defined in this view; presumably the
; "init" argument makes it expand PixPaint16_16_init - confirm in the header.
PixPaint GenerateFunctions 16, 16,, \
FLAG_DST_READWRITE :OR: FLAG_SPILL_LINE_VARS :OR: FLAG_PROCESS_PARALLEL :OR: FLAG_NO_PRELOAD_DST, 2, \
"stride_d,stride_s,ht_info,map,bitptrs,skew,orig_w", \
"x,stride_d,stride_s", bitptrs, scratch, init
; ********************************************************************
; PixPaint32_32_32bits: paint one 32bpp pixel (one word) in memory.
;   $src, $dst  - not referenced in the body; unlike the lower depths there is
;                 no register-only form, the src/dst pointer registers are
;                 always used
;   $fixed_skew - forwarded unchanged to Read1Word
; A non-zero source word overwrites the destination; a zero source word leaves
; it untouched, so the destination never has to be read.
; NOTE(review): the TEQ after Read1Word implies the source word arrives in
; $wk0 - confirm in the header.
MACRO
PixPaint32_32_32bits $src, $dst, $fixed_skew
Read1Word src, 0, carry, $fixed_skew, skew, $wk1
ADD dst, dst, #1*4
TEQ $wk0, #0
STRNE $wk0, [dst, #-4] ; store only when the source pixel is non-zero
MEND
; PixPaint32_32_64bits: paint two 32bpp pixels (two words) in memory.
;   $src        - not referenced in the body; the src pointer register is
;                 used directly
;   $fixed_skew - forwarded unchanged to Read2Words
; If both source words are zero nothing is read or written. Otherwise each
; zero source word is replaced by its destination word, and both words are
; then written back.
; NOTE(review): the tests below imply Read2Words delivers the source in
; $wk0/$wk1, and Write2Words presumably stores them - confirm in the header.
MACRO
PixPaint32_32_64bits $src, $fixed_skew
Read2Words src, 0, carry, $fixed_skew, skew, $wk2
ADD dst, dst, #2*4
TEQ $wk0, #0
TEQEQ $wk1, #0 ; Z stays set only if both source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
TEQ $wk0, #0
LDREQ $wk0, [dst, #-2*4] ; zero source pixel: substitute the existing destination pixel
TEQ $wk1, #0
LDREQ $wk1, [dst, #-1*4]
Write2Words dst, 0
01
MEND
; PixPaint32_32_128bits_head: first half of the four-word (128-bit) step - only
; fetches the source via Read4Words; PixPaint32_32_128bits_tail does the
; painting.
;   $src, $intra_preloads - not referenced in the body
;   $fixed_skew           - forwarded unchanged to Read4Words
; Passes the scratch register as the last Read4Words argument, where the other
; depths in this file pass $wk6.
MACRO
PixPaint32_32_128bits_head $src, $fixed_skew, $intra_preloads
Read4Words src, 0, carry, $fixed_skew, skew, scratch
MEND
; PixPaint32_32_128bits_tail: second half of the four-word step - paints the
; four 32bpp source pixels in $wk0-$wk3 over the destination.
;   $src - not referenced in the body
; If all four source words are zero nothing is read or written. Otherwise each
; zero source word is replaced by its destination word, and all four words are
; then written back.
; NOTE(review): assumes $wk0-$wk3 still hold what PixPaint32_32_128bits_head
; read, and that Write4Words stores $wk0-$wk3 - confirm in the header.
MACRO
PixPaint32_32_128bits_tail $src
ADD dst, dst, #4*4
TEQ $wk0, #0
TEQEQ $wk1, #0
TEQEQ $wk2, #0
TEQEQ $wk3, #0 ; Z stays set only if all four source words are zero
BEQ %FT01 ; don't read or write dest unless we really have to
TEQ $wk0, #0
LDREQ $wk0, [dst, #-4*4] ; zero source pixel: substitute the existing destination pixel
TEQ $wk1, #0
LDREQ $wk1, [dst, #-3*4]
TEQ $wk2, #0
LDREQ $wk2, [dst, #-2*4]
TEQ $wk3, #0
LDREQ $wk3, [dst, #-1*4]
Write4Words dst, 0
01
MEND
;$op GenerateFunctions $src_bpp, $dst_w_bpp, $qualifier, $flags, $prefetch_distance,
; $work_regs, $line_saved_regs, $leading_pixels_reg, $preload_offset_reg, $init, $newline, $cleanup
; Instantiate PixPaint for 32bpp source and 32bpp destination. Matched against
; the parameter list above: empty qualifier, prefetch distance 2, an empty
; line-saved register list, leading pixels register "bitptrs" and preload
; offset register "scratch"; init, newline and cleanup are omitted (this file
; defines no PixPaint32_32_init). FLAG_SPILL_LINE_VARS is not set here, unlike
; the other depths.
; NOTE(review): GenerateFunctions is not defined in this view; presumably it
; builds the routines out of the PixPaint32_32_* macros - confirm in the header.
PixPaint GenerateFunctions 32, 32,, \
FLAG_DST_READWRITE :OR: FLAG_PROCESS_PARALLEL :OR: FLAG_NO_PRELOAD_DST, 2, \
"ht,ht_info,map,bitptrs", \
"", bitptrs, scratch
; ********************************************************************
; End of source: the assembler stops reading this file at the END directive.
END
|