/*************************************************************************** * __________ __ ___. * Open \______ \ ____ ____ | | _\_ |__ _______ ___ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ * \/ \/ \/ \/ \/ * $Id$ * * Copyright (C) 2008 by Jens Arnold * Copyright (C) 2009 by Andrew Mahone * * Optimised replacements for libgcc functions * * Based on: libgcc routines for ARM cpu, additional algorithms from ARM System * Developer's Guide * Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) * Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 * Free Software Foundation, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. 
* ****************************************************************************/

#define ASM_FILE
#include "global.h"

.syntax unified

/*
 * ARM_SDIV32_PRE numerator, divisor, sign
 *
 * Sign preparation for the ARMv4 signed divider.
 *   - \divisor is negated when its sign bit is clear, so it leaves this macro
 *     as -|divisor| (the SWAR divider below works with a negated divisor).
 *   - \numerator is negated when its sign bit is set, leaving |numerator|.
 *   - \sign receives: bit 31 = sign of the result, bits 0..30 = numerator
 *     sign. ARM_SDIV32_POST consumes this encoding.
 * Flags are clobbered.
 */
.macro ARM_SDIV32_PRE numerator, divisor, sign
    /* sign[31] = divisor sign */
    ands    \sign, \divisor, #1<<31
    rsbeq   \divisor, \divisor, #0
    /* sign[31] = result sign, sign[0:30], C = numerator sign */
    eors    \sign, \sign, \numerator, asr #32
    rsbcs   \numerator, \numerator, #0
.endm

/*
 * ARM_SDIV32_POST quotient, remainder, sign
 *
 * Applies the signs recorded in \sign by the PRE step. Shifting \sign left by
 * one puts the result sign (old bit 31) in C and the numerator sign (old bit
 * 30) in N; the quotient is negated on C, the remainder on N.
 * Either \quotient or \remainder may be passed as "" to skip that fixup.
 * \sign and the flags are clobbered.
 */
.macro ARM_SDIV32_POST quotient, remainder, sign
    movs    \sign, \sign, lsl #1
.ifnc "", "\quotient"
    rsbcs   \quotient, \quotient, #0
.endif
.ifnc "", "\remainder"
    rsbmi   \remainder, \remainder, #0
.endif
.endm

#if CPU_ARM_ARCH < 5
/*
 * ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits,
 *                   div0label, return
 *
 * Unsigned 32-bit division body for ARMv4.
 *   numerator, divisor  inputs; both registers are modified.
 *   quotient, remainder output registers; pass "" to omit either result.
 *   tmp, bits           scratch registers.
 *   div0label           if not "", branched to when the divisor is zero.
 *   return              non-zero: every exit is "bx lr";
 *                       zero: every exit lands on local label 99 at the end
 *                       of the macro.
 * Uses numeric local labels 10, 20 and 99. The computed jump into the
 * unrolled loop relies on every instruction being 4 bytes (ARM state).
 */
.macro ARMV4_UDIV32_BODY numerator, divisor, quotient, remainder, tmp, bits, div0label, return
    /* The divider works with the negated divisor. */
.ifnc "", "\div0label"
    rsbs    \divisor, \divisor, #0
    beq     \div0label
.else
    rsb     \divisor, \divisor, #0
.endif
    /* This SWAR divider requires a numerator less than 1<<31, because it must
       be able to shift the remainder left at each step without shifting out
       topmost bit. Since a shift might be needed for the aligned remainder to
       exceed the divisor, the topmost bit must be unset at the start to avoid
       this overflow case. The original numerator is saved so that the result
       can be corrected after the reduced division completes.
    */
    /* C clear here means numerator < divisor: quotient 0, remainder is the
       numerator. */
    cmn     \numerator, \divisor
.ifc "", "\quotient"
.ifc "\numerator", "\remainder"
    /* Remainder-only and already in place: leave immediately. */
.if \return
    bxcc    lr
.else
    /* Must be conditional, like bxcc above; an unconditional branch would
       skip the division for every input. */
    bcc     99f
.endif
.else
    bcc     20f
.endif
.else
    bcc     20f
.endif
    /* Keep the original numerator in tmp; halve it if bit 31 is set. */
    movs    \tmp, \numerator
    movmi   \numerator, \numerator, lsr #1
    mov     \bits, #30
    /* Align the (negated) divisor with the numerator in steps of
       16, 8, 4, 2, 1 bits, lowering \bits by the total shift applied. */
.set shift, 16
.rept 5
    cmn     \divisor, \numerator, lsr #shift
    subcs   \bits, \bits, #shift
    movcs   \divisor, \divisor, lsl #shift
.set shift, shift >> 1
.endr
    adds    \numerator, \numerator, \divisor
    subcc   \numerator, \numerator, \divisor
    /* Computed jump: pc reads as this instruction + 8, i.e. the first
       unrolled step, so \bits two-instruction steps are skipped. */
    add     pc, pc, \bits, lsl #3
    nop
.rept 30
    adcs    \numerator, \divisor, \numerator, lsl #1
    subcc   \numerator, \numerator, \divisor
.endr
    adc     \numerator, \numerator, \numerator
    /* N set: the original numerator had bit 31 set and was halved above, so
       the result needs the correction at 10:. */
    movs    \tmp, \tmp, asr #1
    rsb     \bits, \bits, #31
    bmi     10f
    /* \numerator now packs remainder (upper part, from bit \bits up) and
       quotient (lower part). */
.ifc "", "\quotient"
    mov     \remainder, \numerator, lsr \bits
.else
.ifc "", "\remainder"
    mov     \divisor, \numerator, lsr \bits
    eor     \quotient, \numerator, \divisor, lsl \bits
.else
    mov     \remainder, \numerator, lsr \bits
    eor     \quotient, \numerator, \remainder, lsl \bits
.endif
.endif
.ifne \return
    bx      lr
.else
    b       99f
.endif
10:
    /* Correction path for numerators that were halved before dividing. */
    mov     \tmp, \numerator, lsr \bits
    eor     \numerator, \numerator, \tmp, lsl \bits
    sub     \bits, \bits, #1
    adc     \tmp, \tmp, \tmp
    adds    \tmp, \tmp, \divisor, asr \bits
.ifnc "", "\quotient"
    adc     \quotient, \numerator, \numerator
.endif
.ifnc "", "\remainder"
    subcc   \remainder, \tmp, \divisor, asr \bits
    movcs   \remainder, \tmp
.endif
.ifne \return
    bx      lr
.else
    b       99f
.endif
20:
    /* numerator < divisor: quotient is zero, remainder is the numerator. */
.ifnc "", "\remainder"
.ifnc "\remainder", "\numerator"
    mov     \remainder, \numerator
.endif
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifne \return
    bx      lr
.else
99:
.endif
.endm

/*
 * ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign,
 *                   div0label, return
 *
 * Signed 32-bit division body for ARMv4, built from ARM_SDIV32_PRE, the same
 * unrolled SWAR loop as the unsigned body, and ARM_SDIV32_POST.
 *   numerator, divisor  inputs; both registers are modified.
 *   quotient, remainder output registers; pass "" to omit either result.
 *   bits, sign          scratch registers.
 *   div0label           if not "", branched to when the divisor is zero.
 *   return              non-zero: every exit is "bx lr";
 *                       zero: every exit runs through local label 99 at the
 *                       end of the macro.
 * Uses numeric local labels 20 and 99. The computed jump into the unrolled
 * loop relies on every instruction being 4 bytes (ARM state).
 */
.macro ARMV4_SDIV32_BODY numerator, divisor, quotient, remainder, bits, sign, div0label, return
    /* When this is wrapped for signed division, the wrapper code will handle
       inverting the divisor, and also the zero divisor test.
    */
    ARM_SDIV32_PRE \numerator, \divisor, \sign
.ifnc "", "\div0label"
    tst     \divisor, \divisor
    beq     \div0label
.endif
    /* This SWAR divider requires a numerator less than 1<<31, because it must
       be able to shift the remainder left at each step without shifting out
       topmost bit. With signed inputs, whose absolute value may not exceed
       1<<31, this may be accomplished simply by subtracting the divisor before
       beginning division, and adding 1 to the quotient.
    */
    /* \divisor is -|divisor| here, so this subtracts; C clear means
       |numerator| < |divisor|. */
    adds    \numerator, \numerator, \divisor
    bcc     20f
    mov     \bits, #30
    /* Align the (negated) divisor with the numerator in steps of
       16, 8, 4, 2, 1 bits, lowering \bits by the total shift applied. */
.set shift, 16
.rept 5
    cmn     \divisor, \numerator, lsr #shift
    subcs   \bits, \bits, #shift
    movcs   \divisor, \divisor, lsl #shift
.set shift, shift >> 1
.endr
    adds    \numerator, \numerator, \divisor
    subcc   \numerator, \numerator, \divisor
    /* Computed jump: pc reads as this instruction + 8, i.e. the first
       unrolled step, so \bits two-instruction steps are skipped. */
    add     pc, pc, \bits, lsl #3
    nop
.rept 30
    adcs    \numerator, \divisor, \numerator, lsl #1
    subcc   \numerator, \numerator, \divisor
.endr
    /* rsb leaves the flags alone, so adc still consumes the loop's carry. */
    rsb     \bits, \bits, #31
    adc     \numerator, \numerator, \numerator
    /* The "add #1" below is the +1 owed to the quotient for the divisor
       subtracted before the loop. */
.ifc "", "\quotient"
    mov     \remainder, \numerator, lsr \bits
.else
.ifc "", "\remainder"
    mov     \divisor, \numerator, lsr \bits
    add     \numerator, \numerator, #1
    sub     \quotient, \numerator, \divisor, lsl \bits
.else
    mov     \remainder, \numerator, lsr \bits
    add     \numerator, \numerator, #1
    sub     \quotient, \numerator, \remainder, lsl \bits
.endif
.endif
.ifne \return
    ARM_SDIV32_POST \quotient, \remainder, \sign
    bx      lr
.else
    b       99f
.endif
20:
    /* |numerator| < |divisor|: undo the initial subtraction to recover the
       remainder; the quotient is zero. */
.ifnc "", "\remainder"
    sub     \remainder, \numerator, \divisor
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.ifne \return
    /* Quotient is zero here, so only the remainder needs its sign fixed. */
    ARM_SDIV32_POST "", \remainder, \sign
    bx      lr
.else
99:
    ARM_SDIV32_POST \quotient, \remainder, \sign
.endif
.endm

#else
/*
 * ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg,
 *                   div0label, return
 *
 * Unsigned 32-bit division body for ARMv5 and later, using a table-seeded
 * reciprocal estimate (table at .L_udiv_est_table, defined outside this
 * macro) refined by Newton-Raphson.
 *   numerator, divisor  inputs; both registers are modified.
 *   quotient, remainder output registers; pass "" to omit either result.
 *   bits, inv, neg      scratch registers.
 *   div0label           if not "", branched to when the divisor is zero.
 *   return              non-zero: every exit is "bx lr";
 *                       zero: every exit lands on local label 99 at the end
 *                       of the macro.
 * Uses numeric local labels 10, 20, 30, 40 (ARMv6 only) and 99.
 * The table load is pc-relative: the instructions between "add \neg, pc, ..."
 * and the ldrb must not be moved or have anything inserted between them.
 */
.macro ARMV5_UDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, div0label, return
    cmp     \numerator, \divisor
    clz     \bits, \divisor
    bcc     30f
    /* Normalise the divisor so its top set bit is bit 31. */
    mov     \inv, \divisor, lsl \bits
    add     \neg, pc, \inv, lsr #25
    /* Test whether divisor is 2^N */
    cmp     \inv, #1<<31
    /* Load approximate reciprocal */
    ldrbhi  \inv, [\neg, #.L_udiv_est_table-.-64]
    bls     20f
    subs    \bits, \bits, #7
    rsb     \neg, \divisor, #0
    /* Scale approximate reciprocal, or else branch to large-divisor path */
    movpl   \divisor, \inv, lsl \bits
    bmi     10f
    /* Newton-Raphson iteration to improve reciprocal accuracy */
    mul     \inv, \divisor, \neg
    smlawt  \divisor, \divisor, \inv, \divisor
    mul     \inv, \divisor, \neg
    /* Complete N-R math and produce approximate quotient. Use smmla/smmul on
       ARMv6.
    */
#if CPU_ARM_ARCH >= 6
    tst     \numerator, \numerator
    smmla   \divisor, \divisor, \inv, \divisor
    /* Branch to large-numerator handler, or else use smmul if sign bit is not
       set. This wins on average with random numerators, and should be no
       slower than using umull for small numerator, even if prediction fails.
    */
    bmi     40f
    smmul   \inv, \numerator, \divisor
#else
    /* ARMv5e lacks smmul, so always uses umull. */
    mov     \bits, #0
    smlal   \bits, \divisor, \inv, \divisor
    umull   \bits, \inv, \numerator, \divisor
#endif
    /* Calculate remainder and correct result. */
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    /* Quotient only: \divisor is free to hold the trial remainder. */
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
10:
    /* Very large divisors can be handled without further improving the
       reciprocal. First the reciprocal must be reduced to ensure that it
       underestimates the correct value.
    */
    rsb     \bits, \bits, #0
    sub     \inv, \inv, #4
    mov     \divisor, \inv, lsr \bits
    /* Calculate approximate quotient and remainder */
    umull   \bits, \inv, \numerator, \divisor
    /* Correct quotient and remainder */
.ifnc "", "\remainder"
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
    addcs   \quotient, \quotient, #1
.else
    cmn     \neg, \remainder, lsr #1
    addcs   \remainder, \remainder, \neg, lsl #1
    cmn     \neg, \remainder
    addcs   \remainder, \remainder, \neg
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \neg, \divisor, lsr #1
    addcs   \divisor, \divisor, \neg, lsl #1
    addcs   \quotient, \quotient, #2
    cmn     \neg, \divisor
    addcs   \quotient, \quotient, #1
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
20:
    /* Handle division by powers of two by shifting right. Mod is handled
       by using divisor-1 as a bitmask.
    */
    /* Flags are still those of "cmp \inv, #1<<31": NE on this path means the
       normalised divisor was below 1<<31, which is treated as divide by
       zero. */
.ifnc "", "\remainder"
.ifnc "", "\div0label"
    bne     \div0label
.endif
.ifnc "", "\quotient"
    sub     \divisor, \divisor, #1
    rsb     \bits, \bits, #31
    and     \remainder, \numerator, \divisor
    mov     \quotient, \numerator, lsr \bits
.else
    sub     \divisor, \divisor, #1
    and     \remainder, \numerator, \divisor
.endif
.else
    rsb     \bits, \bits, #31
.ifnc "", "\div0label"
    bne     \div0label
.endif
    mov     \quotient, \numerator, lsr \bits
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
30:
    /* Handle numerator < divisor - quotient is zero, remainder is numerator,
       which must be restored to its original value on ARMv6.
    */
.ifnc "", "\remainder"
    mov     \remainder, \numerator
.endif
.ifnc "", "\quotient"
    mov     \quotient, #0
.endif
.if \return
    bx      lr
.endif
#if CPU_ARM_ARCH >= 6
.ifeq \return
    /* Without a return, execution must not fall into the large-numerator
       handler at 40:, which would overwrite the results just produced. */
    b       99f
.endif
40:
    /* Handle large (sign bit set) numerators. Works exactly as the ARMv5e
       code above 10:.
    */
    umull   \bits, \inv, \numerator, \divisor
    add     \numerator, \numerator, \neg
.ifnc "", "\remainder"
    /* Computed exactly once: repeating this mla gives a wrong remainder when
       \remainder is the same register as \numerator. */
    mla     \remainder, \inv, \neg, \numerator
.ifnc "", "\quotient"
    mov     \quotient, \inv
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.else
    cmn     \remainder, \neg
    subcs   \remainder, \remainder, \neg
    addpl   \remainder, \remainder, \neg, lsl #1
.endif
.else
    mla     \divisor, \inv, \neg, \numerator
    mov     \quotient, \inv
    cmn     \divisor, \neg
    addcc   \quotient, \quotient, #1
    addpl   \quotient, \quotient, #2
.endif
.if \return
    bx      lr
.else
    b       99f
.endif
#endif
99:
.endm

.macro ARMV5_SDIV32_BODY numerator, divisor, quotient, remainder, bits, inv, neg, sign, div0label, return
    /* sign[31] = divisor sign */
    ands    \sign, \divisor, #1<<31
    rsbne   \divisor, \divisor, #0
    /* sign[31] = result sign, sign[0:30], C = numerator sign */
    eors    \sign, \sign, \numerator, asr #32
    clz     \bits, \divisor
    rsbcs   \numerator, \numerator, #0
/* On ARMv6, subtract divisor before performing division, which ensures
   numerator sign bit is clear and smmul may be used in place of umull. The
   fixup for the results can be fit entirely into existing delay slots on the
   main division paths. It costs 1c in the num