more tweak
author NIIBE Yutaka <gniibe@fsij.org>
Tue, 24 Dec 2013 04:46:22 +0000 (13:46 +0900)
committer NIIBE Yutaka <gniibe@fsij.org>
Tue, 24 Dec 2013 04:46:22 +0000 (13:46 +0900)
polarssl/include/polarssl/bn_mul.h
polarssl/library/bignum.c

index 504c70a..dfb7a9b 100644 (file)
   asm( "tst    %3, #0xfe0           \n\t"        \
        "beq    0f                   \n"          \
 "1:    sub    %3, %3, #32          \n\t"        \
-       "ldmia  %0!, { r5, r6, r7 }  \n\t"        \
-       "ldmia  %1, { r8, r9, r10 }  \n\t"        \
-       "adds   r8, r8, %2           \n\t"        \
-       "umull  r11, r12, %4, r5     \n\t"        \
+       "ldmia  %0!, { r8, r9, r10 } \n\t"        \
+       "ldmia  %1, { r5, r6, r7 }   \n\t"        \
+       "adds   r5, r5, %2           \n\t"        \
+       "umull  r11, r12, %4, r8     \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
-       "adds   r8, r8, r11          \n\t"        \
-       "adcs   r9, r9, %2           \n\t"        \
-       "umull  r11, r12, %4, r6     \n\t"        \
+       "adds   r5, r5, r11          \n\t"        \
+       "adcs   r6, r6, %2           \n\t"        \
+       "umull  r11, r12, %4, r9     \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
-       "adds   r9, r9, r11          \n\t"        \
-       "adcs   r10, r10, %2         \n\t"        \
-       "umull  r11, r12, %4, r7     \n\t"        \
+       "adds   r6, r6, r11          \n\t"        \
+       "adcs   r7, r7, %2           \n\t"        \
+       "umull  r11, r12, %4, r10    \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
-       "adds   r10, r10, r11        \n\t"        \
-       "stmia  %1!, { r8, r9, r10 } \n\t"        \
+       "adds   r7, r7, r11          \n\t"        \
+       "stmia  %1!, { r5, r6, r7 }  \n\t"        \
        MULADDC_1024_CORE MULADDC_1024_CORE       \
        MULADDC_1024_CORE MULADDC_1024_CORE       \
        MULADDC_1024_CORE MULADDC_1024_CORE       \
index 684445c..ea5b828 100644 (file)
@@ -1471,7 +1471,7 @@ static void mpi_montsqr( size_t n, const t_uint *np, t_uint mm, t_uint *d )
            "umull  r6, r11, %[x_i], %[x_i]\n\t"
            "adds   r5, r5, r6\n\t"
            "adc    r4, r8, r11\n\t"
-           "subs   r11, %[xj], %[x_max1]\n\t" /* could use "CMP" but slower */
+           "cmp    %[xj], %[x_max1]\n\t"
            "str    r5, [%[wij]], #4\n\t"
            "beq    1f\n\t"
            "bhi    0f\n"