A small improvement to bignum multiplication
author NIIBE Yutaka <gniibe@fsij.org>
Fri, 20 Dec 2013 02:55:46 +0000 (11:55 +0900)
committer NIIBE Yutaka <gniibe@fsij.org>
Fri, 20 Dec 2013 02:55:46 +0000 (11:55 +0900)
ChangeLog
polarssl/include/polarssl/bn_mul.h

index 5e0b86a..5bbfaad 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2013-12-20  Niibe Yutaka  <gniibe@fsij.org>
+
+       * polarssl/include/polarssl/bn_mul.h (MULADDC_1024_CORE)
+       (MULADDC_CORE): Reorder instructions for more speed up.
+
 2013-12-19  Niibe Yutaka  <gniibe@fsij.org>
 
        * src/configure (--enable-hid-card-change): New (experimental).
index 444c503..9dd5bf6 100644 (file)
 #define MULADDC_1024_CORE                        \
        "ldmia  %0!, { r5, r6, r7 }  \n\t"        \
        "ldmia  %1, { r8, r9, r10 }  \n\t"        \
-       "umull  r11, r12, %4, r5     \n\t"        \
-       "adcs   r11, r11, %2         \n\t"        \
+       "adcs   r8, r8, %2           \n\t"        \
+       "umull  r11, r12, r5, %4     \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
        "adds   r8, r8, r11          \n\t"        \
-       "umull  r11, r12, %4, r6     \n\t"        \
-       "adcs   r11, r11, %2         \n\t"        \
+       "adcs   r9, r9, %2           \n\t"        \
+       "umull  r11, r12, r6, %4     \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
        "adds   r9, r9, r11          \n\t"        \
-       "umull  r11, r12, %4, r7     \n\t"        \
-       "adcs   r11, r11, %2         \n\t"        \
+       "adcs   r10, r10, %2         \n\t"        \
+       "umull  r11, r12, r7, %4     \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
        "adds   r10, r10, r11        \n\t"        \
        "stmia  %1!, { r8, r9, r10 } \n\t"
 "1:    sub    %3, %3, #32          \n\t"        \
        "ldmia  %0!, { r5, r6, r7 }  \n\t"        \
        "ldmia  %1, { r8, r9, r10 }  \n\t"        \
+       "adds   r8, r8, %2           \n\t"        \
        "umull  r11, r12, %4, r5     \n\t"        \
-       "adds   r11, r11, %2         \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
        "adds   r8, r8, r11          \n\t"        \
+       "adcs   r9, r9, %2           \n\t"        \
        "umull  r11, r12, %4, r6     \n\t"        \
-       "adcs   r11, r11, %2         \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
        "adds   r9, r9, r11          \n\t"        \
+       "adcs   r10, r10, %2         \n\t"        \
        "umull  r11, r12, %4, r7     \n\t"        \
-       "adcs   r11, r11, %2         \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
        "adds   r10, r10, r11        \n\t"        \
        "stmia  %1!, { r8, r9, r10 } \n\t"        \
        MULADDC_1024_CORE                         \
        "ldmia  %0!, { r5, r6 }      \n\t"        \
        "ldmia  %1, { r8, r9 }       \n\t"        \
+       "adcs   r8, r8, %2           \n\t"        \
        "umull  r11, r12, %4, r5     \n\t"        \
-       "adcs   r11, r11, %2         \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
        "adds   r8, r8, r11          \n\t"        \
+       "adcs   r9, r9, %2           \n\t"        \
        "umull  r11, r12, %4, r6     \n\t"        \
-       "adcs   r11, r11, %2         \n\t"        \
        "adc    %2, r12, #0          \n\t"        \
        "adds   r9, r9, r11          \n\t"        \
        "stmia  %1!, { r8, r9 }      \n\t"        \
 #define MULADDC_CORE                                   \
                   "ldr    r5, [%1]        \n\t"        \
                   "ldr    r4, [%0], #4    \n\t"        \
+                  "adcs   r5, r5, %2      \n\t"        \
                   "umull  r6, r7, %3, r4  \n\t"        \
-                  "adcs   r6, r6, %2      \n\t"        \
                   "adc    %2, r7, #0      \n\t"        \
                   "adds   r5, r5, r6      \n\t"        \
                   "str    r5, [%1], #4    \n\t"