1 /* -*- coding: utf-8 -*-
2 * ecc-mont.c - Elliptic curve computation for
3 * the Montgomery curve: y^2 = x^3 + 486662*x^2 + x.
5 * Copyright (C) 2014, 2015 Free Software Initiative of Japan
6 * Author: NIIBE Yutaka <gniibe@fsij.org>
8 * This file is a part of Gnuk, a GnuPG USB Token implementation.
10 * Gnuk is free software: you can redistribute it and/or modify it
11 * under the terms of the GNU General Public License as published by
12 * the Free Software Foundation, either version 3 of the License, or
13 * (at your option) any later version.
15 * Gnuk is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 * License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program. If not, see <http://www.gnu.org/licenses/>.
35 * [1] D. J. Bernstein. Curve25519: new Diffie-Hellman speed records.
36 * Proceedings of PKC 2006, to appear.
37 * http://cr.yp.to/papers.html#curve25519. Date: 2006.02.09.
39 * [2] D. J. Bernstein. Can we avoid tests for zero in fast
40 * elliptic-curve arithmetic?
41 * http://cr.yp.to/papers.html#curvezero. Date: 2006.07.26.
48 * (0) We assume that the processor has no cache, nor branch target
49 * prediction. Thus, we don't avoid indexing by secret value.
50 * We don't avoid conditional jump if both cases have same timing,
53 * (1) We use Radix-32 field arithmetic. It's a representation like
54 * 2^256-38, but it's more redundant. For example, "1" can be
55 * represented in three ways in 256-bit: 1, 2^255-18, and
58 * (2) We use Montgomery double-and-add.
62 #ifndef BN256_C_IMPLEMENTATION
63 #define ASM_IMPLEMENTATION 1
68 * 1 1101 1011 0100 0001
71 mod25638_mul_121665 (bn256 *x, const bn256 *a)
73 #if ASM_IMPLEMENTATION
74 #include "muladd_256.h"
82 memset (d, 0, sizeof (bn256));
84 MULADD_256_ASM (s, d, w, c);
89 c = c1 = bn256_shift (m, a, 6); c += bn256_add (x, a, m);
90 c1 <<= 2; c1 |= bn256_shift (m, m, 2); c = c + c1 + bn256_add (x, x, m);
91 c1 <<= 1; c1 |= bn256_shift (m, m, 1); c = c + c1 + bn256_add (x, x, m);
92 c1 <<= 2; c1 |= bn256_shift (m, m, 2); c = c + c1 + bn256_add (x, x, m);
93 c1 <<= 1; c1 |= bn256_shift (m, m, 1); c = c + c1 + bn256_add (x, x, m);
94 c1 <<= 2; c1 |= bn256_shift (m, m, 2); c = c + c1 + bn256_add (x, x, m);
95 c1 <<= 1; c1 |= bn256_shift (m, m, 1); c = c + c1 + bn256_add (x, x, m);
96 c1 <<= 1; c1 |= bn256_shift (m, m, 1); c = c + c1 + bn256_add (x, x, m);
98 c = bn256_add_uint (x, x, c*38);
111 * @brief Process Montgomery double-and-add
113 * With Q0, Q1, DIF (= Q0 - Q1), compute PRD = 2Q0, SUM = Q0 + Q1
114 * Q0 and Q1 are clobbered.
118 mont_d_and_a (pt *prd, pt *sum, pt *q0, pt *q1, const bn256 *dif_x)
120 mod25638_add (sum->x, q1->x, q1->z);
121 mod25638_sub (q1->z, q1->x, q1->z);
122 mod25638_add (prd->x, q0->x, q0->z);
123 mod25638_sub (q0->z, q0->x, q0->z);
124 mod25638_mul (q1->x, q0->z, sum->x);
125 mod25638_mul (q1->z, prd->x, q1->z);
126 mod25638_sqr (q0->x, prd->x);
127 mod25638_sqr (q0->z, q0->z);
128 mod25638_add (sum->x, q1->x, q1->z);
129 mod25638_sub (q1->z, q1->x, q1->z);
130 mod25638_mul (prd->x, q0->x, q0->z);
131 mod25638_sub (q0->z, q0->x, q0->z);
132 mod25638_sqr (sum->x, sum->x);
133 mod25638_sqr (sum->z, q1->z);
134 mod25638_mul_121665 (prd->z, q0->z);
135 mod25638_mul (sum->z, sum->z, dif_x);
136 mod25638_add (prd->z, q0->x, prd->z);
137 mod25638_mul (prd->z, prd->z, q0->z);
142 * @brief RES = x-coordinate of [n]Q
144 * @param N Scalar N (three least significant bits are 000)
145 * @param Q_X x-coordinate of Q
149 compute_nQ (bn256 *res, const bn256 *n, const bn256 *q_x)
152 pt p0[1], p1[1], p0_[1], p1_[1];
155 memset (p0->x, 0, sizeof (bn256));
157 memset (p0->z, 0, sizeof (bn256));
160 memcpy (p1->x, q_x, sizeof (bn256));
161 memset (p1->z, 0, sizeof (bn256));
164 for (i = 0; i < 8; i++)
166 uint32_t u = n->word[7-i];
168 for (j = 0; j < 16; j++)
173 if ((u & 0x80000000))
174 q0 = p1, q1 = p0, sum_n = p0_, prd_n = p1_;
176 q0 = p0, q1 = p1, sum_n = p1_, prd_n = p0_;
177 mont_d_and_a (prd_n, sum_n, q0, q1, q_x);
179 if ((u & 0x40000000))
180 q0 = p1_, q1 = p0_, sum_n = p0, prd_n = p1;
182 q0 = p0_, q1 = p1_, sum_n = p1, prd_n = p0;
183 mont_d_and_a (prd_n, sum_n, q0, q1, q_x);
189 /* We know the LSB of N is always 0. Thus, result is always in P0. */
191 * p0->z may be zero here, but our mod_inv doesn't raise error for 0,
192 * but returns 0 (like the implementation of z^(p-2)), thus, RES will
193 * be 0 in that case, which is correct value.
195 mod_inv (res, p0->z, p25519);
196 mod25638_mul (res, res, p0->x);
197 mod25519_reduce (res);
202 ecdh_compute_public_25519 (const uint8_t *key_data)
208 memset (gx, 0, sizeof (bn256));
209 gx[0].word[0] = 9; /* Gx = 9 */
210 memcpy (k, key_data, sizeof (bn256));
211 p = (uint8_t *)malloc (sizeof (bn256));
215 compute_nQ ((bn256 *)p, k, gx);
220 ecdh_decrypt_curve25519 (const uint8_t *input, uint8_t *output,
221 const uint8_t *key_data)
227 memcpy (q_x, input, sizeof (bn256));
228 memcpy (k, key_data, sizeof (bn256));
229 compute_nQ (shared, k, q_x);
230 memcpy (output, shared, sizeof (bn256));