aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/crypto/poly1305/sum_amd64.s
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/crypto/poly1305/sum_amd64.s')
-rw-r--r--vendor/golang.org/x/crypto/poly1305/sum_amd64.s125
1 files changed, 125 insertions, 0 deletions
diff --git a/vendor/golang.org/x/crypto/poly1305/sum_amd64.s b/vendor/golang.org/x/crypto/poly1305/sum_amd64.s
new file mode 100644
index 0000000..2edae63
--- /dev/null
+++ b/vendor/golang.org/x/crypto/poly1305/sum_amd64.s
@@ -0,0 +1,125 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64,!gccgo,!appengine
+
+#include "textflag.h"
+// POLY1305_ADD: h2:h1:h0 += the 16 bytes at msg (read as two little-endian 64-bit words) plus 2^128 (the $1 added into h2 together with the carry), then msg += 16.
+#define POLY1305_ADD(msg, h0, h1, h2) \
+ ADDQ 0(msg), h0; \
+ ADCQ 8(msg), h1; \
+ ADCQ $1, h2; \
+ LEAQ 16(msg), msg
+// POLY1305_MUL: h2:h1:h0 = h2:h1:h0 * r1:r0 mod 2^130-5. The product is built in t3:t2:t1:t0; the part at and above bit 130 (c) is then folded back in as 4*c + c = 5*c, because 2^130 = 5 (mod 2^130-5). The result is left only partially reduced. Clobbers AX, DX (MULQ) and t0-t3.
+#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
+ MOVQ r0, AX; \
+ MULQ h0; \
+ MOVQ AX, t0; \
+ MOVQ DX, t1; \
+ MOVQ r0, AX; \
+ MULQ h1; \
+ ADDQ AX, t1; \
+ ADCQ $0, DX; \
+ MOVQ r0, t2; \
+ IMULQ h2, t2; \
+ ADDQ DX, t2; \
+ \
+ MOVQ r1, AX; \
+ MULQ h0; \
+ ADDQ AX, t1; \
+ ADCQ $0, DX; \
+ MOVQ DX, h0; \
+ MOVQ r1, t3; \
+ IMULQ h2, t3; \
+ MOVQ r1, AX; \
+ MULQ h1; \
+ ADDQ AX, t2; \
+ ADCQ DX, t3; \
+ ADDQ h0, t2; \
+ ADCQ $0, t3; \
+ \
+ MOVQ t0, h0; \
+ MOVQ t1, h1; \
+ MOVQ t2, h2; \
+ ANDQ $3, h2; \
+ MOVQ t2, t0; \
+ ANDQ $0xFFFFFFFFFFFFFFFC, t0; \
+ ADDQ t0, h0; \
+ ADCQ t3, h1; \
+ ADCQ $0, h2; \
+ SHRQ $2, t3, t2; \
+ SHRQ $2, t3; \
+ ADDQ t2, h0; \
+ ADCQ t3, h1; \
+ ADCQ $0, h2
+// poly1305Mask is ANDed with the first 16 key bytes in ·poly1305 to form r0 and r1: it clears the top 4 bits of every 32-bit word and the low 2 bits of the upper three words.
+DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF // mask applied to key bytes 0-7 (r0)
+DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC // mask applied to key bytes 8-15 (r1)
+GLOBL ·poly1305Mask<>(SB), RODATA, $16 // 16 bytes of read-only data
+// poly1305 writes a 16-byte tag for the mlen bytes at m to out: r = key[0:16] & poly1305Mask, h = (h + block) * r mod 2^130-5 for each 16-byte block, tag = (h + key[16:32]) mod 2^128.
+// func poly1305(out *[16]byte, m *byte, mlen uint64, key *[32]byte)
+TEXT ·poly1305(SB), $0-32 // frame size 0, 32 bytes of arguments
+ MOVQ out+0(FP), DI // DI = destination for the tag
+ MOVQ m+8(FP), SI // SI = message cursor
+ MOVQ mlen+16(FP), R15 // R15 = bytes remaining
+ MOVQ key+24(FP), AX // AX = key pointer (overwritten later by the multiply)
+// Load the first half of the key and mask it to get r; start the accumulator h2:h1:h0 at zero.
+ MOVQ 0(AX), R11
+ MOVQ 8(AX), R12
+ ANDQ ·poly1305Mask<>(SB), R11 // r0
+ ANDQ ·poly1305Mask<>+8(SB), R12 // r1
+ XORQ R8, R8 // h0
+ XORQ R9, R9 // h1
+ XORQ R10, R10 // h2
+// Messages shorter than one 16-byte block go straight to the tail handling.
+ CMPQ R15, $16
+ JB bytes_between_0_and_15
+// Main loop: add one full 16-byte block (plus its 2^128 bit) to h, then multiply by r.
+loop:
+ POLY1305_ADD(SI, R8, R9, R10)
+// multiply is also entered from the tail path, after the padded partial block has been added to h.
+multiply:
+ POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
+ SUBQ $16, R15 // one block consumed
+ CMPQ R15, $16
+ JAE loop // unsigned compare: at least one more full block remains
+// Tail: fewer than 16 bytes remain in R15 (possibly zero).
+bytes_between_0_and_15:
+ TESTQ R15, R15
+ JZ done
+ MOVQ $1, BX // CX:BX starts as the 0x01 padding byte; the shifts below move it just above the last message byte
+ XORQ CX, CX
+ XORQ R13, R13 // scratch for the byte loaded in the loop below
+ ADDQ R15, SI // SI = one past the last message byte; the tail is read backwards
+// Assemble the remaining bytes, last byte first, into the 128-bit little-endian value CX:BX.
+flush_buffer:
+ SHLQ $8, BX, CX // CX:BX <<= 8: double-width shift into CX here, low word shifted on the next line
+ SHLQ $8, BX
+ MOVB -1(SI), R13
+ XORQ R13, BX // place the byte in the low 8 bits just vacated
+ DECQ SI
+ DECQ R15
+ JNZ flush_buffer // loop until R15 reaches zero (flags from DECQ R15)
+// Add the padded partial block to h (no 2^128 bit for this block), then run one last multiply.
+ ADDQ BX, R8
+ ADCQ CX, R9
+ ADCQ $0, R10
+ MOVQ $16, R15 // so the SUBQ $16 after multiply leaves 0 and control falls through to done
+ JMP multiply
+// Final reduction: compute h - (2^130 - 5) and keep the difference unless the subtraction borrows.
+done:
+ MOVQ R8, AX
+ MOVQ R9, BX
+ SUBQ $0xFFFFFFFFFFFFFFFB, AX // low 64 bits of 2^130 - 5
+ SBBQ $0xFFFFFFFFFFFFFFFF, BX // middle 64 bits
+ SBBQ $3, R10 // top bits; a borrow (carry set) means h < 2^130 - 5
+ CMOVQCS R8, AX // on borrow, restore the original h0
+ CMOVQCS R9, BX // on borrow, restore the original h1
+ MOVQ key+24(FP), R8 // reload the key pointer; AX no longer holds it
+ ADDQ 16(R8), AX // add the second half of the key (key bytes 16-31)
+ ADCQ 24(R8), BX // carry out of bit 127 is discarded, i.e. the sum is taken mod 2^128
+// Store the 16-byte result at out.
+ MOVQ AX, 0(DI)
+ MOVQ BX, 8(DI)
+ RET