From 8c12c6939aab9106db14ec2d11d983bc5b29fb2c Mon Sep 17 00:00:00 2001 From: Niall Sheridan Date: Sun, 7 Jul 2019 21:33:44 +0100 Subject: Switch to modules --- vendor/golang.org/x/crypto/poly1305/sum_s390x.s | 400 ------------------------ 1 file changed, 400 deletions(-) delete mode 100644 vendor/golang.org/x/crypto/poly1305/sum_s390x.s (limited to 'vendor/golang.org/x/crypto/poly1305/sum_s390x.s') diff --git a/vendor/golang.org/x/crypto/poly1305/sum_s390x.s b/vendor/golang.org/x/crypto/poly1305/sum_s390x.s deleted file mode 100644 index 356c07a..0000000 --- a/vendor/golang.org/x/crypto/poly1305/sum_s390x.s +++ /dev/null @@ -1,400 +0,0 @@ -// Copyright 2018 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build s390x,go1.11,!gccgo,!appengine - -#include "textflag.h" - -// Implementation of Poly1305 using the vector facility (vx). - -// constants -#define MOD26 V0 -#define EX0 V1 -#define EX1 V2 -#define EX2 V3 - -// temporaries -#define T_0 V4 -#define T_1 V5 -#define T_2 V6 -#define T_3 V7 -#define T_4 V8 - -// key (r) -#define R_0 V9 -#define R_1 V10 -#define R_2 V11 -#define R_3 V12 -#define R_4 V13 -#define R5_1 V14 -#define R5_2 V15 -#define R5_3 V16 -#define R5_4 V17 -#define RSAVE_0 R5 -#define RSAVE_1 R6 -#define RSAVE_2 R7 -#define RSAVE_3 R8 -#define RSAVE_4 R9 -#define R5SAVE_1 V28 -#define R5SAVE_2 V29 -#define R5SAVE_3 V30 -#define R5SAVE_4 V31 - -// message block -#define F_0 V18 -#define F_1 V19 -#define F_2 V20 -#define F_3 V21 -#define F_4 V22 - -// accumulator -#define H_0 V23 -#define H_1 V24 -#define H_2 V25 -#define H_3 V26 -#define H_4 V27 - -GLOBL ·keyMask<>(SB), RODATA, $16 -DATA ·keyMask<>+0(SB)/8, $0xffffff0ffcffff0f -DATA ·keyMask<>+8(SB)/8, $0xfcffff0ffcffff0f - -GLOBL ·bswapMask<>(SB), RODATA, $16 -DATA ·bswapMask<>+0(SB)/8, $0x0f0e0d0c0b0a0908 -DATA ·bswapMask<>+8(SB)/8, $0x0706050403020100 - -GLOBL ·constants<>(SB), RODATA, $64 -// MOD26 -DATA ·constants<>+0(SB)/8, $0x3ffffff -DATA ·constants<>+8(SB)/8, $0x3ffffff -// EX0 -DATA ·constants<>+16(SB)/8, $0x0006050403020100 -DATA ·constants<>+24(SB)/8, $0x1016151413121110 -// EX1 -DATA ·constants<>+32(SB)/8, $0x060c0b0a09080706 -DATA ·constants<>+40(SB)/8, $0x161c1b1a19181716 -// EX2 -DATA ·constants<>+48(SB)/8, $0x0d0d0d0d0d0f0e0d -DATA ·constants<>+56(SB)/8, $0x1d1d1d1d1d1f1e1d - -// h = (f*g) % (2**130-5) [partial reduction] -#define MULTIPLY(f0, f1, f2, f3, f4, g0, g1, g2, g3, g4, g51, g52, g53, g54, h0, h1, h2, h3, h4) \ - VMLOF f0, g0, h0 \ - VMLOF f0, g1, h1 \ - VMLOF f0, g2, h2 \ - VMLOF f0, g3, h3 \ - VMLOF f0, g4, h4 \ - VMLOF f1, g54, T_0 \ - VMLOF f1, g0, T_1 \ - VMLOF f1, g1, T_2 \ - VMLOF f1, g2, T_3 \ - VMLOF f1, g3, T_4 \ - VMALOF f2, g53, h0, h0 \ - VMALOF f2, g54, h1, h1 \ - VMALOF f2, g0, h2, h2 \ - VMALOF f2, g1, h3, h3 \ - VMALOF f2, g2, h4, h4 \ - VMALOF f3, g52, T_0, T_0 \ - VMALOF f3, g53, T_1, T_1 \ - VMALOF f3, g54, T_2, T_2 \ - VMALOF f3, g0, T_3, T_3 \ - VMALOF f3, g1, T_4, T_4 \ - VMALOF f4, g51, h0, h0 \ - VMALOF f4, g52, h1, h1 \ - VMALOF f4, g53, h2, h2 \ - VMALOF f4, g54, h3, h3 \ - VMALOF f4, g0, h4, h4 \ - VAG T_0, h0, h0 \ - VAG T_1, h1, h1 \ - VAG T_2, h2, h2 \ - VAG T_3, h3, h3 \ - VAG T_4, h4, h4 - -// carry h0->h1 h3->h4, h1->h2 h4->h0, h0->h1 h2->h3, h3->h4 -#define REDUCE(h0, h1, h2, h3, h4) \ - VESRLG $26, h0, T_0 \ - VESRLG $26, h3, T_1 \ - VN MOD26, h0, h0 \ - VN MOD26, h3, h3 \ - VAG T_0, h1, h1 \ - VAG T_1, h4, h4 \ - VESRLG $26, h1, T_2 \ - VESRLG $26, h4, T_3 \ - VN MOD26, h1, h1 \ - VN MOD26, h4, h4 \ - VESLG $2, T_3, T_4 \ - VAG T_3, T_4, T_4 \ - VAG T_2, h2, h2 \ - VAG T_4, h0, h0 \ - VESRLG $26, h2, T_0 \ - VESRLG $26, h0, T_1 \ - VN MOD26, h2, h2 \ - VN MOD26, h0, h0 \ - VAG T_0, h3, h3 \ - VAG T_1, h1, h1 \ - VESRLG $26, h3, T_2 \ - VN MOD26, h3, h3 \ - VAG T_2, h4, h4 - -// expand in0 into d[0] and in1 into d[1] -#define EXPAND(in0, in1, d0, d1, d2, d3, d4) \ - VGBM $0x0707, d1 \ // d1=tmp - VPERM in0, in1, EX2, d4 \ - VPERM in0, in1, EX0, d0 \ - VPERM in0, in1, EX1, d2 \ - VN d1, d4, d4 \ - VESRLG $26, d0, d1 \ - VESRLG $30, d2, d3 \ - VESRLG $4, d2, d2 \ - VN MOD26, d0, d0 \ - VN MOD26, d1, d1 \ - VN MOD26, d2, d2 \ - VN MOD26, d3, d3 - -// pack h4:h0 into h1:h0 (no carry) -#define PACK(h0, h1, h2, h3, h4) \ - VESLG $26, h1, h1 \ - VESLG $26, h3, h3 \ - VO h0, h1, h0 \ - VO h2, h3, h2 \ - VESLG $4, h2, h2 \ - VLEIB $7, $48, h1 \ - VSLB h1, h2, h2 \ - VO h0, h2, h0 \ - VLEIB $7, $104, h1 \ - VSLB h1, h4, h3 \ - VO h3, h0, h0 \ - VLEIB $7, $24, h1 \ - VSRLB h1, h4, h1 - -// if h > 2**130-5 then h -= 2**130-5 -#define MOD(h0, h1, t0, t1, t2) \ - VZERO t0 \ - VLEIG $1, $5, t0 \ - VACCQ h0, t0, t1 \ - VAQ h0, t0, t0 \ - VONE t2 \ - VLEIG $1, $-4, t2 \ - VAQ t2, t1, t1 \ - VACCQ h1, t1, t1 \ - VONE t2 \ - VAQ t2, t1, t1 \ - VN h0, t1, t2 \ - VNC t0, t1, t1 \ - VO t1, t2, h0 - -// func poly1305vx(out *[16]byte, m *byte, mlen uint64, key *[32]key) -TEXT ·poly1305vx(SB), $0-32 - // This code processes up to 2 blocks (32 bytes) per iteration - // using the algorithm described in: - // NEON crypto, Daniel J. Bernstein & Peter Schwabe - // https://cryptojedi.org/papers/neoncrypto-20120320.pdf - LMG out+0(FP), R1, R4 // R1=out, R2=m, R3=mlen, R4=key - - // load MOD26, EX0, EX1 and EX2 - MOVD $·constants<>(SB), R5 - VLM (R5), MOD26, EX2 - - // setup r - VL (R4), T_0 - MOVD $·keyMask<>(SB), R6 - VL (R6), T_1 - VN T_0, T_1, T_0 - EXPAND(T_0, T_0, R_0, R_1, R_2, R_3, R_4) - - // setup r*5 - VLEIG $0, $5, T_0 - VLEIG $1, $5, T_0 - - // store r (for final block) - VMLOF T_0, R_1, R5SAVE_1 - VMLOF T_0, R_2, R5SAVE_2 - VMLOF T_0, R_3, R5SAVE_3 - VMLOF T_0, R_4, R5SAVE_4 - VLGVG $0, R_0, RSAVE_0 - VLGVG $0, R_1, RSAVE_1 - VLGVG $0, R_2, RSAVE_2 - VLGVG $0, R_3, RSAVE_3 - VLGVG $0, R_4, RSAVE_4 - - // skip r**2 calculation - CMPBLE R3, $16, skip - - // calculate r**2 - MULTIPLY(R_0, R_1, R_2, R_3, R_4, R_0, R_1, R_2, R_3, R_4, R5SAVE_1, R5SAVE_2, R5SAVE_3, R5SAVE_4, H_0, H_1, H_2, H_3, H_4) - REDUCE(H_0, H_1, H_2, H_3, H_4) - VLEIG $0, $5, T_0 - VLEIG $1, $5, T_0 - VMLOF T_0, H_1, R5_1 - VMLOF T_0, H_2, R5_2 - VMLOF T_0, H_3, R5_3 - VMLOF T_0, H_4, R5_4 - VLR H_0, R_0 - VLR H_1, R_1 - VLR H_2, R_2 - VLR H_3, R_3 - VLR H_4, R_4 - - // initialize h - VZERO H_0 - VZERO H_1 - VZERO H_2 - VZERO H_3 - VZERO H_4 - -loop: - CMPBLE R3, $32, b2 - VLM (R2), T_0, T_1 - SUB $32, R3 - MOVD $32(R2), R2 - EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4) - VLEIB $4, $1, F_4 - VLEIB $12, $1, F_4 - -multiply: - VAG H_0, F_0, F_0 - VAG H_1, F_1, F_1 - VAG H_2, F_2, F_2 - VAG H_3, F_3, F_3 - VAG H_4, F_4, F_4 - MULTIPLY(F_0, F_1, F_2, F_3, F_4, R_0, R_1, R_2, R_3, R_4, R5_1, R5_2, R5_3, R5_4, H_0, H_1, H_2, H_3, H_4) - REDUCE(H_0, H_1, H_2, H_3, H_4) - CMPBNE R3, $0, loop - -finish: - // sum vectors - VZERO T_0 - VSUMQG H_0, T_0, H_0 - VSUMQG H_1, T_0, H_1 - VSUMQG H_2, T_0, H_2 - VSUMQG H_3, T_0, H_3 - VSUMQG H_4, T_0, H_4 - - // h may be >= 2*(2**130-5) so we need to reduce it again - REDUCE(H_0, H_1, H_2, H_3, H_4) - - // carry h1->h4 - VESRLG $26, H_1, T_1 - VN MOD26, H_1, H_1 - VAQ T_1, H_2, H_2 - VESRLG $26, H_2, T_2 - VN MOD26, H_2, H_2 - VAQ T_2, H_3, H_3 - VESRLG $26, H_3, T_3 - VN MOD26, H_3, H_3 - VAQ T_3, H_4, H_4 - - // h is now < 2*(2**130-5) - // pack h into h1 (hi) and h0 (lo) - PACK(H_0, H_1, H_2, H_3, H_4) - - // if h > 2**130-5 then h -= 2**130-5 - MOD(H_0, H_1, T_0, T_1, T_2) - - // h += s - MOVD $·bswapMask<>(SB), R5 - VL (R5), T_1 - VL 16(R4), T_0 - VPERM T_0, T_0, T_1, T_0 // reverse bytes (to big) - VAQ T_0, H_0, H_0 - VPERM H_0, H_0, T_1, H_0 // reverse bytes (to little) - VST H_0, (R1) - - RET - -b2: - CMPBLE R3, $16, b1 - - // 2 blocks remaining - SUB $17, R3 - VL (R2), T_0 - VLL R3, 16(R2), T_1 - ADD $1, R3 - MOVBZ $1, R0 - CMPBEQ R3, $16, 2(PC) - VLVGB R3, R0, T_1 - EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4) - CMPBNE R3, $16, 2(PC) - VLEIB $12, $1, F_4 - VLEIB $4, $1, F_4 - - // setup [r²,r] - VLVGG $1, RSAVE_0, R_0 - VLVGG $1, RSAVE_1, R_1 - VLVGG $1, RSAVE_2, R_2 - VLVGG $1, RSAVE_3, R_3 - VLVGG $1, RSAVE_4, R_4 - VPDI $0, R5_1, R5SAVE_1, R5_1 - VPDI $0, R5_2, R5SAVE_2, R5_2 - VPDI $0, R5_3, R5SAVE_3, R5_3 - VPDI $0, R5_4, R5SAVE_4, R5_4 - - MOVD $0, R3 - BR multiply - -skip: - VZERO H_0 - VZERO H_1 - VZERO H_2 - VZERO H_3 - VZERO H_4 - - CMPBEQ R3, $0, finish - -b1: - // 1 block remaining - SUB $1, R3 - VLL R3, (R2), T_0 - ADD $1, R3 - MOVBZ $1, R0 - CMPBEQ R3, $16, 2(PC) - VLVGB R3, R0, T_0 - VZERO T_1 - EXPAND(T_0, T_1, F_0, F_1, F_2, F_3, F_4) - CMPBNE R3, $16, 2(PC) - VLEIB $4, $1, F_4 - VLEIG $1, $1, R_0 - VZERO R_1 - VZERO R_2 - VZERO R_3 - VZERO R_4 - VZERO R5_1 - VZERO R5_2 - VZERO R5_3 - VZERO R5_4 - - // setup [r, 1] - VLVGG $0, RSAVE_0, R_0 - VLVGG $0, RSAVE_1, R_1 - VLVGG $0, RSAVE_2, R_2 - VLVGG $0, RSAVE_3, R_3 - VLVGG $0, RSAVE_4, R_4 - VPDI $0, R5SAVE_1, R5_1, R5_1 - VPDI $0, R5SAVE_2, R5_2, R5_2 - VPDI $0, R5SAVE_3, R5_3, R5_3 - VPDI $0, R5SAVE_4, R5_4, R5_4 - - MOVD $0, R3 - BR multiply - -TEXT ·hasVectorFacility(SB), NOSPLIT, $24-1 - MOVD $x-24(SP), R1 - XC $24, 0(R1), 0(R1) // clear the storage - MOVD $2, R0 // R0 is the number of double words stored -1 - WORD $0xB2B01000 // STFLE 0(R1) - XOR R0, R0 // reset the value of R0 - MOVBZ z-8(SP), R1 - AND $0x40, R1 - BEQ novector - -vectorinstalled: - // check if the vector instruction has been enabled - VLEIB $0, $0xF, V16 - VLGVB $0, V16, R1 - CMPBNE R1, $0xF, novector - MOVB $1, ret+0(FP) // have vx - RET - -novector: - MOVB $0, ret+0(FP) // no vx - RET -- cgit v1.2.3