diff --git a/TunSafe.vcxproj b/TunSafe.vcxproj
index a85d424..48c8261 100644
--- a/TunSafe.vcxproj
+++ b/TunSafe.vcxproj
@@ -54,7 +54,7 @@
-
+
@@ -178,6 +178,12 @@
+
+
+
+
+
+
@@ -186,9 +192,9 @@
-
+
-
+
@@ -212,8 +218,7 @@
-
-
+
@@ -221,7 +226,7 @@
-
+
NotUsing
NotUsing
@@ -229,7 +234,7 @@
NotUsing
NotUsing
-
+
NotUsing
NotUsing
NotUsing
@@ -255,34 +260,34 @@
-
+
true
true
-
+
true
true
-
+
true
true
-
+
Document
true
true
-
+
true
true
-
+
true
true
-
+
\ No newline at end of file
diff --git a/TunSafe.vcxproj.filters b/TunSafe.vcxproj.filters
index 01e1ff8..71677f1 100644
--- a/TunSafe.vcxproj.filters
+++ b/TunSafe.vcxproj.filters
@@ -14,6 +14,15 @@
{d31b1b9f-4a2e-42d4-a26c-7c3daa4ccbe3}
+
+ {45ab50f7-cde8-4d0b-9756-5bfa3b9a28db}
+
+
+ {6adfd763-0197-437b-b7f6-2ffdd1e8e508}
+
+
+ {1ca37c7b-e91e-4648-9584-7d0c73d8e416}
+
@@ -41,12 +50,6 @@
Source Files\Win32
-
- crypto
-
-
- crypto
-
Source Files
@@ -56,9 +59,6 @@
Source Files
-
- crypto
-
Source Files
@@ -95,6 +95,31 @@
Source Files\Win32
+
+
+ crypto\blake2s
+
+
+ crypto\blake2s
+
+
+ crypto\blake2s
+
+
+ crypto\blake2s
+
+
+ crypto\blake2s
+
+
+ crypto\blake2s
+
+
+ crypto\curve25519
+
+
+ crypto\chacha20poly1305
+
@@ -118,27 +143,12 @@
Source Files\Win32
-
- crypto
-
-
- crypto
-
-
- crypto
-
-
- crypto
-
Source Files
Source Files
-
- crypto
-
crypto\aesgcm
@@ -163,6 +173,18 @@
Source Files\Win32
+
+ Source Files
+
+
+ crypto\blake2s
+
+
+ crypto\curve25519
+
+
+ crypto\chacha20poly1305
+
@@ -174,23 +196,23 @@
-
- crypto
-
-
- crypto
-
-
- crypto
-
-
+
crypto\aesgcm
-
+
crypto\aesgcm
-
+
crypto\aesgcm
+
+ crypto\curve25519
+
+
+ crypto\chacha20poly1305
+
+
+ crypto\chacha20poly1305
+
\ No newline at end of file
diff --git a/build_freebsd.sh b/build_freebsd.sh
index 2f86855..51c1671 100755
--- a/build_freebsd.sh
+++ b/build_freebsd.sh
@@ -1,4 +1,5 @@
-g++7 -I . -O2 -DNDEBUG -static -mssse3 -o tunsafe benchmark.cpp tunsafe_cpu.cpp wireguard_config.cpp ip_to_peer_map.cpp tunsafe_threading.cpp \
-wireguard.cpp wireguard_proto.cpp ts.cpp util.cpp network_bsd.cpp network_bsd_common.cpp \
-crypto/blake2s.cpp crypto/blake2s_sse.cpp crypto/chacha20poly1305.cpp crypto/curve25519-donna.cpp \
-crypto/siphash.cpp crypto/chacha20_x64_gas.s crypto/poly1305_x64_gas.s ipzip2/ipzip2.cpp -lrt -pthread
+g++7 -I . -O2 -DNDEBUG -DWITH_NETWORK_BSD=1 -static -mssse3 -o tunsafe \
+tunsafe_amalgam.cpp \
+crypto/chacha20/chacha20-x64-linux.s \
+crypto/poly1305/poly1305-x64-linux.s \
+-lrt -pthread
diff --git a/build_linux.sh b/build_linux.sh
index 850dcdc..4e87ecc 100755
--- a/build_linux.sh
+++ b/build_linux.sh
@@ -1,9 +1,10 @@
#!/bin/sh
-clang++-6.0 -c -march=skylake-avx512 crypto/poly1305_x64_gas.s crypto/chacha20_x64_gas.s
-clang++-6.0 -I . -O3 -DNDEBUG -mssse3 -pthread -lrt -o tunsafe util.cpp wireguard_config.cpp wireguard.cpp ts.cpp ip_to_peer_map.cpp tunsafe_threading.cpp \
-wireguard_proto.cpp network_bsd.cpp network_bsd_common.cpp tunsafe_cpu.cpp benchmark.cpp crypto/blake2s.cpp crypto/blake2s_sse.cpp crypto/chacha20poly1305.cpp \
-crypto/curve25519-donna.cpp crypto/siphash.cpp chacha20_x64_gas.o crypto/aesgcm/aesni_gcm_x64_gas.s \
-crypto/aesgcm/aesni_x64_gas.s crypto/aesgcm/aesgcm.cpp poly1305_x64_gas.o ipzip2/ipzip2.cpp \
-crypto/aesgcm/ghash_x64_gas.s
-
-
+set -e  # abort before linking if assembling the .s files fails
+clang++-6.0 -c -march=skylake-avx512 crypto/poly1305/poly1305-x64-linux.s crypto/chacha20/chacha20-x64-linux.s  # writes poly1305-x64-linux.o and chacha20-x64-linux.o to the current directory
+clang++-6.0 -I . -O3 -DNDEBUG -DWITH_NETWORK_BSD=1 -mssse3 -pthread -o tunsafe \
+tunsafe_amalgam.cpp \
+crypto/aesgcm/aesni_gcm-x64-linux.s \
+crypto/aesgcm/aesni-x64-linux.s \
+crypto/aesgcm/ghash-x64-linux.s \
+chacha20-x64-linux.o \
+poly1305-x64-linux.o \
+-lrt  # libraries go after the inputs that reference them; no trailing backslash so the command ends here
diff --git a/build_linux_rpi.sh b/build_linux_rpi.sh
index 96208a8..aae88c2 100755
--- a/build_linux_rpi.sh
+++ b/build_linux_rpi.sh
@@ -1,11 +1,10 @@
#!/bin/sh
set -e
+#cpp -D__ARM_ARCH__=7 crypto/chacha20/chacha20-arm.s > crypto/chacha20/chacha20-arm.preprocessed.s
+#cpp -D__ARM_ARCH__=7 crypto/poly1305/poly1305-arm.s > crypto/poly1305/poly1305-arm.preprocessed.s
-cpp -D__ARM_ARCH__=7 crypto/chacha20/chacha20-arm.s > crypto/chacha20/chacha20-arm.preprocessed.s
-cpp -D__ARM_ARCH__=7 crypto/poly1305/poly1305-arm.s > crypto/poly1305/poly1305-arm.preprocessed.s
-
-g++-6 -mfpu=neon -I . -g -O2 -DNDEBUG -fno-omit-frame-pointer -march=armv7-a -mthumb -std=c++11 -pthread -lrt -o tunsafe util.cpp wireguard_config.cpp wireguard.cpp ip_to_peer_map.cpp tunsafe_threading.cpp \
-wireguard_proto.cpp network_bsd.cpp network_bsd_common.cpp tunsafe_cpu.cpp benchmark.cpp crypto/blake2s.cpp crypto/chacha20poly1305.cpp \
-crypto/curve25519-donna.cpp crypto/siphash.cpp crypto/aesgcm/aesgcm.cpp ipzip2/ipzip2.cpp \
-crypto/chacha20/chacha20-arm.preprocessed.s crypto/poly1305/poly1305-arm.preprocessed.s
+g++-6 -mfpu=neon -I . -g -O2 -DNDEBUG -DWITH_NETWORK_BSD=1 -fno-omit-frame-pointer -march=armv7-a -mthumb -std=c++11 -pthread -o tunsafe \
+tunsafe_amalgam.cpp \
+crypto/chacha20/chacha20-arm-linux.S \
+crypto/poly1305/poly1305-arm-linux.S -lrt  # librt listed after the inputs that use it; command ends here (no dangling backslash)
diff --git a/build_osx.sh b/build_osx.sh
index 3e905f9..202d83c 100755
--- a/build_osx.sh
+++ b/build_osx.sh
@@ -1,14 +1,14 @@
set -e
+clang++ -c -mavx512f -mavx512vl crypto/poly1305/poly1305-x64-osx.s crypto/chacha20/chacha20-x64-osx.s
-clang++ -c -mavx512f -mavx512vl crypto/poly1305_x64_gas_macosx.s crypto/chacha20_x64_gas_macosx.s
-
-clang++ -g -O3 -I . -std=c++11 -DNDEBUG=1 -Wno-deprecated-declarations -fno-exceptions -fno-rtti -ffunction-sections -o tunsafe \
-wireguard_config.cpp ip_to_peer_map.cpp tunsafe_threading.cpp wireguard.cpp wireguard_proto.cpp ts.cpp util.cpp network_bsd.cpp network_bsd_common.cpp benchmark.cpp tunsafe_cpu.cpp \
-crypto/blake2s.cpp crypto/blake2s_sse.cpp crypto/chacha20poly1305.cpp crypto/curve25519-donna.cpp \
-crypto/siphash.cpp crypto/aesgcm/aesgcm.cpp ipzip2/ipzip2.cpp \
-crypto/aesgcm/aesni_gcm_x64_gas_macosx.s crypto/aesgcm/aesni_x64_gas_macosx.s crypto/aesgcm/ghash_x64_gas_macosx.s \
-chacha20_x64_gas_macosx.o poly1305_x64_gas_macosx.o
+clang++ -g -O3 -I . -std=c++11 -DWITH_NETWORK_BSD=1 -DNDEBUG=1 -Wno-deprecated-declarations -fno-exceptions -fno-rtti -ffunction-sections -o tunsafe \
+tunsafe_amalgam.cpp \
+crypto/aesgcm/aesni_gcm-x64-osx.s \
+crypto/aesgcm/aesni-x64-osx.s \
+crypto/aesgcm/ghash-x64-osx.s \
+chacha20-x64-osx.o \
+poly1305-x64-osx.o
cp tunsafe tunsafe.unstripped
strip tunsafe
diff --git a/crypto/aesgcm/aesni_x64_gas.s b/crypto/aesgcm/aesni-x64-linux.s
similarity index 100%
rename from crypto/aesgcm/aesni_x64_gas.s
rename to crypto/aesgcm/aesni-x64-linux.s
diff --git a/crypto/aesgcm/aesni_x64_gas_macosx.s b/crypto/aesgcm/aesni-x64-osx.s
similarity index 100%
rename from crypto/aesgcm/aesni_x64_gas_macosx.s
rename to crypto/aesgcm/aesni-x64-osx.s
diff --git a/crypto/aesgcm/aesni_x64_nasm.asm b/crypto/aesgcm/aesni-x64-win.asm
similarity index 100%
rename from crypto/aesgcm/aesni_x64_nasm.asm
rename to crypto/aesgcm/aesni-x64-win.asm
diff --git a/crypto/aesgcm/aesni-x86_64.pl b/crypto/aesgcm/aesni-x64.pl
similarity index 99%
rename from crypto/aesgcm/aesni-x86_64.pl
rename to crypto/aesgcm/aesni-x64.pl
index 252c485..20aa364 100644
--- a/crypto/aesgcm/aesni-x86_64.pl
+++ b/crypto/aesgcm/aesni-x64.pl
@@ -203,7 +203,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../tools/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
diff --git a/crypto/aesgcm/aesni_gcm_x64_gas.s b/crypto/aesgcm/aesni_gcm-x64-linux.s
similarity index 100%
rename from crypto/aesgcm/aesni_gcm_x64_gas.s
rename to crypto/aesgcm/aesni_gcm-x64-linux.s
diff --git a/crypto/aesgcm/aesni_gcm_x64_gas_macosx.s b/crypto/aesgcm/aesni_gcm-x64-osx.s
similarity index 100%
rename from crypto/aesgcm/aesni_gcm_x64_gas_macosx.s
rename to crypto/aesgcm/aesni_gcm-x64-osx.s
diff --git a/crypto/aesgcm/aesni_gcm_x64_nasm.asm b/crypto/aesgcm/aesni_gcm-x64-win.asm
similarity index 100%
rename from crypto/aesgcm/aesni_gcm_x64_nasm.asm
rename to crypto/aesgcm/aesni_gcm-x64-win.asm
diff --git a/crypto/aesgcm/aesni-gcm-x86_64.pl b/crypto/aesgcm/aesni_gcm-x64.pl
similarity index 99%
rename from crypto/aesgcm/aesni-gcm-x86_64.pl
rename to crypto/aesgcm/aesni_gcm-x64.pl
index f1607c7..52fe5c4 100644
--- a/crypto/aesgcm/aesni-gcm-x86_64.pl
+++ b/crypto/aesgcm/aesni_gcm-x64.pl
@@ -50,7 +50,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../tools/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
# |$avx| in ghash-x86_64.pl must be set to at least 1; otherwise tags will
diff --git a/crypto/aesgcm/ghash_x64_gas.s b/crypto/aesgcm/ghash-x64-linux.s
similarity index 100%
rename from crypto/aesgcm/ghash_x64_gas.s
rename to crypto/aesgcm/ghash-x64-linux.s
diff --git a/crypto/aesgcm/ghash_x64_gas_macosx.s b/crypto/aesgcm/ghash-x64-osx.s
similarity index 100%
rename from crypto/aesgcm/ghash_x64_gas_macosx.s
rename to crypto/aesgcm/ghash-x64-osx.s
diff --git a/crypto/aesgcm/ghash_x64_nasm.asm b/crypto/aesgcm/ghash-x64-win.asm
similarity index 100%
rename from crypto/aesgcm/ghash_x64_nasm.asm
rename to crypto/aesgcm/ghash-x64-win.asm
diff --git a/crypto/aesgcm/ghash-x86_64.pl b/crypto/aesgcm/ghash-x64.pl
similarity index 99%
rename from crypto/aesgcm/ghash-x86_64.pl
rename to crypto/aesgcm/ghash-x64.pl
index ad94168..0eb80be 100644
--- a/crypto/aesgcm/ghash-x86_64.pl
+++ b/crypto/aesgcm/ghash-x64.pl
@@ -99,7 +99,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../tools/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
# See the notes about |$avx| in aesni-gcm-x86_64.pl; otherwise tags will be
diff --git a/crypto/aesgcm/make.sh b/crypto/aesgcm/make.sh
new file mode 100644
index 0000000..0de76ca
--- /dev/null
+++ b/crypto/aesgcm/make.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+set -e  # stop at the first generator that fails (e.g. "can't locate x86_64-xlate.pl")
+perl aesni_gcm-x64.pl macosx > aesni_gcm-x64-osx.s  # macosx flavour of each perlasm source -> *-osx.s
+perl aesni-x64.pl macosx > aesni-x64-osx.s
+perl ghash-x64.pl macosx > ghash-x64-osx.s
+# Same three sources again with the gas flavour, written to the *-linux.s files.
+perl aesni_gcm-x64.pl gas > aesni_gcm-x64-linux.s
+perl aesni-x64.pl gas > aesni-x64-linux.s
+perl ghash-x64.pl gas > ghash-x64-linux.s
+
diff --git a/crypto/blake2s-load-sse2.h b/crypto/blake2s/blake2s-load-sse2.h
similarity index 100%
rename from crypto/blake2s-load-sse2.h
rename to crypto/blake2s/blake2s-load-sse2.h
diff --git a/crypto/blake2s-load-sse41.h b/crypto/blake2s/blake2s-load-sse41.h
similarity index 100%
rename from crypto/blake2s-load-sse41.h
rename to crypto/blake2s/blake2s-load-sse41.h
diff --git a/crypto/blake2s-load-xop.h b/crypto/blake2s/blake2s-load-xop.h
similarity index 100%
rename from crypto/blake2s-load-xop.h
rename to crypto/blake2s/blake2s-load-xop.h
diff --git a/crypto/blake2s-round.h b/crypto/blake2s/blake2s-round.h
similarity index 83%
rename from crypto/blake2s-round.h
rename to crypto/blake2s/blake2s-round.h
index 44a5574..b9c52c1 100644
--- a/crypto/blake2s-round.h
+++ b/crypto/blake2s/blake2s-round.h
@@ -39,7 +39,7 @@
#endif
-#define G1(row1,row2,row3,row4,buf) \
+#define BLAKE2S_G1(row1,row2,row3,row4,buf) \
row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \
row4 = _mm_xor_si128( row4, row1 ); \
row4 = _mm_roti_epi32(row4, -16); \
@@ -47,7 +47,7 @@
row2 = _mm_xor_si128( row2, row3 ); \
row2 = _mm_roti_epi32(row2, -12);
-#define G2(row1,row2,row3,row4,buf) \
+#define BLAKE2S_G2(row1,row2,row3,row4,buf) \
row1 = _mm_add_epi32( _mm_add_epi32( row1, buf), row2 ); \
row4 = _mm_xor_si128( row4, row1 ); \
row4 = _mm_roti_epi32(row4, -8); \
@@ -55,12 +55,12 @@
row2 = _mm_xor_si128( row2, row3 ); \
row2 = _mm_roti_epi32(row2, -7);
-#define DIAGONALIZE(row1,row2,row3,row4) \
+#define BLAKE2S_DIAGONALIZE(row1,row2,row3,row4) \
row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(2,1,0,3) ); \
row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \
row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(0,3,2,1) );
-#define UNDIAGONALIZE(row1,row2,row3,row4) \
+#define BLAKE2S_UNDIAGONALIZE(row1,row2,row3,row4) \
row4 = _mm_shuffle_epi32( row4, _MM_SHUFFLE(0,3,2,1) ); \
row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE(1,0,3,2) ); \
row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE(2,1,0,3) );
@@ -73,16 +73,16 @@
#include "blake2s-load-sse2.h"
#endif
-#define ROUND(r) \
+#define BLAKE2S_ROUND_SSE(r) \
LOAD_MSG_ ##r ##_1(buf1); \
- G1(row1,row2,row3,row4,buf1); \
+ BLAKE2S_G1(row1,row2,row3,row4,buf1); \
LOAD_MSG_ ##r ##_2(buf2); \
- G2(row1,row2,row3,row4,buf2); \
- DIAGONALIZE(row1,row2,row3,row4); \
+ BLAKE2S_G2(row1,row2,row3,row4,buf2); \
+ BLAKE2S_DIAGONALIZE(row1,row2,row3,row4); \
LOAD_MSG_ ##r ##_3(buf3); \
- G1(row1,row2,row3,row4,buf3); \
+ BLAKE2S_G1(row1,row2,row3,row4,buf3); \
LOAD_MSG_ ##r ##_4(buf4); \
- G2(row1,row2,row3,row4,buf4); \
- UNDIAGONALIZE(row1,row2,row3,row4); \
+ BLAKE2S_G2(row1,row2,row3,row4,buf4); \
+ BLAKE2S_UNDIAGONALIZE(row1,row2,row3,row4); \
#endif
diff --git a/crypto/blake2s/blake2s-sse-impl.h b/crypto/blake2s/blake2s-sse-impl.h
new file mode 100644
index 0000000..a1cb512
--- /dev/null
+++ b/crypto/blake2s/blake2s-sse-impl.h
@@ -0,0 +1,85 @@
+/*
+ BLAKE2 reference source code package - optimized C implementations
+
+ Copyright 2012, Samuel Neves . You may use this under the
+ terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
+ your option. The terms of these licenses can be found at:
+
+ - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
+ - OpenSSL license : https://www.openssl.org/source/license.html
+ - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
+
+ More information about the BLAKE2 hash function can be found at
+ https://blake2.net.
+*/
+
+#include <emmintrin.h>  /* SSE2: __m128i, _mm_loadu_si128, _mm_xor_si128, _mm_set_epi8 */
+#if defined(HAVE_SSSE3)
+#include <tmmintrin.h>  /* SSSE3 */
+#endif
+#if defined(HAVE_SSE41)
+#include <smmintrin.h>  /* SSE4.1 */
+#endif
+#if defined(HAVE_AVX)
+#include <immintrin.h>  /* AVX */
+#endif
+#if defined(HAVE_XOP)
+#include <x86intrin.h>  /* XOP */
+#endif
+
+#include "blake2s-round.h"
+
+void blake2s_compress_sse( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] ) { /* Compresses one message block into S->h: ten BLAKE2S_ROUND_SSE rounds followed by an XOR feed-forward. */
+ __m128i row1, row2, row3, row4; /* working state, one 128-bit row per variable */
+ __m128i buf1, buf2, buf3, buf4; /* filled by LOAD_MSG_<r>_<n> and consumed by BLAKE2S_G1/G2 inside BLAKE2S_ROUND_SSE */
+#if defined(HAVE_SSE41)
+ __m128i t0, t1; /* NOTE(review): presumably scratch for the LOAD_MSG_* macros in blake2s-load-*.h (not shown) -- confirm */
+#if !defined(HAVE_XOP)
+ __m128i t2;
+#endif
+#endif
+ __m128i ff0, ff1; /* snapshot of S->h on entry; XORed back in at the end */
+#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
+ const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 ); /* NOTE(review): byte-shuffle masks, presumably used by the SSSE3 _mm_roti_epi32 path in blake2s-round.h -- confirm */
+ const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 );
+#endif
+#if defined(HAVE_SSE41)
+ const __m128i m0 = LOADU( block + 00 ); /* SSE4.1 build: message held as four 128-bit values at byte offsets 0/16/32/48 */
+ const __m128i m1 = LOADU( block + 16 );
+ const __m128i m2 = LOADU( block + 32 );
+ const __m128i m3 = LOADU( block + 48 );
+#else
+ const uint32_t m0 = load32(block + 0 * sizeof(uint32_t)); /* otherwise: sixteen 32-bit message words read with load32() */
+ const uint32_t m1 = load32(block + 1 * sizeof(uint32_t));
+ const uint32_t m2 = load32(block + 2 * sizeof(uint32_t));
+ const uint32_t m3 = load32(block + 3 * sizeof(uint32_t));
+ const uint32_t m4 = load32(block + 4 * sizeof(uint32_t));
+ const uint32_t m5 = load32(block + 5 * sizeof(uint32_t));
+ const uint32_t m6 = load32(block + 6 * sizeof(uint32_t));
+ const uint32_t m7 = load32(block + 7 * sizeof(uint32_t));
+ const uint32_t m8 = load32(block + 8 * sizeof(uint32_t));
+ const uint32_t m9 = load32(block + 9 * sizeof(uint32_t));
+ const uint32_t m10 = load32(block + 10 * sizeof(uint32_t));
+ const uint32_t m11 = load32(block + 11 * sizeof(uint32_t));
+ const uint32_t m12 = load32(block + 12 * sizeof(uint32_t));
+ const uint32_t m13 = load32(block + 13 * sizeof(uint32_t));
+ const uint32_t m14 = load32(block + 14 * sizeof(uint32_t));
+ const uint32_t m15 = load32(block + 15 * sizeof(uint32_t));
+#endif
+ row1 = ff0 = LOADU( &S->h[0] ); /* row1/row2 start from the current S->h; ff0/ff1 keep a copy */
+ row2 = ff1 = LOADU( &S->h[4] );
+ row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] ); /* row3 starts from blake2s_IV[0..3] */
+ row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) ); /* blake2s_IV[4..7] XOR the 128 bits starting at S->t[0]; NOTE(review): presumably t[] followed by f[] -- struct layout not shown */
+ BLAKE2S_ROUND_SSE( 0 ); /* rounds 0..9; the argument selects the LOAD_MSG_<r>_* macros via token pasting */
+ BLAKE2S_ROUND_SSE( 1 );
+ BLAKE2S_ROUND_SSE( 2 );
+ BLAKE2S_ROUND_SSE( 3 );
+ BLAKE2S_ROUND_SSE( 4 );
+ BLAKE2S_ROUND_SSE( 5 );
+ BLAKE2S_ROUND_SSE( 6 );
+ BLAKE2S_ROUND_SSE( 7 );
+ BLAKE2S_ROUND_SSE( 8 );
+ BLAKE2S_ROUND_SSE( 9 );
+ STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) ); /* feed-forward: h[0..3] = saved h ^ row1 ^ row3 */
+ STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) ); /* h[4..7] = saved h ^ row2 ^ row4 */
+}
diff --git a/crypto/blake2s.cpp b/crypto/blake2s/blake2s.cpp
old mode 100755
new mode 100644
similarity index 99%
rename from crypto/blake2s.cpp
rename to crypto/blake2s/blake2s.cpp
index 540407d..840720e
--- a/crypto/blake2s.cpp
+++ b/crypto/blake2s/blake2s.cpp
@@ -26,8 +26,6 @@ https://blake2.net.
#define BLAKE2S_WITH_ASM 1
#endif // BLAKE2S_WITH_ASM
-void blake2s_compress_sse(blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES]);
-
#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
#if defined(_MSC_VER)
#define BLAKE2_INLINE __inline
@@ -246,6 +244,12 @@ static void blake2s_compress(blake2s_state *S, const uint8_t in[BLAKE2S_BLOCKBYT
#undef G
#undef ROUND
+
+#if defined(ARCH_CPU_X86_FAMILY)
+#include "blake2s-sse-impl.h"
+#endif
+
+
static inline void blake2s_compress_impl(blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES]) {
#if defined(ARCH_CPU_X86_64) && BLAKE2S_WITH_ASM
blake2s_compress_sse(S, block);
diff --git a/crypto/blake2s.h b/crypto/blake2s/blake2s.h
similarity index 100%
rename from crypto/blake2s.h
rename to crypto/blake2s/blake2s.h
diff --git a/crypto/blake2s_sse.cpp b/crypto/blake2s_sse.cpp
deleted file mode 100755
index 2527f24..0000000
--- a/crypto/blake2s_sse.cpp
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- BLAKE2 reference source code package - optimized C implementations
-
- Copyright 2012, Samuel Neves . You may use this under the
- terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at
- your option. The terms of these licenses can be found at:
-
- - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
- - OpenSSL license : https://www.openssl.org/source/license.html
- - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0
-
- More information about the BLAKE2 hash function can be found at
- https://blake2.net.
-*/
-#include "stdafx.h"
-#include
-#include
-#include
-
-#include "blake2s.h"
-#include "crypto_ops.h"
-
-#include
-#if defined(HAVE_SSSE3)
-#include
-#endif
-#if defined(HAVE_SSE41)
-#include
-#endif
-#if defined(HAVE_AVX)
-#include
-#endif
-#if defined(HAVE_XOP)
-#include
-#endif
-
-#include "blake2s-round.h"
-
-#if !defined(__cplusplus) && (!defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L)
-#if defined(_MSC_VER)
-#define BLAKE2_INLINE __inline
-#elif defined(__GNUC__)
-#define BLAKE2_INLINE __inline__
-#else
-#define BLAKE2_INLINE
-#endif
-#else
-#define BLAKE2_INLINE inline
-#endif
-
-static BLAKE2_INLINE uint32_t load32(const void *src) {
-#if defined(ARCH_CPU_LITTLE_ENDIAN)
- uint32_t w;
- memcpy(&w, src, sizeof w);
- return w;
-#else
- const uint8_t *p = (const uint8_t *)src;
- return ((uint32_t)(p[0]) << 0) |
- ((uint32_t)(p[1]) << 8) |
- ((uint32_t)(p[2]) << 16) |
- ((uint32_t)(p[3]) << 24);
-#endif
-}
-
-static BLAKE2_INLINE void store32(void *dst, uint32_t w) {
-#if defined(ARCH_CPU_LITTLE_ENDIAN)
- memcpy(dst, &w, sizeof w);
-#else
- uint8_t *p = (uint8_t *)dst;
- p[0] = (uint8_t)(w >> 0);
- p[1] = (uint8_t)(w >> 8);
- p[2] = (uint8_t)(w >> 16);
- p[3] = (uint8_t)(w >> 24);
-#endif
-}
-
-
-
-static const uint32_t blake2s_IV[8] =
-{
- 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
- 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
-};
-
-/* Some helper functions */
-static void blake2s_set_lastnode( blake2s_state *S )
-{
- S->f[1] = (uint32_t)-1;
-}
-
-static int blake2s_is_lastblock( const blake2s_state *S )
-{
- return S->f[0] != 0;
-}
-
-static void blake2s_set_lastblock( blake2s_state *S )
-{
- if( S->last_node ) blake2s_set_lastnode( S );
-
- S->f[0] = (uint32_t)-1;
-}
-
-static void blake2s_increment_counter( blake2s_state *S, const uint32_t inc )
-{
- uint64_t t = ( ( uint64_t )S->t[1] << 32 ) | S->t[0];
- t += inc;
- S->t[0] = ( uint32_t )( t >> 0 );
- S->t[1] = ( uint32_t )( t >> 32 );
-}
-
-/* init2 xors IV with input parameter block */
-#if 0
-void blake2s_init_param( blake2s_state *S, const blake2s_param *P )
-{
- size_t i;
- /*blake2s_init0( S ); */
- const uint8_t * v = ( const uint8_t * )( blake2s_IV );
- const uint8_t * p = ( const uint8_t * )( P );
- uint8_t * h = ( uint8_t * )( S->h );
- /* IV XOR ParamBlock */
- memset( S, 0, sizeof( blake2s_state ) );
-
- for( i = 0; i < BLAKE2S_OUTBYTES; ++i ) h[i] = v[i] ^ p[i];
-
- S->outlen = P->digest_length;
-}
-
-/* Some sort of default parameter block initialization, for sequential blake2s */
-void blake2s_init( blake2s_state *S, size_t outlen )
-{
- blake2s_param P[1];
- assert(outlen && outlen <= BLAKE2S_OUTBYTES);
-
- P->digest_length = (uint8_t)outlen;
- P->key_length = 0;
- P->fanout = 1;
- P->depth = 1;
- store32( &P->leaf_length, 0 );
- store32( &P->node_offset, 0 );
- store16( &P->xof_length, 0 );
- P->node_depth = 0;
- P->inner_length = 0;
- /* memset(P->reserved, 0, sizeof(P->reserved) ); */
- memset( P->salt, 0, sizeof( P->salt ) );
- memset( P->personal, 0, sizeof( P->personal ) );
-
- blake2s_init_param( S, P );
-}
-
-int blake2s_init_key( blake2s_state *S, size_t outlen, const void *key, size_t keylen )
-{
- blake2s_param P[1];
-
- /* Move interval verification here? */
- if ( ( !outlen ) || ( outlen > BLAKE2S_OUTBYTES ) ) return -1;
-
- if ( ( !key ) || ( !keylen ) || keylen > BLAKE2S_KEYBYTES ) return -1;
-
- P->digest_length = (uint8_t)outlen;
- P->key_length = (uint8_t)keylen;
- P->fanout = 1;
- P->depth = 1;
- store32( &P->leaf_length, 0 );
- store32( &P->node_offset, 0 );
- store16( &P->xof_length, 0 );
- P->node_depth = 0;
- P->inner_length = 0;
- /* memset(P->reserved, 0, sizeof(P->reserved) ); */
- memset( P->salt, 0, sizeof( P->salt ) );
- memset( P->personal, 0, sizeof( P->personal ) );
-
- if( blake2s_init_param( S, P ) < 0 )
- return -1;
-
- {
- uint8_t block[BLAKE2S_BLOCKBYTES];
- memset( block, 0, BLAKE2S_BLOCKBYTES );
- memcpy( block, key, keylen );
- blake2s_update( S, block, BLAKE2S_BLOCKBYTES );
- memzero_crypto( block, BLAKE2S_BLOCKBYTES ); /* Burn the key from stack */
- }
- return 0;
-}
-#endif
-
-
-void blake2s_compress_sse( blake2s_state *S, const uint8_t block[BLAKE2S_BLOCKBYTES] )
-{
- __m128i row1, row2, row3, row4;
- __m128i buf1, buf2, buf3, buf4;
-#if defined(HAVE_SSE41)
- __m128i t0, t1;
-#if !defined(HAVE_XOP)
- __m128i t2;
-#endif
-#endif
- __m128i ff0, ff1;
-#if defined(HAVE_SSSE3) && !defined(HAVE_XOP)
- const __m128i r8 = _mm_set_epi8( 12, 15, 14, 13, 8, 11, 10, 9, 4, 7, 6, 5, 0, 3, 2, 1 );
- const __m128i r16 = _mm_set_epi8( 13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2 );
-#endif
-#if defined(HAVE_SSE41)
- const __m128i m0 = LOADU( block + 00 );
- const __m128i m1 = LOADU( block + 16 );
- const __m128i m2 = LOADU( block + 32 );
- const __m128i m3 = LOADU( block + 48 );
-#else
- const uint32_t m0 = load32(block + 0 * sizeof(uint32_t));
- const uint32_t m1 = load32(block + 1 * sizeof(uint32_t));
- const uint32_t m2 = load32(block + 2 * sizeof(uint32_t));
- const uint32_t m3 = load32(block + 3 * sizeof(uint32_t));
- const uint32_t m4 = load32(block + 4 * sizeof(uint32_t));
- const uint32_t m5 = load32(block + 5 * sizeof(uint32_t));
- const uint32_t m6 = load32(block + 6 * sizeof(uint32_t));
- const uint32_t m7 = load32(block + 7 * sizeof(uint32_t));
- const uint32_t m8 = load32(block + 8 * sizeof(uint32_t));
- const uint32_t m9 = load32(block + 9 * sizeof(uint32_t));
- const uint32_t m10 = load32(block + 10 * sizeof(uint32_t));
- const uint32_t m11 = load32(block + 11 * sizeof(uint32_t));
- const uint32_t m12 = load32(block + 12 * sizeof(uint32_t));
- const uint32_t m13 = load32(block + 13 * sizeof(uint32_t));
- const uint32_t m14 = load32(block + 14 * sizeof(uint32_t));
- const uint32_t m15 = load32(block + 15 * sizeof(uint32_t));
-#endif
- row1 = ff0 = LOADU( &S->h[0] );
- row2 = ff1 = LOADU( &S->h[4] );
- row3 = _mm_loadu_si128( (__m128i const *)&blake2s_IV[0] );
- row4 = _mm_xor_si128( _mm_loadu_si128( (__m128i const *)&blake2s_IV[4] ), LOADU( &S->t[0] ) );
- ROUND( 0 );
- ROUND( 1 );
- ROUND( 2 );
- ROUND( 3 );
- ROUND( 4 );
- ROUND( 5 );
- ROUND( 6 );
- ROUND( 7 );
- ROUND( 8 );
- ROUND( 9 );
- STOREU( &S->h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
- STOREU( &S->h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
-}
-
-#if 0
-int blake2s_update( blake2s_state *S, const void *pin, size_t inlen )
-{
- const unsigned char * in = (const unsigned char *)pin;
- if( inlen > 0 )
- {
- size_t left = S->buflen;
- size_t fill = BLAKE2S_BLOCKBYTES - left;
- if( inlen > fill )
- {
- S->buflen = 0;
- memcpy( S->buf + left, in, fill ); /* Fill buffer */
- blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
- blake2s_compress( S, S->buf ); /* Compress */
- in += fill; inlen -= fill;
- while(inlen > BLAKE2S_BLOCKBYTES) {
- blake2s_increment_counter(S, BLAKE2S_BLOCKBYTES);
- blake2s_compress( S, in );
- in += BLAKE2S_BLOCKBYTES;
- inlen -= BLAKE2S_BLOCKBYTES;
- }
- }
- memcpy( S->buf + S->buflen, in, inlen );
- S->buflen += inlen;
- }
- return 0;
-}
-
-int blake2s_final( blake2s_state *S, void *out, size_t outlen )
-{
- uint8_t buffer[BLAKE2S_OUTBYTES] = {0};
- size_t i;
-
- if( out == NULL || outlen < S->outlen )
- return -1;
-
- if( blake2s_is_lastblock( S ) )
- return -1;
-
- blake2s_increment_counter( S, (uint32_t)S->buflen );
- blake2s_set_lastblock( S );
- memset( S->buf + S->buflen, 0, BLAKE2S_BLOCKBYTES - S->buflen ); /* Padding */
- blake2s_compress( S, S->buf );
-
- for( i = 0; i < 8; ++i ) /* Output full hash to temp buffer */
- store32( buffer + sizeof( S->h[i] ) * i, S->h[i] );
-
- memcpy( out, buffer, S->outlen );
- memzero_crypto( buffer, sizeof(buffer) );
- return 0;
-}
-
-/* inlen, at least, should be uint64_t. Others can be size_t. */
-int blake2s( void *out, size_t outlen, const void *in, size_t inlen, const void *key, size_t keylen )
-{
- blake2s_state S[1];
-
- /* Verify parameters */
- if ( NULL == in && inlen > 0 ) return -1;
-
- if ( NULL == out ) return -1;
-
- if ( NULL == key && keylen > 0) return -1;
-
- if( !outlen || outlen > BLAKE2S_OUTBYTES ) return -1;
-
- if( keylen > BLAKE2S_KEYBYTES ) return -1;
-
- if( keylen > 0 )
- {
- if( blake2s_init_key( S, outlen, key, keylen ) < 0 ) return -1;
- }
- else
- {
- if( blake2s_init( S, outlen ) < 0 ) return -1;
- }
-
- blake2s_update( S, ( const uint8_t * )in, inlen );
- blake2s_final( S, out, outlen );
- return 0;
-}
-#endif
-
-#if defined(SUPERCOP)
-int crypto_hash( unsigned char *out, unsigned char *in, unsigned long long inlen )
-{
- return blake2s( out, BLAKE2S_OUTBYTES, in, inlen, NULL, 0 );
-}
-#endif
-
-#if defined(BLAKE2S_SELFTEST)
-#include
-#include "blake2-kat.h"
-int main( void )
-{
- uint8_t key[BLAKE2S_KEYBYTES];
- uint8_t buf[BLAKE2_KAT_LENGTH];
- size_t i, step;
-
- for( i = 0; i < BLAKE2S_KEYBYTES; ++i )
- key[i] = ( uint8_t )i;
-
- for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
- buf[i] = ( uint8_t )i;
-
- /* Test simple API */
- for( i = 0; i < BLAKE2_KAT_LENGTH; ++i )
- {
- uint8_t hash[BLAKE2S_OUTBYTES];
- blake2s( hash, BLAKE2S_OUTBYTES, buf, i, key, BLAKE2S_KEYBYTES );
-
- if( 0 != memcmp( hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES ) )
- {
- goto fail;
- }
- }
-
- /* Test streaming API */
- for(step = 1; step < BLAKE2S_BLOCKBYTES; ++step) {
- for (i = 0; i < BLAKE2_KAT_LENGTH; ++i) {
- uint8_t hash[BLAKE2S_OUTBYTES];
- blake2s_state S;
- uint8_t * p = buf;
- size_t mlen = i;
- int err = 0;
-
- if( (err = blake2s_init_key(&S, BLAKE2S_OUTBYTES, key, BLAKE2S_KEYBYTES)) < 0 ) {
- goto fail;
- }
-
- while (mlen >= step) {
- if ( (err = blake2s_update(&S, p, step)) < 0 ) {
- goto fail;
- }
- mlen -= step;
- p += step;
- }
- if ( (err = blake2s_update(&S, p, mlen)) < 0) {
- goto fail;
- }
- if ( (err = blake2s_final(&S, hash, BLAKE2S_OUTBYTES)) < 0) {
- goto fail;
- }
-
- if (0 != memcmp(hash, blake2s_keyed_kat[i], BLAKE2S_OUTBYTES)) {
- goto fail;
- }
- }
- }
-
- puts( "ok" );
- return 0;
-fail:
- puts("error");
- return -1;
-}
-#endif
diff --git a/crypto/chacha20/chacha20-arm.s b/crypto/chacha20/chacha20-arm-linux.S
similarity index 99%
rename from crypto/chacha20/chacha20-arm.s
rename to crypto/chacha20/chacha20-arm-linux.S
index 2e22fd1..7721cea 100644
--- a/crypto/chacha20/chacha20-arm.s
+++ b/crypto/chacha20/chacha20-arm-linux.S
@@ -6,6 +6,8 @@
/*#include */
+#define __ARM_ARCH__ 7
+
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
diff --git a/crypto/chacha20/chacha20-arm.pl b/crypto/chacha20/chacha20-arm.pl
index cec1b89..d75e4bb 100644
--- a/crypto/chacha20/chacha20-arm.pl
+++ b/crypto/chacha20/chacha20-arm.pl
@@ -41,7 +41,7 @@ else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
if ($flavour && $flavour ne "void") {
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../arm-xlate.pl" and -f $xlate ) or
- ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ ( $xlate="${dir}../tools/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open STDOUT,"| \"$^X\" $xlate $flavour $output";
diff --git a/crypto/chacha20/chacha20-arm64.s b/crypto/chacha20/chacha20-arm64-ios.S
similarity index 96%
rename from crypto/chacha20/chacha20-arm64.s
rename to crypto/chacha20/chacha20-arm64-ios.S
index c3d1243..2e92a93 100644
--- a/crypto/chacha20/chacha20-arm64.s
+++ b/crypto/chacha20/chacha20-arm64-ios.S
@@ -1,26 +1,23 @@
-/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
- *
- * Copyright (C) 2015-2018 Jason A. Donenfeld . All Rights Reserved.
- * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
- */
-
-#include
-
.text
+
.align 5
-.Lsigma:
+Lsigma:
.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral
-.Lone:
+Lone:
.long 1,0,0,0
+.globl _chacha20_arm
+.globl _chacha20_neon
+
+
.align 5
-ENTRY(chacha20_arm)
- cbz x2,.Labort
-.Lshort:
+_chacha20_arm:
+ cbz x2,Labort
+Lshort:
stp x29,x30,[sp,#-96]!
add x29,sp,#0
- adr x5,.Lsigma
+ adr x5,Lsigma
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
@@ -41,7 +38,7 @@ ENTRY(chacha20_arm)
ror x30,x30,#32
#endif
-.Loop_outer:
+Loop_outer:
mov w5,w22 // unpack key block
lsr x6,x22,#32
mov w7,w23
@@ -61,7 +58,7 @@ ENTRY(chacha20_arm)
mov x4,#10
subs x2,x2,#64
-.Loop:
+Loop:
sub x4,x4,#1
add w5,w5,w9
add w6,w6,w10
@@ -159,7 +156,7 @@ ENTRY(chacha20_arm)
ror w11,w11,#25
ror w12,w12,#25
ror w9,w9,#25
- cbnz x4,.Loop
+ cbnz x4,Loop
add w5,w5,w22 // accumulate key block
add x6,x6,x22,lsr#32
@@ -178,7 +175,7 @@ ENTRY(chacha20_arm)
add w20,w20,w30
add x21,x21,x30,lsr#32
- b.lo .Ltail
+ b.lo Ltail
add x5,x5,x6,lsl#32 // pack
add x7,x7,x8,lsl#32
@@ -219,7 +216,7 @@ ENTRY(chacha20_arm)
stp x17,x20,[x0,#48]
add x0,x0,#64
- b.hi .Loop_outer
+ b.hi Loop_outer
ldp x19,x20,[x29,#16]
add sp,sp,#64
@@ -228,13 +225,13 @@ ENTRY(chacha20_arm)
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
-.Labort:
+Labort:
ret
.align 4
-.Ltail:
+Ltail:
add x2,x2,#64
-.Less_than_64:
+Less_than_64:
sub x0,x0,#1
add x1,x1,x2
add x0,x0,x2
@@ -264,13 +261,13 @@ ENTRY(chacha20_arm)
stp x13,x15,[sp,#32]
stp x17,x20,[sp,#48]
-.Loop_tail:
+Loop_tail:
ldrb w10,[x1,x2]
ldrb w11,[x4,x2]
add x2,x2,#1
eor w10,w10,w11
strb w10,[x0,x2]
- cbnz x2,.Loop_tail
+ cbnz x2,Loop_tail
stp xzr,xzr,[sp,#0]
stp xzr,xzr,[sp,#16]
@@ -285,25 +282,26 @@ ENTRY(chacha20_arm)
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
ret
-ENDPROC(chacha20_arm)
+
+
.align 5
-ENTRY(chacha20_neon)
- cbz x2,.Labort_neon
+_chacha20_neon:
+ cbz x2,Labort_neon
cmp x2,#192
- b.lo .Lshort
+ b.lo Lshort
stp x29,x30,[sp,#-96]!
add x29,sp,#0
- adr x5,.Lsigma
+ adr x5,Lsigma
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
stp x25,x26,[sp,#64]
stp x27,x28,[sp,#80]
cmp x2,#512
- b.hs .L512_or_more_neon
+ b.hs L512_or_more_neon
sub sp,sp,#64
@@ -329,7 +327,7 @@ ENTRY(chacha20_neon)
add v29.4s,v28.4s,v31.4s
shl v31.4s,v31.4s,#2 // 1 -> 4
-.Loop_outer_neon:
+Loop_outer_neon:
mov w5,w22 // unpack key block
lsr x6,x22,#32
mov v0.16b,v24.16b
@@ -361,7 +359,7 @@ ENTRY(chacha20_neon)
mov x4,#10
subs x2,x2,#256
-.Loop_neon:
+Loop_neon:
sub x4,x4,#1
add v0.4s,v0.4s,v1.4s
add w5,w5,w9
@@ -567,7 +565,7 @@ ENTRY(chacha20_neon)
ext v1.16b,v1.16b,v1.16b,#12
ext v5.16b,v5.16b,v5.16b,#12
ext v17.16b,v17.16b,v17.16b,#12
- cbnz x4,.Loop_neon
+ cbnz x4,Loop_neon
add w5,w5,w22 // accumulate key block
add v0.4s,v0.4s,v24.4s
@@ -598,7 +596,7 @@ ENTRY(chacha20_neon)
add x21,x21,x30,lsr#32
add v17.4s,v17.4s,v25.4s
- b.lo .Ltail_neon
+ b.lo Ltail_neon
add x5,x5,x6,lsl#32 // pack
add x7,x7,x8,lsl#32
@@ -663,7 +661,7 @@ ENTRY(chacha20_neon)
eor v19.16b,v19.16b,v3.16b
st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64
- b.hi .Loop_outer_neon
+ b.hi Loop_outer_neon
ldp x19,x20,[x29,#16]
add sp,sp,#64
@@ -674,10 +672,10 @@ ENTRY(chacha20_neon)
ldp x29,x30,[sp],#96
ret
-.Ltail_neon:
+Ltail_neon:
add x2,x2,#256
cmp x2,#64
- b.lo .Less_than_64
+ b.lo Less_than_64
add x5,x5,x6,lsl#32 // pack
add x7,x7,x8,lsl#32
@@ -717,10 +715,10 @@ ENTRY(chacha20_neon)
stp x13,x15,[x0,#32]
stp x17,x20,[x0,#48]
add x0,x0,#64
- b.eq .Ldone_neon
+ b.eq Ldone_neon
sub x2,x2,#64
cmp x2,#64
- b.lo .Less_than_128
+ b.lo Less_than_128
ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
eor v0.16b,v0.16b,v20.16b
@@ -728,10 +726,10 @@ ENTRY(chacha20_neon)
eor v2.16b,v2.16b,v22.16b
eor v3.16b,v3.16b,v23.16b
st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64
- b.eq .Ldone_neon
+ b.eq Ldone_neon
sub x2,x2,#64
cmp x2,#64
- b.lo .Less_than_192
+ b.lo Less_than_192
ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
eor v4.16b,v4.16b,v20.16b
@@ -739,41 +737,41 @@ ENTRY(chacha20_neon)
eor v6.16b,v6.16b,v22.16b
eor v7.16b,v7.16b,v23.16b
st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64
- b.eq .Ldone_neon
+ b.eq Ldone_neon
sub x2,x2,#64
st1 {v16.16b,v17.16b,v18.16b,v19.16b},[sp]
- b .Last_neon
+ b Last_neon
-.Less_than_128:
+Less_than_128:
st1 {v0.16b,v1.16b,v2.16b,v3.16b},[sp]
- b .Last_neon
-.Less_than_192:
+ b Last_neon
+Less_than_192:
st1 {v4.16b,v5.16b,v6.16b,v7.16b},[sp]
- b .Last_neon
+ b Last_neon
.align 4
-.Last_neon:
+Last_neon:
sub x0,x0,#1
add x1,x1,x2
add x0,x0,x2
add x4,sp,x2
neg x2,x2
-.Loop_tail_neon:
+Loop_tail_neon:
ldrb w10,[x1,x2]
ldrb w11,[x4,x2]
add x2,x2,#1
eor w10,w10,w11
strb w10,[x0,x2]
- cbnz x2,.Loop_tail_neon
+ cbnz x2,Loop_tail_neon
stp xzr,xzr,[sp,#0]
stp xzr,xzr,[sp,#16]
stp xzr,xzr,[sp,#32]
stp xzr,xzr,[sp,#48]
-.Ldone_neon:
+Ldone_neon:
ldp x19,x20,[x29,#16]
add sp,sp,#64
ldp x21,x22,[x29,#32]
@@ -781,9 +779,11 @@ ENTRY(chacha20_neon)
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
+Labort_neon:
ret
-.L512_or_more_neon:
+
+L512_or_more_neon:
sub sp,sp,#128+64
ldp x22,x23,[x5] // load sigma
@@ -819,7 +819,7 @@ ENTRY(chacha20_neon)
sub x2,x2,#512 // not typo
-.Loop_outer_512_neon:
+Loop_outer_512_neon:
mov v0.16b,v24.16b
mov v4.16b,v24.16b
mov v8.16b,v24.16b
@@ -865,7 +865,7 @@ ENTRY(chacha20_neon)
mov x4,#5
subs x2,x2,#512
-.Loop_upper_neon:
+Loop_upper_neon:
sub x4,x4,#1
add v0.4s,v0.4s,v1.4s
add w5,w5,w9
@@ -1275,7 +1275,7 @@ ENTRY(chacha20_neon)
ext v13.16b,v13.16b,v13.16b,#12
ext v17.16b,v17.16b,v17.16b,#12
ext v21.16b,v21.16b,v21.16b,#12
- cbnz x4,.Loop_upper_neon
+ cbnz x4,Loop_upper_neon
add w5,w5,w22 // accumulate key block
add x6,x6,x22,lsr#32
@@ -1350,7 +1350,7 @@ ENTRY(chacha20_neon)
lsr x21,x30,#32
mov x4,#5
-.Loop_lower_neon:
+Loop_lower_neon:
sub x4,x4,#1
add v0.4s,v0.4s,v1.4s
add w5,w5,w9
@@ -1760,7 +1760,7 @@ ENTRY(chacha20_neon)
ext v13.16b,v13.16b,v13.16b,#12
ext v17.16b,v17.16b,v17.16b,#12
ext v21.16b,v21.16b,v21.16b,#12
- cbnz x4,.Loop_lower_neon
+ cbnz x4,Loop_lower_neon
add w5,w5,w22 // accumulate key block
ldp q24,q25,[sp,#0]
@@ -1896,7 +1896,7 @@ ENTRY(chacha20_neon)
add v29.4s,v29.4s,v0.4s
add v30.4s,v30.4s,v0.4s
- b.hs .Loop_outer_512_neon
+ b.hs Loop_outer_512_neon
adds x2,x2,#512
ushr v0.4s,v31.4s,#2 // 4 -> 1
@@ -1910,14 +1910,14 @@ ENTRY(chacha20_neon)
stp q24,q31,[sp,#32]
stp q24,q31,[sp,#64]
- b.eq .Ldone_512_neon
+ b.eq Ldone_512_neon
cmp x2,#192
sub v27.4s,v27.4s,v0.4s // -= 1
sub v28.4s,v28.4s,v0.4s
sub v29.4s,v29.4s,v0.4s
add sp,sp,#128
- b.hs .Loop_outer_neon
+ b.hs Loop_outer_neon
eor v25.16b,v25.16b,v25.16b
eor v26.16b,v26.16b,v26.16b
@@ -1925,9 +1925,9 @@ ENTRY(chacha20_neon)
eor v28.16b,v28.16b,v28.16b
eor v29.16b,v29.16b,v29.16b
eor v30.16b,v30.16b,v30.16b
- b .Loop_outer
+ b Loop_outer
-.Ldone_512_neon:
+Ldone_512_neon:
ldp x19,x20,[x29,#16]
add sp,sp,#128+64
ldp x21,x22,[x29,#32]
@@ -1935,6 +1935,5 @@ ENTRY(chacha20_neon)
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
-.Labort_neon:
ret
-ENDPROC(chacha20_neon)
+
diff --git a/crypto/chacha20/chacha20-arm64.pl b/crypto/chacha20/chacha20-arm64.pl
index 4a838bc..96dc3b7 100644
--- a/crypto/chacha20/chacha20-arm64.pl
+++ b/crypto/chacha20/chacha20-arm64.pl
@@ -16,7 +16,7 @@
#
# June 2015
#
-# ChaCha20 for ARMv8.
+# ChaCha20 for ARMv8.
#
# Performance in cycles per byte out of large buffer.
#
@@ -40,7 +40,7 @@ $output=shift;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../tools/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open OUT,"| \"$^X\" $xlate $flavour $output";
@@ -120,42 +120,21 @@ my ($a3,$b3,$c3,$d3)=map(($_&~3)+(($_+1)&3),($a2,$b2,$c2,$d2));
}
$code.=<<___;
-#include "arm_arch.h"
-
.text
-.extern OPENSSL_armcap_P
-
.align 5
.Lsigma:
.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral
.Lone:
.long 1,0,0,0
-.LOPENSSL_armcap_P:
-#ifdef __ILP32__
-.long OPENSSL_armcap_P-.
-#else
-.quad OPENSSL_armcap_P-.
-#endif
-.asciz "ChaCha20 for ARMv8, CRYPTOGAMS by "
-.globl ChaCha20_ctr32
-.type ChaCha20_ctr32,%function
+.globl chacha20_arm
+.globl chacha20_neon
+
+.type chacha20_arm,%function
.align 5
-ChaCha20_ctr32:
+chacha20_arm:
cbz $len,.Labort
- adr @x[0],.LOPENSSL_armcap_P
- cmp $len,#192
- b.lo .Lshort
-#ifdef __ILP32__
- ldrsw @x[1],[@x[0]]
-#else
- ldr @x[1],[@x[0]]
-#endif
- ldr w17,[@x[1],@x[0]]
- tst w17,#ARMV7_NEON
- b.ne ChaCha20_neon
-
.Lshort:
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@@ -333,7 +312,7 @@ $code.=<<___;
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
ret
-.size ChaCha20_ctr32,.-ChaCha20_ctr32
+.size chacha20_arm,.-chacha20_arm
___
{{{
@@ -374,9 +353,13 @@ my ($a,$b,$c,$d,$t)=@_;
$code.=<<___;
-.type ChaCha20_neon,%function
+.type chacha20_neon,%function
.align 5
-ChaCha20_neon:
+chacha20_neon:
+ cbz x2,.Labort_neon
+ cmp x2,#192
+ b.lo .Lshort
+
stp x29,x30,[sp,#-96]!
add x29,sp,#0
@@ -684,8 +667,9 @@ $code.=<<___;
ldp x25,x26,[x29,#64]
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
+.Labort_neon:
ret
-.size ChaCha20_neon,.-ChaCha20_neon
+.size chacha20_neon,.-chacha20_neon
___
{
my ($T0,$T1,$T2,$T3,$T4,$T5)=@K;
@@ -693,18 +677,6 @@ my ($A0,$B0,$C0,$D0,$A1,$B1,$C1,$D1,$A2,$B2,$C2,$D2,
$A3,$B3,$C3,$D3,$A4,$B4,$C4,$D4,$A5,$B5,$C5,$D5) = map("v$_.4s",(0..23));
$code.=<<___;
-.type ChaCha20_512_neon,%function
-.align 5
-ChaCha20_512_neon:
- stp x29,x30,[sp,#-96]!
- add x29,sp,#0
-
- adr @x[0],.Lsigma
- stp x19,x20,[sp,#16]
- stp x21,x22,[sp,#32]
- stp x23,x24,[sp,#48]
- stp x25,x26,[sp,#64]
- stp x27,x28,[sp,#80]
.L512_or_more_neon:
sub sp,sp,#128+64
@@ -1115,7 +1087,6 @@ $code.=<<___;
ldp x27,x28,[x29,#80]
ldp x29,x30,[sp],#96
ret
-.size ChaCha20_512_neon,.-ChaCha20_512_neon
___
}
}}}
diff --git a/crypto/chacha20_x64_gas.s b/crypto/chacha20/chacha20-x64-linux.s
similarity index 100%
rename from crypto/chacha20_x64_gas.s
rename to crypto/chacha20/chacha20-x64-linux.s
diff --git a/crypto/chacha20_x64_gas_macosx.s b/crypto/chacha20/chacha20-x64-osx.s
similarity index 100%
rename from crypto/chacha20_x64_gas_macosx.s
rename to crypto/chacha20/chacha20-x64-osx.s
diff --git a/crypto/chacha20_x64.asm b/crypto/chacha20/chacha20-x64-win.asm
similarity index 100%
rename from crypto/chacha20_x64.asm
rename to crypto/chacha20/chacha20-x64-win.asm
diff --git a/crypto/make_chacha20_x64.pl b/crypto/chacha20/chacha20-x64.pl
similarity index 99%
rename from crypto/make_chacha20_x64.pl
rename to crypto/chacha20/chacha20-x64.pl
index f9379ca..95443f5 100644
--- a/crypto/make_chacha20_x64.pl
+++ b/crypto/chacha20/chacha20-x64.pl
@@ -67,7 +67,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../tools/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
$avx = 3;
diff --git a/crypto/chacha20/make.sh b/crypto/chacha20/make.sh
new file mode 100644
index 0000000..6cfee45
--- /dev/null
+++ b/crypto/chacha20/make.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+perl chacha20-x64.pl gas > chacha20-x64-linux.s
+perl chacha20-x64.pl macosx > chacha20-x64-osx.s
+perl chacha20-arm64.pl ios > chacha20-arm64-ios.S
\ No newline at end of file
diff --git a/crypto/curve25519-donna.cpp b/crypto/curve25519/curve25519-donna.cpp
similarity index 99%
rename from crypto/curve25519-donna.cpp
rename to crypto/curve25519/curve25519-donna.cpp
index a8f5cbe..45003cf 100644
--- a/crypto/curve25519-donna.cpp
+++ b/crypto/curve25519/curve25519-donna.cpp
@@ -48,6 +48,7 @@
#include
#include
+#include "curve25519-donna.h"
#ifdef _MSC_VER
#define inline __inline
@@ -57,6 +58,8 @@ typedef uint8_t u8;
typedef int32_t s32;
typedef int64_t limb;
+const uint8 kCurve25519Basepoint[32] = {9};
+
/* Field element representation:
*
* Field elements are written as an array of signed, 64-bit limbs, least
diff --git a/crypto/curve25519-donna.h b/crypto/curve25519/curve25519-donna.h
similarity index 92%
rename from crypto/curve25519-donna.h
rename to crypto/curve25519/curve25519-donna.h
index 93380b3..f563e5d 100644
--- a/crypto/curve25519-donna.h
+++ b/crypto/curve25519/curve25519-donna.h
@@ -1,17 +1,19 @@
-#ifndef TUNSAFE_CRYPTO_CURVE25519_DONNA_H_
-#define TUNSAFE_CRYPTO_CURVE25519_DONNA_H_
-
-#include "tunsafe_types.h"
-
-void curve25519_donna_ref(uint8 *mypublic, const uint8 *secret, const uint8 *basepoint);
-extern "C" void curve25519_donna_x64(uint8 *mypublic, const uint8 *secret, const uint8 *basepoint);
-
-#if defined(ARCH_CPU_X86_64) && defined(COMPILER_MSVC)
-#define curve25519_donna curve25519_donna_x64
-#else
-#define curve25519_donna curve25519_donna_ref
-#endif
-
-void curve25519_normalize(uint8 *e);
-
+#ifndef TUNSAFE_CRYPTO_CURVE25519_DONNA_H_
+#define TUNSAFE_CRYPTO_CURVE25519_DONNA_H_
+
+#include "tunsafe_types.h"
+
+void curve25519_donna_ref(uint8 *mypublic, const uint8 *secret, const uint8 *basepoint);
+extern "C" void curve25519_donna_x64(uint8 *mypublic, const uint8 *secret, const uint8 *basepoint);
+
+#if defined(ARCH_CPU_X86_64) && defined(COMPILER_MSVC)
+#define curve25519_donna curve25519_donna_x64
+#else
+#define curve25519_donna curve25519_donna_ref
+#endif
+
+void curve25519_normalize(uint8 *e);
+
+extern const uint8 kCurve25519Basepoint[32];
+
#endif // TUNSAFE_CRYPTO_CURVE25519_DONNA_H_
\ No newline at end of file
diff --git a/crypto/curve25519_x64_nasm.asm b/crypto/curve25519/curve25519-x64-win.asm
similarity index 100%
rename from crypto/curve25519_x64_nasm.asm
rename to crypto/curve25519/curve25519-x64-win.asm
diff --git a/crypto/poly1305/make.sh b/crypto/poly1305/make.sh
new file mode 100644
index 0000000..07c27ac
--- /dev/null
+++ b/crypto/poly1305/make.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+perl poly1305-x64.pl gas > poly1305-x64-linux.s
+perl poly1305-x64.pl macosx > poly1305-x64-osx.s
+perl poly1305-arm64.pl ios > poly1305-arm64-ios.S
+
+
diff --git a/crypto/poly1305/poly1305-arm.s b/crypto/poly1305/poly1305-arm-linux.S
similarity index 99%
rename from crypto/poly1305/poly1305-arm.s
rename to crypto/poly1305/poly1305-arm-linux.S
index 1893360..2be4893 100644
--- a/crypto/poly1305/poly1305-arm.s
+++ b/crypto/poly1305/poly1305-arm-linux.S
@@ -4,7 +4,7 @@
* Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
*/
-//#include
+#define __ARM_ARCH__ 7
.text
#if defined(__thumb2__)
diff --git a/crypto/poly1305/poly1305-arm.pl b/crypto/poly1305/poly1305-arm.pl
index 5cdb6be..ab3a819 100644
--- a/crypto/poly1305/poly1305-arm.pl
+++ b/crypto/poly1305/poly1305-arm.pl
@@ -35,7 +35,7 @@ else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
if ($flavour && $flavour ne "void") {
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
- ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ ( $xlate="${dir}../tools/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open STDOUT,"| \"$^X\" $xlate $flavour $output";
diff --git a/crypto/poly1305/poly1305-arm64.s b/crypto/poly1305/poly1305-arm64-ios.S
similarity index 92%
rename from crypto/poly1305/poly1305-arm64.s
rename to crypto/poly1305/poly1305-arm64-ios.S
index 911b57e..610613c 100644
--- a/crypto/poly1305/poly1305-arm64.s
+++ b/crypto/poly1305/poly1305-arm64-ios.S
@@ -1,20 +1,21 @@
-/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
- *
- * Copyright (C) 2015-2018 Jason A. Donenfeld . All Rights Reserved.
- * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
- */
-
-#include
.text
+// forward "declarations" are required for Apple
+.globl _poly1305_blocks_arm
+.globl _poly1305_emit_arm
+.globl _poly1305_blocks_neon
+.globl _poly1305_emit_neon
+
+.globl _poly1305_init_arm
+
.align 5
-ENTRY(poly1305_init_arm)
+_poly1305_init_arm:
cmp x1,xzr
stp xzr,xzr,[x0] // zero hash value
stp xzr,xzr,[x0,#16] // [along with is_base2_26]
csel x0,xzr,x0,eq
- b.eq .Lno_key
+ b.eq Lno_key
ldp x7,x8,[x1] // load key
mov x9,#0xfffffffc0fffffff
@@ -28,23 +29,24 @@ ENTRY(poly1305_init_arm)
and x8,x8,x9 // &=0ffffffc0ffffffc
stp x7,x8,[x0,#32] // save key value
-.Lno_key:
+Lno_key:
ret
-ENDPROC(poly1305_init_arm)
+
+
.align 5
-ENTRY(poly1305_blocks_arm)
+_poly1305_blocks_arm:
ands x2,x2,#-16
- b.eq .Lno_data
+ b.eq Lno_data
ldp x4,x5,[x0] // load hash value
ldp x7,x8,[x0,#32] // load key value
ldr x6,[x0,#16]
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
- b .Loop
+ b Loop
.align 5
-.Loop:
+Loop:
ldp x10,x11,[x1],#16 // load input
sub x2,x2,#16
#ifdef __ARMEB__
@@ -86,17 +88,18 @@ ENTRY(poly1305_blocks_arm)
adcs x5,x13,xzr
adc x6,x6,xzr
- cbnz x2,.Loop
+ cbnz x2,Loop
stp x4,x5,[x0] // store hash value
str x6,[x0,#16]
-.Lno_data:
+Lno_data:
ret
-ENDPROC(poly1305_blocks_arm)
+
+
.align 5
-ENTRY(poly1305_emit_arm)
+_poly1305_emit_arm:
ldp x4,x5,[x0] // load hash base 2^64
ldr x6,[x0,#16]
ldp x10,x11,[x2] // load nonce
@@ -123,10 +126,10 @@ ENTRY(poly1305_emit_arm)
stp x4,x5,[x1] // write result
ret
-ENDPROC(poly1305_emit_arm)
+
.align 5
-__poly1305_mult:
+poly1305_mult:
mul x12,x4,x7 // h0*r0
umulh x13,x4,x7
@@ -160,7 +163,10 @@ __poly1305_mult:
ret
-__poly1305_splat:
+
+
+.align 5
+poly1305_splat:
and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
ubfx x13,x4,#26,#26
extr x14,x5,x4,#52
@@ -184,28 +190,30 @@ __poly1305_splat:
ret
+
+
.align 5
-ENTRY(poly1305_blocks_neon)
+_poly1305_blocks_neon:
ldr x17,[x0,#24]
cmp x2,#128
- b.hs .Lblocks_neon
- cbz x17,poly1305_blocks_arm
+ b.hs Lblocks_neon
+ cbz x17,_poly1305_blocks_arm
-.Lblocks_neon:
+Lblocks_neon:
stp x29,x30,[sp,#-80]!
add x29,sp,#0
ands x2,x2,#-16
- b.eq .Lno_data_neon
+ b.eq Lno_data_neon
- cbz x17,.Lbase2_64_neon
+ cbz x17,Lbase2_64_neon
ldp w10,w11,[x0] // load hash value base 2^26
ldp w12,w13,[x0,#8]
ldr w14,[x0,#16]
tst x2,#31
- b.eq .Leven_neon
+ b.eq Leven_neon
ldp x7,x8,[x0,#32] // load key value
@@ -237,10 +245,10 @@ ENTRY(poly1305_blocks_neon)
adcs x5,x5,x13
adc x6,x6,x3
- bl __poly1305_mult
+ bl poly1305_mult
ldr x30,[sp,#8]
- cbz x3,.Lstore_base2_64_neon
+ cbz x3,Lstore_base2_64_neon
and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,x4,#26,#26
@@ -249,28 +257,28 @@ ENTRY(poly1305_blocks_neon)
ubfx x13,x5,#14,#26
extr x14,x6,x5,#40
- cbnz x2,.Leven_neon
+ cbnz x2,Leven_neon
stp w10,w11,[x0] // store hash value base 2^26
stp w12,w13,[x0,#8]
str w14,[x0,#16]
- b .Lno_data_neon
+ b Lno_data_neon
.align 4
-.Lstore_base2_64_neon:
+Lstore_base2_64_neon:
stp x4,x5,[x0] // store hash value base 2^64
stp x6,xzr,[x0,#16] // note that is_base2_26 is zeroed
- b .Lno_data_neon
+ b Lno_data_neon
.align 4
-.Lbase2_64_neon:
+Lbase2_64_neon:
ldp x7,x8,[x0,#32] // load key value
ldp x4,x5,[x0] // load hash value base 2^64
ldr x6,[x0,#16]
tst x2,#31
- b.eq .Linit_neon
+ b.eq Linit_neon
ldp x12,x13,[x1],#16 // load input
sub x2,x2,#16
@@ -283,9 +291,9 @@ ENTRY(poly1305_blocks_neon)
adcs x5,x5,x13
adc x6,x6,x3
- bl __poly1305_mult
+ bl poly1305_mult
-.Linit_neon:
+Linit_neon:
and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,x4,#26,#26
extr x12,x5,x4,#52
@@ -310,35 +318,35 @@ ENTRY(poly1305_blocks_neon)
mov x5,x8
mov x6,xzr
add x0,x0,#48+12
- bl __poly1305_splat
+ bl poly1305_splat
- bl __poly1305_mult // r^2
+ bl poly1305_mult // r^2
sub x0,x0,#4
- bl __poly1305_splat
+ bl poly1305_splat
- bl __poly1305_mult // r^3
+ bl poly1305_mult // r^3
sub x0,x0,#4
- bl __poly1305_splat
+ bl poly1305_splat
- bl __poly1305_mult // r^4
+ bl poly1305_mult // r^4
sub x0,x0,#4
- bl __poly1305_splat
+ bl poly1305_splat
ldr x30,[sp,#8]
add x16,x1,#32
- adr x17,.Lzeros
+ adr x17,Lzeros
subs x2,x2,#64
csel x16,x17,x16,lo
mov x4,#1
str x4,[x0,#-24] // set is_base2_26
sub x0,x0,#48 // restore original x0
- b .Ldo_neon
+ b Ldo_neon
.align 4
-.Leven_neon:
+Leven_neon:
add x16,x1,#32
- adr x17,.Lzeros
+ adr x17,Lzeros
subs x2,x2,#64
csel x16,x17,x16,lo
@@ -353,7 +361,7 @@ ENTRY(poly1305_blocks_neon)
fmov d27,x13
fmov d28,x14
-.Ldo_neon:
+Ldo_neon:
ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
ldp x9,x13,[x16],#48
@@ -427,10 +435,10 @@ ENTRY(poly1305_blocks_neon)
fmov d13,x12
ushr v31.2d,v31.2d,#38
- b.ls .Lskip_loop
+ b.ls Lskip_loop
.align 4
-.Loop_neon:
+Loop_neon:
////////////////////////////////////////////////////////////////
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
@@ -616,9 +624,9 @@ ENTRY(poly1305_blocks_neon)
add v25.2s,v25.2s,v29.2s // h0 -> h1
add v28.2s,v28.2s,v30.2s // h3 -> h4
- b.hi .Loop_neon
+ b.hi Loop_neon
-.Lskip_loop:
+Lskip_loop:
dup v16.2d,v16.d[0]
add v11.2s,v11.2s,v26.2s
@@ -626,7 +634,7 @@ ENTRY(poly1305_blocks_neon)
// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
adds x2,x2,#32
- b.ne .Long_tail
+ b.ne Long_tail
dup v16.2d,v11.d[0]
add v14.2s,v9.2s,v24.2s
@@ -634,7 +642,7 @@ ENTRY(poly1305_blocks_neon)
add v15.2s,v10.2s,v25.2s
add v18.2s,v13.2s,v28.2s
-.Long_tail:
+Long_tail:
dup v14.2d,v14.d[0]
umull2 v19.2d,v16.4s,v6.4s
umull2 v22.2d,v16.4s,v1.4s
@@ -669,7 +677,7 @@ ENTRY(poly1305_blocks_neon)
umlal2 v20.2d,v18.4s,v4.4s
umlal2 v21.2d,v18.4s,v6.4s
- b.eq .Lshort_tail
+ b.eq Lshort_tail
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4:r^3 and accumulate
@@ -708,7 +716,7 @@ ENTRY(poly1305_blocks_neon)
umlal v20.2d,v13.2s,v4.2s
umlal v21.2d,v13.2s,v6.2s
-.Lshort_tail:
+Lshort_tail:
////////////////////////////////////////////////////////////////
// horizontal add
@@ -759,15 +767,16 @@ ENTRY(poly1305_blocks_neon)
st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
st1 {v23.s}[0],[x0]
-.Lno_data_neon:
+Lno_data_neon:
ldr x29,[sp],#80
ret
-ENDPROC(poly1305_blocks_neon)
+
+
.align 5
-ENTRY(poly1305_emit_neon)
+_poly1305_emit_neon:
ldr x17,[x0,#24]
- cbz x17,poly1305_emit_arm
+ cbz x17,_poly1305_emit_arm
ldp w10,w11,[x0] // load hash value base 2^26
ldp w12,w13,[x0,#8]
@@ -813,8 +822,8 @@ ENTRY(poly1305_emit_neon)
stp x4,x5,[x1] // write result
ret
-ENDPROC(poly1305_emit_neon)
+
.align 5
-.Lzeros:
+Lzeros:
.long 0,0,0,0,0,0,0,0
diff --git a/crypto/poly1305/poly1305-arm64.pl b/crypto/poly1305/poly1305-arm64.pl
index ac06457..0ef3453 100644
--- a/crypto/poly1305/poly1305-arm64.pl
+++ b/crypto/poly1305/poly1305-arm64.pl
@@ -18,7 +18,7 @@
#
# June 2015
#
-# Numbers are cycles per processed byte with poly1305_blocks alone.
+# Numbers are cycles per processed byte with poly1305_blocks_arm alone.
#
# IALU/gcc-4.9 NEON
#
@@ -39,7 +39,7 @@ $output=shift;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../tools/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
open OUT,"| \"$^X\" $xlate $flavour $output";
@@ -51,19 +51,18 @@ my ($mac,$nonce)=($inp,$len);
my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
$code.=<<___;
-#include "arm_arch.h"
-
.text
// forward "declarations" are required for Apple
-.extern OPENSSL_armcap_P
-.globl poly1305_blocks
-.globl poly1305_emit
+.globl poly1305_blocks_arm
+.globl poly1305_emit_arm
+.globl poly1305_blocks_neon
+.globl poly1305_emit_neon
-.globl poly1305_init
-.type poly1305_init,%function
+.globl poly1305_init_arm
+.type poly1305_init_arm,%function
.align 5
-poly1305_init:
+poly1305_init_arm:
cmp $inp,xzr
stp xzr,xzr,[$ctx] // zero hash value
stp xzr,xzr,[$ctx,#16] // [along with is_base2_26]
@@ -71,17 +70,9 @@ poly1305_init:
csel x0,xzr,x0,eq
b.eq .Lno_key
-#ifdef __ILP32__
- ldrsw $t1,.LOPENSSL_armcap_P
-#else
- ldr $t1,.LOPENSSL_armcap_P
-#endif
- adr $t0,.LOPENSSL_armcap_P
-
ldp $r0,$r1,[$inp] // load key
mov $s1,#0xfffffffc0fffffff
movk $s1,#0x0fff,lsl#48
- ldr w17,[$t0,$t1]
#ifdef __ARMEB__
rev $r0,$r0 // flip bytes
rev $r1,$r1
@@ -91,30 +82,13 @@ poly1305_init:
and $r1,$r1,$s1 // &=0ffffffc0ffffffc
stp $r0,$r1,[$ctx,#32] // save key value
- tst w17,#ARMV7_NEON
-
- adr $d0,poly1305_blocks
- adr $r0,poly1305_blocks_neon
- adr $d1,poly1305_emit
- adr $r1,poly1305_emit_neon
-
- csel $d0,$d0,$r0,eq
- csel $d1,$d1,$r1,eq
-
-#ifdef __ILP32__
- stp w12,w13,[$len]
-#else
- stp $d0,$d1,[$len]
-#endif
-
- mov x0,#1
.Lno_key:
ret
-.size poly1305_init,.-poly1305_init
+.size poly1305_init_arm,.-poly1305_init_arm
-.type poly1305_blocks,%function
+.type poly1305_blocks_arm,%function
.align 5
-poly1305_blocks:
+poly1305_blocks_arm:
ands $len,$len,#-16
b.eq .Lno_data
@@ -174,11 +148,11 @@ poly1305_blocks:
.Lno_data:
ret
-.size poly1305_blocks,.-poly1305_blocks
+.size poly1305_blocks_arm,.-poly1305_blocks_arm
-.type poly1305_emit,%function
+.type poly1305_emit_arm,%function
.align 5
-poly1305_emit:
+poly1305_emit_arm:
ldp $h0,$h1,[$ctx] // load hash base 2^64
ldr $h2,[$ctx,#16]
ldp $t0,$t1,[$nonce] // load nonce
@@ -205,7 +179,7 @@ poly1305_emit:
stp $h0,$h1,[$mac] // write result
ret
-.size poly1305_emit,.-poly1305_emit
+.size poly1305_emit_arm,.-poly1305_emit_arm
___
my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
@@ -288,7 +262,7 @@ poly1305_blocks_neon:
ldr $is_base2_26,[$ctx,#24]
cmp $len,#128
b.hs .Lblocks_neon
- cbz $is_base2_26,poly1305_blocks
+ cbz $is_base2_26,poly1305_blocks_arm
.Lblocks_neon:
stp x29,x30,[sp,#-80]!
@@ -867,7 +841,7 @@ poly1305_blocks_neon:
.align 5
poly1305_emit_neon:
ldr $is_base2_26,[$ctx,#24]
- cbz $is_base2_26,poly1305_emit
+ cbz $is_base2_26,poly1305_emit_arm
ldp w10,w11,[$ctx] // load hash value base 2^26
ldp w12,w13,[$ctx,#8]
@@ -918,14 +892,6 @@ poly1305_emit_neon:
.align 5
.Lzeros:
.long 0,0,0,0,0,0,0,0
-.LOPENSSL_armcap_P:
-#ifdef __ILP32__
-.long OPENSSL_armcap_P-.
-#else
-.quad OPENSSL_armcap_P-.
-#endif
-.asciz "Poly1305 for ARMv8, CRYPTOGAMS by "
-.align 2
___
foreach (split("\n",$code)) {
diff --git a/crypto/poly1305_x64_gas.s b/crypto/poly1305/poly1305-x64-linux.s
old mode 100755
new mode 100644
similarity index 100%
rename from crypto/poly1305_x64_gas.s
rename to crypto/poly1305/poly1305-x64-linux.s
diff --git a/crypto/poly1305_x64_gas_macosx.s b/crypto/poly1305/poly1305-x64-osx.s
similarity index 100%
rename from crypto/poly1305_x64_gas_macosx.s
rename to crypto/poly1305/poly1305-x64-osx.s
diff --git a/crypto/poly1305_x64_nasm.asm b/crypto/poly1305/poly1305-x64-win.asm
similarity index 100%
rename from crypto/poly1305_x64_nasm.asm
rename to crypto/poly1305/poly1305-x64-win.asm
diff --git a/crypto/make_poly1305_x64.pl b/crypto/poly1305/poly1305-x64.pl
old mode 100755
new mode 100644
similarity index 99%
rename from crypto/make_poly1305_x64.pl
rename to crypto/poly1305/poly1305-x64.pl
index f7a2ab7..122008f
--- a/crypto/make_poly1305_x64.pl
+++ b/crypto/poly1305/poly1305-x64.pl
@@ -71,7 +71,7 @@ $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
-( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../tools/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
$avx = 3;
diff --git a/crypto/make_poly1305_x86.pl b/crypto/poly1305/poly1305-x86.pl
similarity index 100%
rename from crypto/make_poly1305_x86.pl
rename to crypto/poly1305/poly1305-x86.pl
diff --git a/crypto/siphash.cpp b/crypto/siphash/siphash.cpp
similarity index 99%
rename from crypto/siphash.cpp
rename to crypto/siphash/siphash.cpp
index 4bd11ad..fddf5fd 100644
--- a/crypto/siphash.cpp
+++ b/crypto/siphash/siphash.cpp
@@ -11,7 +11,7 @@
*/
#include "stdafx.h"
-#include "crypto/siphash.h"
+#include "crypto/siphash/siphash.h"
#include "tunsafe_endian.h"
#define SIPROUND \
diff --git a/crypto/siphash.h b/crypto/siphash/siphash.h
similarity index 100%
rename from crypto/siphash.h
rename to crypto/siphash/siphash.h
diff --git a/crypto/tools/arm-xlate.pl b/crypto/tools/arm-xlate.pl
new file mode 100644
index 0000000..ca2f8b9
--- /dev/null
+++ b/crypto/tools/arm-xlate.pl
@@ -0,0 +1,177 @@
+#! /usr/bin/env perl
+# Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+use strict;
+
+my $flavour = shift;
+my $output = shift;
+if (defined($output) && $output ne "") { open(STDOUT,">",$output) or die "can't open $output: $!"; } # "||" bound to the filename string, so the old die could never fire; with no output argument STDOUT is left as is
+
+$flavour = "linux32" if (!$flavour or $flavour eq "void");
+
+my %GLOBALS;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $arch = sub {
+ if ($flavour =~ /linux/) { ".arch\t".join(',',@_); }
+ else { ""; }
+};
+my $fpu = sub {
+ if ($flavour =~ /linux/) { ".fpu\t".join(',',@_); }
+ else { ""; }
+};
+my $hidden = sub {
+ if ($flavour =~ /ios/) { ".private_extern\t".join(',',@_); }
+ else { ".hidden\t".join(',',@_); }
+};
+my $comm = sub {
+ my @args = split(/,\s*/,shift); # "name, size[, align]" operands of .comm
+ my $name = $args[0]; # scalar element, not a one-element slice
+ my $global = \$GLOBALS{$name};
+ my $ret;
+
+ if ($flavour =~ /ios32/) {
+ $ret = ".comm\t_$name,$args[1]\n";
+ $ret .= ".non_lazy_symbol_pointer\n";
+ $ret .= "$name:\n";
+ $ret .= ".indirect_symbol\t_$name\n";
+ $ret .= ".long\t0";
+ $name = "_$name";
+ } else { $ret = ".comm\t".join(',',@args); }
+
+ $$global = $name; # later references to the symbol are rewritten via %GLOBALS
+ $ret;
+};
+my $globl = sub {
+ my $name = shift;
+ my $global = \$GLOBALS{$name};
+ my $ret;
+
+ SWITCH: for ($flavour) {
+ /ios/ && do { $name = "_$name";
+ last;
+ };
+ }
+
+ $ret = ".globl $name" if (!$ret);
+ $$global = $name;
+ $ret;
+};
+my $global = $globl;
+my $extern = sub {
+ &$globl(@_);
+ return; # return nothing
+};
+my $type = sub {
+ if ($flavour =~ /linux/) { ".type\t".join(',',@_); }
+ elsif ($flavour =~ /ios32/) { if (join(',',@_) =~ /(\w+),%function/) {
+ "#ifdef __thumb2__\n".
+ ".thumb_func $1\n".
+ "#endif";
+ }
+ }
+ else { ""; }
+};
+my $size = sub {
+ if ($flavour =~ /linux/) { ".size\t".join(',',@_); }
+ else { ""; }
+};
+my $inst = sub {
+ if ($flavour =~ /linux/) { ".inst\t".join(',',@_); }
+ else { ".long\t".join(',',@_); }
+};
+my $asciz = sub {
+ my $line = join(",",@_);
+ if ($line =~ /^"(.*)"$/)
+ { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
+ else
+ { ""; }
+};
+
+sub range {
+ my ($r,$sfx,$start,$end) = @_;
+
+ join(",",map("$r$_$sfx",($start..$end)));
+}
+
+sub expand_line {
+ my $line = shift;
+ my @ret = ();
+
+ pos($line)=0;
+
+ while ($line =~ m/\G[^@\/\{\"]*/g) {
+ if ($line =~ m/\G(@|\/\/|$)/gc) {
+ last;
+ }
+ elsif ($line =~ m/\G\{/gc) {
+ my $saved_pos = pos($line);
+ $line =~ s/\G([rdqv])([0-9]+)([^\-]*)\-\1([0-9]+)\3/range($1,$3,$2,$4)/e;
+ pos($line) = $saved_pos;
+ $line =~ m/\G[^\}]*\}/g;
+ }
+ elsif ($line =~ m/\G\"/gc) {
+ $line =~ m/\G[^\"]*\"/g;
+ }
+ }
+
+ $line =~ s/\b(\w+)/$GLOBALS{$1} or $1/ge;
+
+ return $line;
+}
+
+while(my $line=<>) {
+
+ if ($line =~ m/^\s*(#|@|\/\/)/) { print $line; next; }
+
+ $line =~ s|/\*.*\*/||; # get rid of C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning...
+ $line =~ s|\s+$||; # ... and at the end
+
+ {
+ $line =~ s|[\b\.]L(\w{2,})|L$1|g; # common denominator for Locallabel
+ $line =~ s|\bL(\w{2,})|\.L$1|g if ($dotinlocallabels);
+ }
+
+ {
+ $line =~ s|(^[\.\w]+)\:\s*||;
+ my $label = $1;
+ if ($label) {
+ printf "%s:",($GLOBALS{$label} or $label);
+ }
+ }
+
+ if ($line !~ m/^[#@]/) {
+ $line =~ s|^\s*(\.?)(\S+)\s*||;
+ my $c = $1; $c = "\t" if ($c eq "");
+ my $mnemonic = $2;
+ my $opcode;
+ if ($mnemonic =~ m/([^\.]+)\.([^\.]+)/) {
+ $opcode = eval("\$$1_$2");
+ } else {
+ $opcode = eval("\$$mnemonic");
+ }
+
+ my $arg=expand_line($line);
+
+ if (ref($opcode) eq 'CODE') {
+ $line = &$opcode($arg);
+ } elsif ($mnemonic) {
+ $line = $c.$mnemonic;
+ $line.= "\t$arg" if ($arg ne "");
+ }
+ }
+
+ print $line if ($line);
+ print "\n";
+}
+
+close STDOUT or die "error closing STDOUT: $!"; # surface buffered write failures (e.g. disk full) instead of leaving a truncated .S file
diff --git a/crypto/nasm.props b/crypto/tools/nasm.props
similarity index 100%
rename from crypto/nasm.props
rename to crypto/tools/nasm.props
diff --git a/crypto/nasm.targets b/crypto/tools/nasm.targets
similarity index 100%
rename from crypto/nasm.targets
rename to crypto/tools/nasm.targets
diff --git a/crypto/nasm.xml b/crypto/tools/nasm.xml
similarity index 100%
rename from crypto/nasm.xml
rename to crypto/tools/nasm.xml
diff --git a/crypto/x86_64-xlate.pl b/crypto/tools/x86_64-xlate.pl
similarity index 100%
rename from crypto/x86_64-xlate.pl
rename to crypto/tools/x86_64-xlate.pl
diff --git a/network_bsd_common.cpp b/network_bsd_common.cpp
index 0d4346f..dae7b66 100644
--- a/network_bsd_common.cpp
+++ b/network_bsd_common.cpp
@@ -248,11 +248,6 @@ done:
}
#endif // defined(OS_LINUX)
-
-void OsInterruptibleSleep(int millis) {
- usleep((useconds_t)millis * 1000);
-}
-
#if defined(OS_MACOSX)
int open_tun(char *devname, size_t devname_size) {
struct sockaddr_ctl sc;
@@ -789,14 +784,6 @@ public:
bool is_connected_;
};
-struct CommandLineOutput {
- const char *filename_to_load;
- const char *interface_name;
- bool daemon;
-};
-
-int HandleCommandLine(int argc, char **argv, CommandLineOutput *output);
-
int main(int argc, char **argv) {
CommandLineOutput cmd = {0};
diff --git a/ts.cpp b/ts.cpp
index 2cd5d8c..fd363f2 100644
--- a/ts.cpp
+++ b/ts.cpp
@@ -1,7 +1,7 @@
#include "stdafx.h"
#include "tunsafe_types.h"
#include "netapi.h"
-#include "crypto/curve25519-donna.h"
+#include "crypto/curve25519/curve25519-donna.h"
#include "util.h"
#include "wireguard_proto.h"
#include
@@ -35,8 +35,6 @@
#define ANSI_FG_CYAN "\x1b[36m"
#define ANSI_FG_WHITE "\x1b[37m"
-static const uint8 kCurve25519Basepoint[32] = {9};
-
#if defined(OS_WIN)
#define EXENAME "ts"
@@ -758,12 +756,6 @@ static int HandleStopCommand(int argc, char **argv) {
#endif // defined(OS_WIN)
-struct CommandLineOutput {
- const char *filename_to_load;
- const char *interface_name;
- bool daemon;
-};
-
// Returns -1 on invalid subcommand
int HandleCommandLine(int argc, char **argv, CommandLineOutput *output) {
uint8 key[32];
diff --git a/ts.vcxproj b/ts.vcxproj
index 0774b3d..0a9c943 100644
--- a/ts.vcxproj
+++ b/ts.vcxproj
@@ -54,7 +54,7 @@
-
+
@@ -100,6 +100,7 @@
_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
true
false
+ .
Console
@@ -115,6 +116,7 @@
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
true
false
+ .
Console
@@ -134,6 +136,7 @@
MultiThreaded
true
false
+ .
Console
@@ -155,6 +158,7 @@
MultiThreaded
true
false
+ .
Console
@@ -164,13 +168,13 @@
-
+
-
+
NotUsing
NotUsing
NotUsing
@@ -187,13 +191,13 @@
-
+
true
true
-
+
\ No newline at end of file
diff --git a/ts.vcxproj.filters b/ts.vcxproj.filters
index 4d6247d..7f47ece 100644
--- a/ts.vcxproj.filters
+++ b/ts.vcxproj.filters
@@ -21,12 +21,12 @@
Source Files
-
- Source Files
-
Source Files
+
+ Header Files
+
@@ -38,15 +38,15 @@
Source Files
-
+
Source Files
-
+
Source Files
-
+
Source Files
diff --git a/tunsafe_amalgam.cpp b/tunsafe_amalgam.cpp
new file mode 100644
index 0000000..19108eb
--- /dev/null
+++ b/tunsafe_amalgam.cpp
@@ -0,0 +1,29 @@
+#include "build_config.h"
+
+// Skip asm for IOS simulator
+#if defined(OS_IOS) && defined(ARCH_CPU_X86_FAMILY)
+#define CHACHA20_WITH_ASM 0
+#define BLAKE2S_WITH_ASM 0
+#endif
+
+#include "wireguard.cpp"
+#include "wireguard_proto.cpp"
+#include "wireguard_config.cpp"
+#include "util.cpp"
+#include "tunsafe_threading.cpp"
+#include "tunsafe_cpu.cpp"
+#include "ip_to_peer_map.cpp"
+#include "crypto/curve25519/curve25519-donna.cpp"
+#include "crypto/chacha20poly1305.cpp"
+#include "crypto/blake2s/blake2s.cpp"
+#include "crypto/siphash/siphash.cpp"
+#include "crypto/aesgcm/aesgcm.cpp"
+#include "ipzip2/ipzip2.cpp"
+
+#if defined(WITH_NETWORK_BSD)
+#include "network_bsd.cpp"
+#include "network_bsd_common.cpp"
+#include "ts.cpp"
+#include "benchmark.cpp"
+#endif
+
diff --git a/tunsafe_win32.cpp b/tunsafe_win32.cpp
index f786043..31e807f 100644
--- a/tunsafe_win32.cpp
+++ b/tunsafe_win32.cpp
@@ -22,7 +22,7 @@
#include "util.h"
#include
#include
-#include "crypto/curve25519-donna.h"
+#include "crypto/curve25519/curve25519-donna.h"
#include "service_win32.h"
#include "util_win32.h"
@@ -652,8 +652,6 @@ void BrowseFile(HWND wnd) {
ImportFile(szFile);
}
-static const uint8 kCurve25519Basepoint[32] = {9};
-
static void SetKeyBox(HWND wnd, int ctr, uint8 buf[32]) {
char base64[WG_PUBLIC_KEY_LEN_BASE64 + 1];
SetDlgItemText(wnd, ctr, base64_encode(buf, 32, base64, sizeof(base64), NULL));
diff --git a/util.h b/util.h
index 7eae298..72d8521 100644
--- a/util.h
+++ b/util.h
@@ -48,3 +48,10 @@ uint64 OsGetMilliseconds();
void InitOsxGetMilliseconds();
void OsInterruptibleSleep(int millis);
void OsGetTimestampTAI64N(uint8 dst[12]);
+
+struct CommandLineOutput {
+ const char *filename_to_load;
+ const char *interface_name;
+ bool daemon;
+};
+int HandleCommandLine(int argc, char **argv, CommandLineOutput *output);
diff --git a/wireguard.cpp b/wireguard.cpp
index be21969..e4f0b33 100644
--- a/wireguard.cpp
+++ b/wireguard.cpp
@@ -5,8 +5,8 @@
#include "netapi.h"
#include "wireguard_proto.h"
#include "crypto/chacha20poly1305.h"
-#include "crypto/blake2s.h"
-#include "crypto/siphash.h"
+#include "crypto/blake2s/blake2s.h"
+#include "crypto/siphash/siphash.h"
#include "tunsafe_endian.h"
#include
#include
diff --git a/wireguard_proto.cpp b/wireguard_proto.cpp
index 033064b..60acdae 100644
--- a/wireguard_proto.cpp
+++ b/wireguard_proto.cpp
@@ -3,10 +3,10 @@
#include "stdafx.h"
#include "wireguard_proto.h"
#include "crypto/chacha20poly1305.h"
-#include "crypto/blake2s.h"
-#include "crypto/curve25519-donna.h"
+#include "crypto/blake2s/blake2s.h"
+#include "crypto/curve25519/curve25519-donna.h"
#include "crypto/aesgcm/aes.h"
-#include "crypto/siphash.h"
+#include "crypto/siphash/siphash.h"
#include "tunsafe_endian.h"
#include "util.h"
#include "crypto_ops.h"
@@ -21,7 +21,6 @@ static const uint8 kLabelCookie[] = {'c', 'o', 'o', 'k', 'i', 'e', '-', '-'};
static const uint8 kLabelMac1[] = {'m', 'a', 'c', '1', '-', '-', '-', '-'};
static const uint8 kWgInitHash[WG_HASH_LEN] = {0x22,0x11,0xb3,0x61,0x08,0x1a,0xc5,0x66,0x69,0x12,0x43,0xdb,0x45,0x8a,0xd5,0x32,0x2d,0x9c,0x6c,0x66,0x22,0x93,0xe8,0xb7,0x0e,0xe1,0x9c,0x65,0xba,0x07,0x9e,0xf3};
static const uint8 kWgInitChainingKey[WG_HASH_LEN] = {0x60,0xe2,0x6d,0xae,0xf3,0x27,0xef,0xc0,0x2e,0xc3,0x35,0xe2,0xa0,0x25,0xd2,0xd0,0x16,0xeb,0x42,0x06,0xf8,0x72,0x77,0xf5,0x2d,0x38,0xd1,0x98,0x8b,0x78,0xcd,0x36};
-static const uint8 kCurve25519Basepoint[32] = {9};
ReplayDetector::ReplayDetector() {
expected_seq_nr_ = 0;