Updates for TunSafe 1.4-rc1

1.Subfolders in the Config/ directory now show up as submenus.
2.Added a way to run TunSafe as a Windows Service.
  Foreground Mode: The service will disconnect when TunSafe closes.
  Background Mode: The service will stay connected in the background.
  It is no longer necessary to run the TunSafe client as Admin as long as
  the service is running.
3.New config setting [Interface].ExcludedIPs to configure IPs that
  should not be routed through TunSafe.
4.Can now automatically start TunSafe when Windows starts
5.New UI with tabs and graphs
6.Cache DNS queries to ensure DNS will succeed if connection fails
7.Recreate tray icon when explorer.exe restarts
8.Renamed window title to TunSafe instead of TunSafe VPN Client
9.Main window is now resizable
10.Disallow roaming endpoint when using AllowedIPs=0.0.0.0/0.
   Only the original endpoint is added to the routing table, so
   roaming to another endpoint would result in an endless loop of packets.
11.Display approximate WireGuard framing overhead in stats
12.Preparations for protocol handling with multiple threads
13.Delete the routes we made when disconnecting
14.Fix error message about unable to delete a route when connecting
This commit is contained in:
Ludvig Strigeus 2018-08-12 03:27:14 +02:00
parent 1a7ba8683e
commit cf92ac7a0c
68 changed files with 15851 additions and 1632 deletions

4
.gitignore vendored
View file

@ -4,7 +4,6 @@
/Build
/Win32/
/TunSafe.aps
/ipch
/*.sdf
/*vcxproj.user
/*.opensdf
@ -15,4 +14,5 @@
/*.psess
/*.vspx
/installer/*.zip
/config/
/config/
/tunsafe.com/

View file

@ -4,7 +4,6 @@ ListenPort = 51820
Address = 192.168.2.2/24
MTU = 1420
[Peer]
PublicKey = 2m1BdGW9AwwF5dqaGm0NgMggdDZDUPFAL4JxCySdgBw=
#AllowedIPs = 0.0.0.0/0, fc00::2/64
@ -14,3 +13,4 @@ Endpoint = 192.168.1.4:8040
PersistentKeepalive = 25

Binary file not shown.

View file

@ -25,22 +25,4 @@ Global
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(Performance) = preSolution
HasPerformanceSessions = true
EndGlobalSection
GlobalSection(Performance) = preSolution
HasPerformanceSessions = true
EndGlobalSection
GlobalSection(Performance) = preSolution
HasPerformanceSessions = true
EndGlobalSection
GlobalSection(Performance) = preSolution
HasPerformanceSessions = true
EndGlobalSection
GlobalSection(Performance) = preSolution
HasPerformanceSessions = true
EndGlobalSection
GlobalSection(Performance) = preSolution
HasPerformanceSessions = true
EndGlobalSection
EndGlobal

View file

@ -103,7 +103,6 @@
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);ws2_32.lib;Iphlpapi.lib</AdditionalDependencies>
<UACExecutionLevel>RequireAdministrator</UACExecutionLevel>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
@ -122,7 +121,6 @@
<AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);ws2_32.lib;Iphlpapi.lib;Comctl32.lib</AdditionalDependencies>
<AdditionalManifestDependencies>
</AdditionalManifestDependencies>
<UACExecutionLevel>RequireAdministrator</UACExecutionLevel>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
@ -142,7 +140,6 @@
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);ws2_32.lib;Iphlpapi.lib</AdditionalDependencies>
<UACExecutionLevel>RequireAdministrator</UACExecutionLevel>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@ -167,11 +164,13 @@
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies);ws2_32.lib;Iphlpapi.lib</AdditionalDependencies>
<UACExecutionLevel>RequireAdministrator</UACExecutionLevel>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="bit_ops.h" />
<ClInclude Include="ip_to_peer_map.h" />
<ClInclude Include="service_win32.h" />
<ClInclude Include="service_win32_api.h" />
<ClInclude Include="tunsafe_config.h" />
<ClInclude Include="tunsafe_cpu.h" />
<ClInclude Include="crypto\aesgcm\aes.h" />
@ -179,12 +178,15 @@
<ClInclude Include="crypto\chacha20poly1305.h" />
<ClInclude Include="crypto\siphash.h" />
<ClInclude Include="tunsafe_endian.h" />
<ClInclude Include="ipzip2\ipzip2.h" />
<ClInclude Include="netapi.h" />
<ClInclude Include="network_win32_api.h" />
<ClInclude Include="network_win32_dnsblock.h" />
<ClInclude Include="resource.h" />
<ClInclude Include="stdafx.h" />
<ClInclude Include="tunsafe_threading.h" />
<ClInclude Include="tunsafe_types.h" />
<ClInclude Include="util_win32.h" />
<ClInclude Include="wireguard_config.h" />
<ClInclude Include="util.h" />
<ClInclude Include="network_win32.h" />
@ -193,13 +195,18 @@
</ItemGroup>
<ItemGroup>
<ClCompile Include="benchmark.cpp" />
<ClCompile Include="ip_to_peer_map.cpp" />
<ClCompile Include="service_win32.cpp" />
<ClCompile Include="tunsafe_cpu.cpp" />
<ClCompile Include="crypto\aesgcm\aesgcm.cpp" />
<ClCompile Include="crypto\blake2s_sse.cpp" />
<ClCompile Include="crypto\siphash.cpp" />
<ClCompile Include="ipzip2\ipzip2.cpp" />
<ClCompile Include="network_win32_dnsblock.cpp" />
<ClCompile Include="tunsafe_threading.cpp" />
<ClCompile Include="util.cpp" />
<ClCompile Include="network_win32.cpp" />
<ClCompile Include="util_win32.cpp" />
<ClCompile Include="wireguard.cpp" />
<ClCompile Include="crypto\blake2s.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
@ -229,10 +236,10 @@
<ResourceCompile Include="TunSafe.rc" />
</ItemGroup>
<ItemGroup>
<Image Include="downarrow.bmp" />
<Image Include="icons\green-bg-icon.ico" />
<Image Include="icons\green-icon.ico" />
<Image Include="icons\neutral-icon.ico" />
<Image Include="icons\red-icon.ico" />
</ItemGroup>
<ItemGroup>
<NASM Include="crypto\aesgcm\aesni_gcm_x64_nasm.asm">

View file

@ -53,6 +53,9 @@
<ClInclude Include="netapi.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="ipzip2\ipzip2.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="crypto\siphash.h">
<Filter>crypto</Filter>
</ClInclude>
@ -71,6 +74,21 @@
<ClInclude Include="tunsafe_config.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="service_win32_api.h">
<Filter>Source Files\Win32</Filter>
</ClInclude>
<ClInclude Include="service_win32.h">
<Filter>Source Files\Win32</Filter>
</ClInclude>
<ClInclude Include="tunsafe_threading.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="ip_to_peer_map.h">
<Filter>Source Files</Filter>
</ClInclude>
<ClInclude Include="util_win32.h">
<Filter>Source Files\Win32</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="stdafx.cpp">
@ -109,6 +127,9 @@
<ClCompile Include="wireguard_config.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="ipzip2\ipzip2.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="crypto\siphash.cpp">
<Filter>crypto</Filter>
</ClCompile>
@ -121,6 +142,18 @@
<ClCompile Include="tunsafe_cpu.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="service_win32.cpp">
<Filter>Source Files\Win32</Filter>
</ClCompile>
<ClCompile Include="util_win32.cpp">
<Filter>Source Files\Win32</Filter>
</ClCompile>
<ClCompile Include="tunsafe_threading.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="ip_to_peer_map.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="TunSafe.rc" />
@ -128,8 +161,8 @@
<ItemGroup>
<Image Include="icons\neutral-icon.ico" />
<Image Include="icons\green-icon.ico" />
<Image Include="icons\red-icon.ico" />
<Image Include="icons\green-bg-icon.ico" />
<Image Include="downarrow.bmp" />
</ItemGroup>
<ItemGroup>
<NASM Include="crypto\chacha20_x64.asm">

View file

@ -66,11 +66,15 @@ void Benchmark() {
fake_glb = dst;
size_t max_bytes = 1000000000;
#if defined(ARCH_CPU_ARM_FAMILY)
max_bytes = 100000000;
#endif
auto RunOneBenchmark = [&](const char *name, const std::function<uint64(size_t)> &ff) {
uint64 bytes = 0;
QueryPerformanceCounter((LARGE_INTEGER*)&b);
size_t i;
for (i = 0; bytes < 1000000000; i++)
for (i = 0; bytes < max_bytes; i++)
bytes += ff(i);
QueryPerformanceCounter((LARGE_INTEGER*)&a);
RINFO("%s: %f MB/s", name, (double)bytes * 0.000001 / (a - b) * f);

View file

@ -11,9 +11,10 @@ import re
MSBUILD_PATH = r"C:\Dev\VS2017\MSBuild\15.0\Bin\MSBuild.exe"
NSIS_PATH = r'C:\Dev\NSIS\makeNSIS.EXE'
SIGNTOOL_PATH = r'c:\Program Files (x86)\Windows Kits\10\bin\10.0.15063.0\x86\signtool.exe'
SIGNTOOL_KEY_PATH = '' # put key here
SIGNTOOL_PASS = '' # put key pass here
SIGNTOOL_KEY_PATH = "" # path to key file
SIGNTOOL_PASS = "" # password
def RmTree(path):
try:

3
build_freebsd.sh Normal file → Executable file
View file

@ -1,5 +1,4 @@
g++7 -I . -O2 -static -mssse3 -o tunsafe benchmark.cpp tunsafe_cpu.cpp wireguard_config.cpp \
g++7 -I . -O2 -DNDEBUG -static -mssse3 -o tunsafe benchmark.cpp tunsafe_cpu.cpp wireguard_config.cpp ip_to_peer_map.cpp tunsafe_threading.cpp \
wireguard.cpp wireguard_proto.cpp util.cpp network_bsd.cpp network_bsd_common.cpp \
crypto/blake2s.cpp crypto/blake2s_sse.cpp crypto/chacha20poly1305.cpp crypto/curve25519-donna.cpp \
crypto/siphash.cpp crypto/chacha20_x64_gas.s crypto/poly1305_x64_gas.s ipzip2/ipzip2.cpp -lrt -pthread

2
build_linux.sh Normal file → Executable file
View file

@ -1,6 +1,6 @@
#!/bin/sh
clang++-6.0 -c -march=skylake-avx512 crypto/poly1305_x64_gas.s crypto/chacha20_x64_gas.s
clang++-6.0 -I . -O3 -mssse3 -pthread -lrt -o tunsafe util.cpp wireguard_config.cpp wireguard.cpp \
clang++-6.0 -I . -O3 -DNDEBUG -mssse3 -pthread -lrt -o tunsafe util.cpp wireguard_config.cpp wireguard.cpp ip_to_peer_map.cpp tunsafe_threading.cpp \
wireguard_proto.cpp network_bsd.cpp network_bsd_common.cpp tunsafe_cpu.cpp benchmark.cpp crypto/blake2s.cpp crypto/blake2s_sse.cpp crypto/chacha20poly1305.cpp \
crypto/curve25519-donna.cpp crypto/siphash.cpp chacha20_x64_gas.o crypto/aesgcm/aesni_gcm_x64_gas.s \
crypto/aesgcm/aesni_x64_gas.s crypto/aesgcm/aesgcm.cpp poly1305_x64_gas.o ipzip2/ipzip2.cpp \

11
build_linux_rpi.sh Executable file
View file

@ -0,0 +1,11 @@
#!/bin/sh
# Builds the `tunsafe` binary with g++-6 for a 32-bit ARMv7 target with NEON
# (-march=armv7-a -mthumb -mfpu=neon). Judging by the file name this is meant
# for Raspberry Pi boards -- TODO confirm which models are supported.
#
# Run from the repository root: all source paths below are relative to it.

# Abort immediately if any step fails, so a broken preprocessing step does not
# lead to compiling stale *.preprocessed.s files.
set -e

# The ARM assembly sources contain C preprocessor directives. Expand them up
# front with __ARM_ARCH__ fixed to 7 and write the result to *.preprocessed.s,
# which is what gets passed to the compiler below.
cpp -D__ARM_ARCH__=7 crypto/chacha20/chacha20-arm.s > crypto/chacha20/chacha20-arm.preprocessed.s
cpp -D__ARM_ARCH__=7 crypto/poly1305/poly1305-arm.s > crypto/poly1305/poly1305-arm.preprocessed.s

# Compile and link everything in a single invocation into ./tunsafe.
# -DNDEBUG disables assert(); -g and -fno-omit-frame-pointer keep the optimized
# binary debuggable/profilable. The x86-only sources (e.g. blake2s_sse.cpp and
# the x64 assembly) used by the other build scripts are intentionally absent;
# the preprocessed ARM ChaCha20/Poly1305 assembly is linked instead.
g++-6 -mfpu=neon -I . -g -O2 -DNDEBUG -fno-omit-frame-pointer -march=armv7-a -mthumb -std=c++11 -pthread -lrt -o tunsafe util.cpp wireguard_config.cpp wireguard.cpp ip_to_peer_map.cpp tunsafe_threading.cpp \
wireguard_proto.cpp network_bsd.cpp network_bsd_common.cpp tunsafe_cpu.cpp benchmark.cpp crypto/blake2s.cpp crypto/chacha20poly1305.cpp \
crypto/curve25519-donna.cpp crypto/siphash.cpp crypto/aesgcm/aesgcm.cpp ipzip2/ipzip2.cpp \
crypto/chacha20/chacha20-arm.preprocessed.s crypto/poly1305/poly1305-arm.preprocessed.s

2
build_osx.sh Normal file → Executable file
View file

@ -4,7 +4,7 @@ set -e
clang++ -c -mavx512f -mavx512vl crypto/poly1305_x64_gas_macosx.s crypto/chacha20_x64_gas_macosx.s
clang++ -g -O3 -I . -std=c++11 -DNDEBUG=1 -fno-exceptions -fno-rtti -ffunction-sections -o tunsafe \
wireguard_config.cpp wireguard.cpp wireguard_proto.cpp util.cpp network_bsd.cpp network_bsd_common.cpp benchmark.cpp tunsafe_cpu.cpp \
wireguard_config.cpp ip_to_peer_map.cpp tunsafe_threading.cpp wireguard.cpp wireguard_proto.cpp util.cpp network_bsd.cpp network_bsd_common.cpp benchmark.cpp tunsafe_cpu.cpp \
crypto/blake2s.cpp crypto/blake2s_sse.cpp crypto/chacha20poly1305.cpp crypto/curve25519-donna.cpp \
crypto/siphash.cpp crypto/aesgcm/aesgcm.cpp ipzip2/ipzip2.cpp \
crypto/aesgcm/aesni_gcm_x64_gas_macosx.s crypto/aesgcm/aesni_x64_gas_macosx.s crypto/aesgcm/ghash_x64_gas_macosx.s \

0
crypto/blake2s.cpp Normal file → Executable file
View file

0
crypto/blake2s_sse.cpp Normal file → Executable file
View file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -43,8 +43,22 @@ void _cdecl poly1305_emit_avx(void *ctx, uint8 mac[16], const uint32 nonce[4]);
void _cdecl poly1305_blocks_avx(void *ctx, const uint8 *inp, size_t len, uint32 padbit);
void _cdecl poly1305_blocks_avx2(void *ctx, const uint8 *inp, size_t len, uint32 padbit);
void _cdecl poly1305_blocks_avx512(void *ctx, const uint8 *inp, size_t len, uint32 padbit);
#if defined(ARCH_CPU_ARM_FAMILY)
void chacha20_arm(uint8 *out, const uint8 *in, size_t len, const uint32 key[8], const uint32 counter[4]);
void chacha20_neon(uint8 *out, const uint8 *in, size_t len, const uint32 key[8], const uint32 counter[4]);
#endif
void poly1305_init_arm(void *ctx, const uint8 key[16]);
void poly1305_blocks_arm(void *ctx, const uint8 *inp, size_t len, uint32 padbit);
void poly1305_emit_arm(void *ctx, uint8 mac[16], const uint32 nonce[4]);
void poly1305_blocks_neon(void *ctx, const uint8 *inp, size_t len, uint32 padbit);
void poly1305_emit_neon(void *ctx, uint8 mac[16], const uint32 nonce[4]);
}
struct chacha20_ctx {
uint32 state[CHACHA20_BLOCK_SIZE / sizeof(uint32)];
};
@ -193,6 +207,17 @@ SAFEBUFFERS static void chacha20_crypt(struct chacha20_ctx *ctx, uint8 *dst, con
}
#endif // defined(ARCH_CPU_X86_64)
#if defined(ARCH_CPU_ARM_FAMILY)
if (ARM_PCAP_NEON) {
chacha20_neon(dst, src, bytes, &ctx->state[4], &ctx->state[12]);
} else {
chacha20_arm(dst, src, bytes, &ctx->state[4], &ctx->state[12]);
}
ctx->state[12] += (bytes + 63) / 64;
return;
#endif // defined(ARCH_CPU_ARM_FAMILY)
if (dst != src)
memcpy(dst, src, bytes);
@ -385,7 +410,7 @@ SAFEBUFFERS static void poly1305_init(struct poly1305_ctx *ctx, const uint8 key[
#if defined(ARCH_CPU_X86_64)
poly1305_init_x86_64(ctx->opaque, key);
#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
#elif defined(ARCH_CPU_ARM_FAMILY)
poly1305_init_arm(ctx->opaque, key);
#elif defined(CONFIG_MIPS) && defined(CONFIG_64BIT)
poly1305_init_mips(ctx->opaque, key);
@ -409,7 +434,12 @@ static inline void poly1305_blocks(void *ctx, const uint8 *inp, size_t len, uint
poly1305_blocks_avx(ctx, inp, len, padbit);
else
poly1305_blocks_x86_64(ctx, inp, len, padbit);
#else // defined(ARCH_CPU_X86_64)
#elif defined(ARCH_CPU_ARM_FAMILY)
if (ARM_PCAP_NEON)
poly1305_blocks_neon(ctx, inp, len, padbit);
else
poly1305_blocks_arm(ctx, inp, len, padbit);
#else
poly1305_blocks_generic(ctx, inp, len, padbit);
#endif // defined(ARCH_CPU_X86_64)
}
@ -421,6 +451,11 @@ static inline void poly1305_emit(void *ctx, uint8 mac[16], const uint32 nonce[4]
poly1305_emit_avx(ctx, mac, nonce);
else
poly1305_emit_x86_64(ctx, mac, nonce);
#elif defined(ARCH_CPU_ARM_FAMILY)
if (ARM_PCAP_NEON)
poly1305_emit_neon(ctx, mac, nonce);
else
poly1305_emit_arm(ctx, mac, nonce);
#else // defined(ARCH_CPU_X86_64)
poly1305_emit_generic(ctx, mac, nonce);
#endif // defined(ARCH_CPU_X86_64)

View file

@ -1,17 +1,17 @@
#ifndef TUNSAFE_CRYPTO_CURVE25519_DONNA_H_
#define TUNSAFE_CRYPTO_CURVE25519_DONNA_H_
#include "tunsafe_types.h"
void curve25519_donna_ref(uint8 *mypublic, const uint8 *secret, const uint8 *basepoint);
extern "C" void curve25519_donna_x64(uint8 *mypublic, const uint8 *secret, const uint8 *basepoint);
#if defined(ARCH_CPU_X86_64) && defined(COMPILER_MSVC)
#define curve25519_donna curve25519_donna_x64
#else
#define curve25519_donna curve25519_donna_ref
#endif
void curve25519_normalize(uint8 *e);
#ifndef TUNSAFE_CRYPTO_CURVE25519_DONNA_H_
#define TUNSAFE_CRYPTO_CURVE25519_DONNA_H_
#include "tunsafe_types.h"
void curve25519_donna_ref(uint8 *mypublic, const uint8 *secret, const uint8 *basepoint);
extern "C" void curve25519_donna_x64(uint8 *mypublic, const uint8 *secret, const uint8 *basepoint);
#if defined(ARCH_CPU_X86_64) && defined(COMPILER_MSVC)
#define curve25519_donna curve25519_donna_x64
#else
#define curve25519_donna curve25519_donna_ref
#endif
void curve25519_normalize(uint8 *e);
#endif // TUNSAFE_CRYPTO_CURVE25519_DONNA_H_

0
crypto/make_all_asm_files.sh Normal file → Executable file
View file

0
crypto/make_poly1305_x64.pl Normal file → Executable file
View file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,944 @@
#! /usr/bin/env perl
# Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements Poly1305 hash for ARMv8.
#
# June 2015
#
# Numbers are cycles per processed byte with poly1305_blocks alone.
#
# IALU/gcc-4.9 NEON
#
# Apple A7 1.86/+5% 0.72
# Cortex-A53 2.69/+58% 1.47
# Cortex-A57 2.70/+7% 1.14
# Denver 1.64/+50% 1.18(*)
# X-Gene 2.13/+68% 2.27
# Mongoose 1.77/+75% 1.12
# Kryo 2.70/+55% 1.13
#
# (*) estimate based on resources availability is less than 1.0,
# i.e. measured result is worse than expected, presumably binary
# translator is not almighty;
# Command-line interface: first argument is the assembler "flavour" handed to
# arm-xlate.pl, second is the output file name.
$flavour=shift;
$output=shift;
# Derive the directory this script lives in from $0 (keeps the trailing path
# separator), then look for the arm-xlate.pl translator next to the script or
# under ../../perlasm/ relative to it. Give up if neither exists.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";
# Pipe everything printed to STDOUT through arm-xlate.pl, run with the same
# Perl interpreter ($^X) that is executing this script.
open OUT,"| \"$^X\" $xlate $flavour $output";
*STDOUT=*OUT;
# Symbolic names for the general-purpose registers used in the generated code.
# x0..x3 carry the arguments of the blocks routine: context pointer, input
# pointer, length and pad bit.
my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));
# The emit routine takes (ctx, mac, nonce), so its 2nd and 3rd arguments reuse
# the registers named $inp and $len (x1 and x2).
my ($mac,$nonce)=($inp,$len);
# x4..x14: hash limbs (h0-h2), key words (r0, r1), s1 = r1 + (r1 >> 2),
# temporaries (t0, t1) and product/accumulator words (d0-d2).
my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
$code.=<<___;
#include "arm_arch.h"
.text
// forward "declarations" are required for Apple
.extern OPENSSL_armcap_P
.globl poly1305_blocks
.globl poly1305_emit
.globl poly1305_init
.type poly1305_init,%function
.align 5
poly1305_init:
cmp $inp,xzr
stp xzr,xzr,[$ctx] // zero hash value
stp xzr,xzr,[$ctx,#16] // [along with is_base2_26]
csel x0,xzr,x0,eq
b.eq .Lno_key
#ifdef __ILP32__
ldrsw $t1,.LOPENSSL_armcap_P
#else
ldr $t1,.LOPENSSL_armcap_P
#endif
adr $t0,.LOPENSSL_armcap_P
ldp $r0,$r1,[$inp] // load key
mov $s1,#0xfffffffc0fffffff
movk $s1,#0x0fff,lsl#48
ldr w17,[$t0,$t1]
#ifdef __ARMEB__
rev $r0,$r0 // flip bytes
rev $r1,$r1
#endif
and $r0,$r0,$s1 // &=0ffffffc0fffffff
and $s1,$s1,#-4
and $r1,$r1,$s1 // &=0ffffffc0ffffffc
stp $r0,$r1,[$ctx,#32] // save key value
tst w17,#ARMV7_NEON
adr $d0,poly1305_blocks
adr $r0,poly1305_blocks_neon
adr $d1,poly1305_emit
adr $r1,poly1305_emit_neon
csel $d0,$d0,$r0,eq
csel $d1,$d1,$r1,eq
#ifdef __ILP32__
stp w12,w13,[$len]
#else
stp $d0,$d1,[$len]
#endif
mov x0,#1
.Lno_key:
ret
.size poly1305_init,.-poly1305_init
.type poly1305_blocks,%function
.align 5
poly1305_blocks:
ands $len,$len,#-16
b.eq .Lno_data
ldp $h0,$h1,[$ctx] // load hash value
ldp $r0,$r1,[$ctx,#32] // load key value
ldr $h2,[$ctx,#16]
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
b .Loop
.align 5
.Loop:
ldp $t0,$t1,[$inp],#16 // load input
sub $len,$len,#16
#ifdef __ARMEB__
rev $t0,$t0
rev $t1,$t1
#endif
adds $h0,$h0,$t0 // accumulate input
adcs $h1,$h1,$t1
mul $d0,$h0,$r0 // h0*r0
adc $h2,$h2,$padbit
umulh $d1,$h0,$r0
mul $t0,$h1,$s1 // h1*5*r1
umulh $t1,$h1,$s1
adds $d0,$d0,$t0
mul $t0,$h0,$r1 // h0*r1
adc $d1,$d1,$t1
umulh $d2,$h0,$r1
adds $d1,$d1,$t0
mul $t0,$h1,$r0 // h1*r0
adc $d2,$d2,xzr
umulh $t1,$h1,$r0
adds $d1,$d1,$t0
mul $t0,$h2,$s1 // h2*5*r1
adc $d2,$d2,$t1
mul $t1,$h2,$r0 // h2*r0
adds $d1,$d1,$t0
adc $d2,$d2,$t1
and $t0,$d2,#-4 // final reduction
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$d0,$t0
adcs $h1,$d1,xzr
adc $h2,$h2,xzr
cbnz $len,.Loop
stp $h0,$h1,[$ctx] // store hash value
str $h2,[$ctx,#16]
.Lno_data:
ret
.size poly1305_blocks,.-poly1305_blocks
.type poly1305_emit,%function
.align 5
poly1305_emit:
ldp $h0,$h1,[$ctx] // load hash base 2^64
ldr $h2,[$ctx,#16]
ldp $t0,$t1,[$nonce] // load nonce
adds $d0,$h0,#5 // compare to modulus
adcs $d1,$h1,xzr
adc $d2,$h2,xzr
tst $d2,#-4 // see if it's carried/borrowed
csel $h0,$h0,$d0,eq
csel $h1,$h1,$d1,eq
#ifdef __ARMEB__
ror $t0,$t0,#32 // flip nonce words
ror $t1,$t1,#32
#endif
adds $h0,$h0,$t0 // accumulate nonce
adc $h1,$h1,$t1
#ifdef __ARMEB__
rev $h0,$h0 // flip output bytes
rev $h1,$h1
#endif
stp $h0,$h1,[$mac] // write result
ret
.size poly1305_emit,.-poly1305_emit
___
# NEON register allocation for the vectorized code path that follows.
# v0..v8 (as .4s): the table loaded from the context at offset 48 by
# .Ldo_neon -- base 2^26 limbs r0..r4 of the key powers, interleaved with the
# pre-multiplied-by-5 limbs s1..s4, in the order written by poly1305_splat.
my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
# v9..v13 (as .2s): base 2^26 limbs of input blocks inp[0:1].
my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
# v14..v18 (as .2s): base 2^26 limbs of input blocks inp[2:3].
my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));
# v19..v23 (as .2d): 64-bit product accumulators d0..d4.
my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));
# v24..v28 (as .2s): current hash value limbs h0..h4.
my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));
# v29..v31: two scratch registers plus a mask; no arrangement suffix here, it
# is appended at each use site (e.g. $MASK.2d). The mask is built as all-ones
# shifted right by 38, i.e. the low 26 bits of each 64-bit lane.
my ($T0,$T1,$MASK) = map("v$_",(29..31));
# x16 is the second input pointer (inp[2:3], or the .Lzeros block when input
# is running out); x17 holds the address of .Lzeros.
my ($in2,$zeros)=("x16","x17");
# x17 does double duty: on entry to poly1305_blocks_neon it holds the
# is_base2_26 flag loaded from the context, before it is reused for $zeros.
my $is_base2_26 = $zeros; # borrow
$code.=<<___;
.type poly1305_mult,%function
.align 5
poly1305_mult:
mul $d0,$h0,$r0 // h0*r0
umulh $d1,$h0,$r0
mul $t0,$h1,$s1 // h1*5*r1
umulh $t1,$h1,$s1
adds $d0,$d0,$t0
mul $t0,$h0,$r1 // h0*r1
adc $d1,$d1,$t1
umulh $d2,$h0,$r1
adds $d1,$d1,$t0
mul $t0,$h1,$r0 // h1*r0
adc $d2,$d2,xzr
umulh $t1,$h1,$r0
adds $d1,$d1,$t0
mul $t0,$h2,$s1 // h2*5*r1
adc $d2,$d2,$t1
mul $t1,$h2,$r0 // h2*r0
adds $d1,$d1,$t0
adc $d2,$d2,$t1
and $t0,$d2,#-4 // final reduction
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$d0,$t0
adcs $h1,$d1,xzr
adc $h2,$h2,xzr
ret
.size poly1305_mult,.-poly1305_mult
.type poly1305_splat,%function
.align 5
poly1305_splat:
and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26
ubfx x13,$h0,#26,#26
extr x14,$h1,$h0,#52
and x14,x14,#0x03ffffff
ubfx x15,$h1,#14,#26
extr x16,$h2,$h1,#40
str w12,[$ctx,#16*0] // r0
add w12,w13,w13,lsl#2 // r1*5
str w13,[$ctx,#16*1] // r1
add w13,w14,w14,lsl#2 // r2*5
str w12,[$ctx,#16*2] // s1
str w14,[$ctx,#16*3] // r2
add w14,w15,w15,lsl#2 // r3*5
str w13,[$ctx,#16*4] // s2
str w15,[$ctx,#16*5] // r3
add w15,w16,w16,lsl#2 // r4*5
str w14,[$ctx,#16*6] // s3
str w16,[$ctx,#16*7] // r4
str w15,[$ctx,#16*8] // s4
ret
.size poly1305_splat,.-poly1305_splat
.type poly1305_blocks_neon,%function
.align 5
poly1305_blocks_neon:
ldr $is_base2_26,[$ctx,#24]
cmp $len,#128
b.hs .Lblocks_neon
cbz $is_base2_26,poly1305_blocks
.Lblocks_neon:
stp x29,x30,[sp,#-80]!
add x29,sp,#0
ands $len,$len,#-16
b.eq .Lno_data_neon
cbz $is_base2_26,.Lbase2_64_neon
ldp w10,w11,[$ctx] // load hash value base 2^26
ldp w12,w13,[$ctx,#8]
ldr w14,[$ctx,#16]
tst $len,#31
b.eq .Leven_neon
ldp $r0,$r1,[$ctx,#32] // load key value
add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
lsr $h1,x12,#12
adds $h0,$h0,x12,lsl#52
add $h1,$h1,x13,lsl#14
adc $h1,$h1,xzr
lsr $h2,x14,#24
adds $h1,$h1,x14,lsl#40
adc $d2,$h2,xzr // can be partially reduced...
ldp $d0,$d1,[$inp],#16 // load input
sub $len,$len,#16
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
and $t0,$d2,#-4 // ... so reduce
and $h2,$d2,#3
add $t0,$t0,$d2,lsr#2
adds $h0,$h0,$t0
adcs $h1,$h1,xzr
adc $h2,$h2,xzr
#ifdef __ARMEB__
rev $d0,$d0
rev $d1,$d1
#endif
adds $h0,$h0,$d0 // accumulate input
adcs $h1,$h1,$d1
adc $h2,$h2,$padbit
bl poly1305_mult
ldr x30,[sp,#8]
cbz $padbit,.Lstore_base2_64_neon
and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,$h0,#26,#26
extr x12,$h1,$h0,#52
and x12,x12,#0x03ffffff
ubfx x13,$h1,#14,#26
extr x14,$h2,$h1,#40
cbnz $len,.Leven_neon
stp w10,w11,[$ctx] // store hash value base 2^26
stp w12,w13,[$ctx,#8]
str w14,[$ctx,#16]
b .Lno_data_neon
.align 4
.Lstore_base2_64_neon:
stp $h0,$h1,[$ctx] // store hash value base 2^64
stp $h2,xzr,[$ctx,#16] // note that is_base2_26 is zeroed
b .Lno_data_neon
.align 4
.Lbase2_64_neon:
ldp $r0,$r1,[$ctx,#32] // load key value
ldp $h0,$h1,[$ctx] // load hash value base 2^64
ldr $h2,[$ctx,#16]
tst $len,#31
b.eq .Linit_neon
ldp $d0,$d1,[$inp],#16 // load input
sub $len,$len,#16
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
#ifdef __ARMEB__
rev $d0,$d0
rev $d1,$d1
#endif
adds $h0,$h0,$d0 // accumulate input
adcs $h1,$h1,$d1
adc $h2,$h2,$padbit
bl poly1305_mult
.Linit_neon:
and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,$h0,#26,#26
extr x12,$h1,$h0,#52
and x12,x12,#0x03ffffff
ubfx x13,$h1,#14,#26
extr x14,$h2,$h1,#40
stp d8,d9,[sp,#16] // meet ABI requirements
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
fmov ${H0},x10
fmov ${H1},x11
fmov ${H2},x12
fmov ${H3},x13
fmov ${H4},x14
////////////////////////////////// initialize r^n table
mov $h0,$r0 // r^1
add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
mov $h1,$r1
mov $h2,xzr
add $ctx,$ctx,#48+12
bl poly1305_splat
bl poly1305_mult // r^2
sub $ctx,$ctx,#4
bl poly1305_splat
bl poly1305_mult // r^3
sub $ctx,$ctx,#4
bl poly1305_splat
bl poly1305_mult // r^4
sub $ctx,$ctx,#4
bl poly1305_splat
ldr x30,[sp,#8]
add $in2,$inp,#32
adr $zeros,.Lzeros
subs $len,$len,#64
csel $in2,$zeros,$in2,lo
mov x4,#1
str x4,[$ctx,#-24] // set is_base2_26
sub $ctx,$ctx,#48 // restore original $ctx
b .Ldo_neon
.align 4
.Leven_neon:
add $in2,$inp,#32
adr $zeros,.Lzeros
subs $len,$len,#64
csel $in2,$zeros,$in2,lo
stp d8,d9,[sp,#16] // meet ABI requirements
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
fmov ${H0},x10
fmov ${H1},x11
fmov ${H2},x12
fmov ${H3},x13
fmov ${H4},x14
.Ldo_neon:
ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
ldp x9,x13,[$in2],#48
lsl $padbit,$padbit,#24
add x15,$ctx,#48
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
and x5,x9,#0x03ffffff
ubfx x6,x8,#26,#26
ubfx x7,x9,#26,#26
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
extr x8,x12,x8,#52
extr x9,x13,x9,#52
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
fmov $IN23_0,x4
and x8,x8,#0x03ffffff
and x9,x9,#0x03ffffff
ubfx x10,x12,#14,#26
ubfx x11,x13,#14,#26
add x12,$padbit,x12,lsr#40
add x13,$padbit,x13,lsr#40
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
fmov $IN23_1,x6
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
fmov $IN23_2,x8
fmov $IN23_3,x10
fmov $IN23_4,x12
ldp x8,x12,[$inp],#16 // inp[0:1]
ldp x9,x13,[$inp],#48
ld1 {$R0,$R1,$S1,$R2},[x15],#64
ld1 {$S2,$R3,$S3,$R4},[x15],#64
ld1 {$S4},[x15]
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
and x5,x9,#0x03ffffff
ubfx x6,x8,#26,#26
ubfx x7,x9,#26,#26
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
extr x8,x12,x8,#52
extr x9,x13,x9,#52
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
fmov $IN01_0,x4
and x8,x8,#0x03ffffff
and x9,x9,#0x03ffffff
ubfx x10,x12,#14,#26
ubfx x11,x13,#14,#26
add x12,$padbit,x12,lsr#40
add x13,$padbit,x13,lsr#40
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
fmov $IN01_1,x6
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
movi $MASK.2d,#-1
fmov $IN01_2,x8
fmov $IN01_3,x10
fmov $IN01_4,x12
ushr $MASK.2d,$MASK.2d,#38
b.ls .Lskip_loop
.align 4
.Loop_neon:
////////////////////////////////////////////////////////////////
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
// \___________________/
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
// \___________________/ \____________________/
//
// Note that we start with inp[2:3]*r^2. This is because it
// doesn't depend on reduction in previous iteration.
////////////////////////////////////////////////////////////////
// d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
// d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
// d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
// d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
subs $len,$len,#64
umull $ACC4,$IN23_0,${R4}[2]
csel $in2,$zeros,$in2,lo
umull $ACC3,$IN23_0,${R3}[2]
umull $ACC2,$IN23_0,${R2}[2]
ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
umull $ACC1,$IN23_0,${R1}[2]
ldp x9,x13,[$in2],#48
umull $ACC0,$IN23_0,${R0}[2]
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
umlal $ACC4,$IN23_1,${R3}[2]
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
umlal $ACC3,$IN23_1,${R2}[2]
and x5,x9,#0x03ffffff
umlal $ACC2,$IN23_1,${R1}[2]
ubfx x6,x8,#26,#26
umlal $ACC1,$IN23_1,${R0}[2]
ubfx x7,x9,#26,#26
umlal $ACC0,$IN23_1,${S4}[2]
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
umlal $ACC4,$IN23_2,${R2}[2]
extr x8,x12,x8,#52
umlal $ACC3,$IN23_2,${R1}[2]
extr x9,x13,x9,#52
umlal $ACC2,$IN23_2,${R0}[2]
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
umlal $ACC1,$IN23_2,${S4}[2]
fmov $IN23_0,x4
umlal $ACC0,$IN23_2,${S3}[2]
and x8,x8,#0x03ffffff
umlal $ACC4,$IN23_3,${R1}[2]
and x9,x9,#0x03ffffff
umlal $ACC3,$IN23_3,${R0}[2]
ubfx x10,x12,#14,#26
umlal $ACC2,$IN23_3,${S4}[2]
ubfx x11,x13,#14,#26
umlal $ACC1,$IN23_3,${S3}[2]
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
umlal $ACC0,$IN23_3,${S2}[2]
fmov $IN23_1,x6
add $IN01_2,$IN01_2,$H2
add x12,$padbit,x12,lsr#40
umlal $ACC4,$IN23_4,${R0}[2]
add x13,$padbit,x13,lsr#40
umlal $ACC3,$IN23_4,${S4}[2]
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
umlal $ACC2,$IN23_4,${S3}[2]
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
umlal $ACC1,$IN23_4,${S2}[2]
fmov $IN23_2,x8
umlal $ACC0,$IN23_4,${S1}[2]
fmov $IN23_3,x10
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4 and accumulate
add $IN01_0,$IN01_0,$H0
fmov $IN23_4,x12
umlal $ACC3,$IN01_2,${R1}[0]
ldp x8,x12,[$inp],#16 // inp[0:1]
umlal $ACC0,$IN01_2,${S3}[0]
ldp x9,x13,[$inp],#48
umlal $ACC4,$IN01_2,${R2}[0]
umlal $ACC1,$IN01_2,${S4}[0]
umlal $ACC2,$IN01_2,${R0}[0]
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
add $IN01_1,$IN01_1,$H1
umlal $ACC3,$IN01_0,${R3}[0]
umlal $ACC4,$IN01_0,${R4}[0]
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
umlal $ACC2,$IN01_0,${R2}[0]
and x5,x9,#0x03ffffff
umlal $ACC0,$IN01_0,${R0}[0]
ubfx x6,x8,#26,#26
umlal $ACC1,$IN01_0,${R1}[0]
ubfx x7,x9,#26,#26
add $IN01_3,$IN01_3,$H3
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
umlal $ACC3,$IN01_1,${R2}[0]
extr x8,x12,x8,#52
umlal $ACC4,$IN01_1,${R3}[0]
extr x9,x13,x9,#52
umlal $ACC0,$IN01_1,${S4}[0]
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
umlal $ACC2,$IN01_1,${R1}[0]
fmov $IN01_0,x4
umlal $ACC1,$IN01_1,${R0}[0]
and x8,x8,#0x03ffffff
add $IN01_4,$IN01_4,$H4
and x9,x9,#0x03ffffff
umlal $ACC3,$IN01_3,${R0}[0]
ubfx x10,x12,#14,#26
umlal $ACC0,$IN01_3,${S2}[0]
ubfx x11,x13,#14,#26
umlal $ACC4,$IN01_3,${R1}[0]
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
umlal $ACC1,$IN01_3,${S3}[0]
fmov $IN01_1,x6
umlal $ACC2,$IN01_3,${S4}[0]
add x12,$padbit,x12,lsr#40
umlal $ACC3,$IN01_4,${S4}[0]
add x13,$padbit,x13,lsr#40
umlal $ACC0,$IN01_4,${S1}[0]
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
umlal $ACC4,$IN01_4,${R0}[0]
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
umlal $ACC1,$IN01_4,${S2}[0]
fmov $IN01_2,x8
umlal $ACC2,$IN01_4,${S3}[0]
fmov $IN01_3,x10
fmov $IN01_4,x12
/////////////////////////////////////////////////////////////////
// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
// and P. Schwabe
//
// [see discussion in poly1305-armv4 module]
ushr $T0.2d,$ACC3,#26
xtn $H3,$ACC3
ushr $T1.2d,$ACC0,#26
and $ACC0,$ACC0,$MASK.2d
add $ACC4,$ACC4,$T0.2d // h3 -> h4
bic $H3,#0xfc,lsl#24 // &=0x03ffffff
add $ACC1,$ACC1,$T1.2d // h0 -> h1
ushr $T0.2d,$ACC4,#26
xtn $H4,$ACC4
ushr $T1.2d,$ACC1,#26
xtn $H1,$ACC1
bic $H4,#0xfc,lsl#24
add $ACC2,$ACC2,$T1.2d // h1 -> h2
add $ACC0,$ACC0,$T0.2d
shl $T0.2d,$T0.2d,#2
shrn $T1.2s,$ACC2,#26
xtn $H2,$ACC2
add $ACC0,$ACC0,$T0.2d // h4 -> h0
bic $H1,#0xfc,lsl#24
add $H3,$H3,$T1.2s // h2 -> h3
bic $H2,#0xfc,lsl#24
shrn $T0.2s,$ACC0,#26
xtn $H0,$ACC0
ushr $T1.2s,$H3,#26
bic $H3,#0xfc,lsl#24
bic $H0,#0xfc,lsl#24
add $H1,$H1,$T0.2s // h0 -> h1
add $H4,$H4,$T1.2s // h3 -> h4
b.hi .Loop_neon
.Lskip_loop:
dup $IN23_2,${IN23_2}[0]
add $IN01_2,$IN01_2,$H2
////////////////////////////////////////////////////////////////
// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
adds $len,$len,#32
b.ne .Long_tail
dup $IN23_2,${IN01_2}[0]
add $IN23_0,$IN01_0,$H0
add $IN23_3,$IN01_3,$H3
add $IN23_1,$IN01_1,$H1
add $IN23_4,$IN01_4,$H4
.Long_tail:
dup $IN23_0,${IN23_0}[0]
umull2 $ACC0,$IN23_2,${S3}
umull2 $ACC3,$IN23_2,${R1}
umull2 $ACC4,$IN23_2,${R2}
umull2 $ACC2,$IN23_2,${R0}
umull2 $ACC1,$IN23_2,${S4}
dup $IN23_1,${IN23_1}[0]
umlal2 $ACC0,$IN23_0,${R0}
umlal2 $ACC2,$IN23_0,${R2}
umlal2 $ACC3,$IN23_0,${R3}
umlal2 $ACC4,$IN23_0,${R4}
umlal2 $ACC1,$IN23_0,${R1}
dup $IN23_3,${IN23_3}[0]
umlal2 $ACC0,$IN23_1,${S4}
umlal2 $ACC3,$IN23_1,${R2}
umlal2 $ACC2,$IN23_1,${R1}
umlal2 $ACC4,$IN23_1,${R3}
umlal2 $ACC1,$IN23_1,${R0}
dup $IN23_4,${IN23_4}[0]
umlal2 $ACC3,$IN23_3,${R0}
umlal2 $ACC4,$IN23_3,${R1}
umlal2 $ACC0,$IN23_3,${S2}
umlal2 $ACC1,$IN23_3,${S3}
umlal2 $ACC2,$IN23_3,${S4}
umlal2 $ACC3,$IN23_4,${S4}
umlal2 $ACC0,$IN23_4,${S1}
umlal2 $ACC4,$IN23_4,${R0}
umlal2 $ACC1,$IN23_4,${S2}
umlal2 $ACC2,$IN23_4,${S3}
b.eq .Lshort_tail
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4:r^3 and accumulate
add $IN01_0,$IN01_0,$H0
umlal $ACC3,$IN01_2,${R1}
umlal $ACC0,$IN01_2,${S3}
umlal $ACC4,$IN01_2,${R2}
umlal $ACC1,$IN01_2,${S4}
umlal $ACC2,$IN01_2,${R0}
add $IN01_1,$IN01_1,$H1
umlal $ACC3,$IN01_0,${R3}
umlal $ACC0,$IN01_0,${R0}
umlal $ACC4,$IN01_0,${R4}
umlal $ACC1,$IN01_0,${R1}
umlal $ACC2,$IN01_0,${R2}
add $IN01_3,$IN01_3,$H3
umlal $ACC3,$IN01_1,${R2}
umlal $ACC0,$IN01_1,${S4}
umlal $ACC4,$IN01_1,${R3}
umlal $ACC1,$IN01_1,${R0}
umlal $ACC2,$IN01_1,${R1}
add $IN01_4,$IN01_4,$H4
umlal $ACC3,$IN01_3,${R0}
umlal $ACC0,$IN01_3,${S2}
umlal $ACC4,$IN01_3,${R1}
umlal $ACC1,$IN01_3,${S3}
umlal $ACC2,$IN01_3,${S4}
umlal $ACC3,$IN01_4,${S4}
umlal $ACC0,$IN01_4,${S1}
umlal $ACC4,$IN01_4,${R0}
umlal $ACC1,$IN01_4,${S2}
umlal $ACC2,$IN01_4,${S3}
.Lshort_tail:
////////////////////////////////////////////////////////////////
// horizontal add
addp $ACC3,$ACC3,$ACC3
ldp d8,d9,[sp,#16] // meet ABI requirements
addp $ACC0,$ACC0,$ACC0
ldp d10,d11,[sp,#32]
addp $ACC4,$ACC4,$ACC4
ldp d12,d13,[sp,#48]
addp $ACC1,$ACC1,$ACC1
ldp d14,d15,[sp,#64]
addp $ACC2,$ACC2,$ACC2
////////////////////////////////////////////////////////////////
// lazy reduction, but without narrowing
ushr $T0.2d,$ACC3,#26
and $ACC3,$ACC3,$MASK.2d
ushr $T1.2d,$ACC0,#26
and $ACC0,$ACC0,$MASK.2d
add $ACC4,$ACC4,$T0.2d // h3 -> h4
add $ACC1,$ACC1,$T1.2d // h0 -> h1
ushr $T0.2d,$ACC4,#26
and $ACC4,$ACC4,$MASK.2d
ushr $T1.2d,$ACC1,#26
and $ACC1,$ACC1,$MASK.2d
add $ACC2,$ACC2,$T1.2d // h1 -> h2
add $ACC0,$ACC0,$T0.2d
shl $T0.2d,$T0.2d,#2
ushr $T1.2d,$ACC2,#26
and $ACC2,$ACC2,$MASK.2d
add $ACC0,$ACC0,$T0.2d // h4 -> h0
add $ACC3,$ACC3,$T1.2d // h2 -> h3
ushr $T0.2d,$ACC0,#26
and $ACC0,$ACC0,$MASK.2d
ushr $T1.2d,$ACC3,#26
and $ACC3,$ACC3,$MASK.2d
add $ACC1,$ACC1,$T0.2d // h0 -> h1
add $ACC4,$ACC4,$T1.2d // h3 -> h4
////////////////////////////////////////////////////////////////
// write the result, can be partially reduced
st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
st1 {$ACC4}[0],[$ctx]
.Lno_data_neon:
ldr x29,[sp],#80
ret
.size poly1305_blocks_neon,.-poly1305_blocks_neon
// poly1305_emit_neon: write the final 16-byte result for a context whose
// hash may be stored as five base 2^26 limbs. When the is_base2_26 word at
// context offset 24 is zero, control is handed to poly1305_emit instead.
.type poly1305_emit_neon,%function
.align 5
poly1305_emit_neon:
ldr $is_base2_26,[$ctx,#24]
cbz $is_base2_26,poly1305_emit
ldp w10,w11,[$ctx] // load hash value base 2^26
ldp w12,w13,[$ctx,#8]
ldr w14,[$ctx,#16]
// Recombine the five 26-bit limbs into three 64-bit words, propagating
// the carries between words.
add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
lsr $h1,x12,#12
adds $h0,$h0,x12,lsl#52
add $h1,$h1,x13,lsl#14
adc $h1,$h1,xzr
lsr $h2,x14,#24
adds $h1,$h1,x14,lsl#40
adc $h2,$h2,xzr // can be partially reduced...
ldp $t0,$t1,[$nonce] // load nonce
// Fold everything at or above bit 130 back into the low word, multiplied
// by 5: (h2 & ~3) + (h2 >> 2) equals 5 * (h2 >> 2).
and $d0,$h2,#-4 // ... so reduce
add $d0,$d0,$h2,lsr#2
and $h2,$h2,#3
adds $h0,$h0,$d0
adcs $h1,$h1,xzr
adc $h2,$h2,xzr
// Compute h + 5; if that reaches bit 130 the low 128 bits of h + 5 are
// used, otherwise h itself is kept.
adds $d0,$h0,#5 // compare to modulus
adcs $d1,$h1,xzr
adc $d2,$h2,xzr
tst $d2,#-4 // see if it's carried/borrowed
csel $h0,$h0,$d0,eq
csel $h1,$h1,$d1,eq
#ifdef __ARMEB__
ror $t0,$t0,#32 // flip nonce words
ror $t1,$t1,#32
#endif
adds $h0,$h0,$t0 // accumulate nonce
adc $h1,$h1,$t1
#ifdef __ARMEB__
rev $h0,$h0 // flip output bytes
rev $h1,$h1
#endif
stp $h0,$h1,[$mac] // write result
ret
.size poly1305_emit_neon,.-poly1305_emit_neon
.align 5
// 32 bytes of zeros.
.Lzeros:
.long 0,0,0,0,0,0,0,0
// Offset from this location to OPENSSL_armcap_P, emitted as a 32-bit word
// under __ILP32__ or as a 64-bit word otherwise.
.LOPENSSL_armcap_P:
#ifdef __ILP32__
.long OPENSSL_armcap_P-.
#else
.quad OPENSSL_armcap_P-.
#endif
// Identification string embedded in the object.
.asciz "Poly1305 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
# Post-process the generated assembly one line at a time and print it.
# The first chain applies at most one rewrite per line (each alternative
# ends in "or 1" or is a substitution, so the chain stops at the first hit):
#   - shrn with a .2d/.4d destination  -> destination becomes .2s
#   - fmov vN<anything>, xM            -> fmov dN,xM
#   - dup lines                        -> every .2s/.4s becomes .2d
#   - eor/and lines                    -> every arrangement becomes .16b
#   - umull/umlal lines                -> every .4s becomes .2s
#   - umull2/umlal2 lines              -> every .2s becomes .4s
#   - st1..st4 with a lane index       -> every .2d/.4d becomes .s
foreach (split("\n",$code)) {
s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or
s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or
(m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or
(m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or
(m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or
(m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or
(m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1));
# Independently of the chain above, turn the first lane-indexed operand
# written with an element count (e.g. ".2s[") into the plain ".s[" form.
s/\.[124]([sd])\[/.$1\[/;
print $_,"\n";
}
# Flush and close the output stream.
close STDOUT;

View file

@ -0,0 +1,820 @@
/* SPDX-License-Identifier: OpenSSL OR (BSD-3-Clause OR GPL-2.0)
*
* Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
* Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
*/
#include <linux/linkage.h>
.text
.align 5
// poly1305_init_arm: x0 = context, x1 = key pointer (may be NULL).
// Clears context bytes 0..31 (hash value and the is_base2_26 word) and,
// when a key is supplied, stores its two 64-bit halves, masked as shown
// below, at context offset 32. With a NULL key x0 is returned as 0;
// otherwise x0 is left unchanged.
ENTRY(poly1305_init_arm)
cmp x1,xzr
stp xzr,xzr,[x0] // zero hash value
stp xzr,xzr,[x0,#16] // [along with is_base2_26]
csel x0,xzr,x0,eq
b.eq .Lno_key
ldp x7,x8,[x1] // load key
// Build the mask 0x0ffffffc0fffffff in x9 (mov sets the low 48 bits
// pattern, movk replaces the top 16 bits with 0x0fff).
mov x9,#0xfffffffc0fffffff
movk x9,#0x0fff,lsl#48
#ifdef __ARMEB__
rev x7,x7 // flip bytes
rev x8,x8
#endif
and x7,x7,x9 // &=0ffffffc0fffffff
and x9,x9,#-4
and x8,x8,x9 // &=0ffffffc0ffffffc
stp x7,x8,[x0,#32] // save key value
.Lno_key:
ret
ENDPROC(poly1305_init_arm)
.align 5
// poly1305_blocks_arm: x0 = context, x1 = input, x2 = length in bytes,
// x3 = value added to the top hash word for every block (the pad bit).
// The length is rounded down to a multiple of 16; for each 16-byte block
// the hash h (x4:x5:x6, base 2^64) is updated to (h + block) * r, with the
// bits at and above 2^130 folded back in multiplied by 5. The hash is
// loaded from and stored back to context offsets 0..23; the key r is read
// from context offset 32.
ENTRY(poly1305_blocks_arm)
ands x2,x2,#-16
b.eq .Lno_data
ldp x4,x5,[x0] // load hash value
ldp x7,x8,[x0,#32] // load key value
ldr x6,[x0,#16]
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
b .Loop
.align 5
.Loop:
ldp x10,x11,[x1],#16 // load input
sub x2,x2,#16
#ifdef __ARMEB__
rev x10,x10
rev x11,x11
#endif
adds x4,x4,x10 // accumulate input
adcs x5,x5,x11
mul x12,x4,x7 // h0*r0
adc x6,x6,x3
umulh x13,x4,x7
mul x10,x5,x9 // h1*5*r1
umulh x11,x5,x9
adds x12,x12,x10
mul x10,x4,x8 // h0*r1
adc x13,x13,x11
umulh x14,x4,x8
adds x13,x13,x10
mul x10,x5,x7 // h1*r0
adc x14,x14,xzr
umulh x11,x5,x7
adds x13,x13,x10
mul x10,x6,x9 // h2*5*r1
adc x14,x14,x11
mul x11,x6,x7 // h2*r0
adds x13,x13,x10
adc x14,x14,x11
// x12:x13:x14 now hold the product; keep the low two bits of x14 as the
// new h2 and add 5 * (x14 >> 2) back into the low word.
and x10,x14,#-4 // final reduction
and x6,x14,#3
add x10,x10,x14,lsr#2
adds x4,x12,x10
adcs x5,x13,xzr
adc x6,x6,xzr
cbnz x2,.Loop
stp x4,x5,[x0] // store hash value
str x6,[x0,#16]
.Lno_data:
ret
ENDPROC(poly1305_blocks_arm)
.align 5
// poly1305_emit_arm: x0 = context, x1 = output (16 bytes), x2 = nonce.
// Reads the base 2^64 hash from the context, selects either h or the low
// 128 bits of h + 5 depending on whether h + 5 reaches bit 130, adds the
// 128-bit nonce and stores the low 128 bits of the sum at x1.
ENTRY(poly1305_emit_arm)
ldp x4,x5,[x0] // load hash base 2^64
ldr x6,[x0,#16]
ldp x10,x11,[x2] // load nonce
adds x12,x4,#5 // compare to modulus
adcs x13,x5,xzr
adc x14,x6,xzr
tst x14,#-4 // see if it's carried/borrowed
csel x4,x4,x12,eq
csel x5,x5,x13,eq
#ifdef __ARMEB__
ror x10,x10,#32 // flip nonce words
ror x11,x11,#32
#endif
adds x4,x4,x10 // accumulate nonce
adc x5,x5,x11
#ifdef __ARMEB__
rev x4,x4 // flip output bytes
rev x5,x5
#endif
stp x4,x5,[x1] // write result
ret
ENDPROC(poly1305_emit_arm)
.align 5
// __poly1305_mult: local helper, called with bl.
// In:  x4:x5:x6 = h (base 2^64), x7:x8 = r0:r1, x9 = s1 = r1 + (r1 >> 2).
// Out: x4:x5:x6 = h * r with the bits above 2^130 folded back in (times 5).
// Uses x10-x14 as scratch and changes the condition flags.
__poly1305_mult:
mul x12,x4,x7 // h0*r0
umulh x13,x4,x7
mul x10,x5,x9 // h1*5*r1
umulh x11,x5,x9
adds x12,x12,x10
mul x10,x4,x8 // h0*r1
adc x13,x13,x11
umulh x14,x4,x8
adds x13,x13,x10
mul x10,x5,x7 // h1*r0
adc x14,x14,xzr
umulh x11,x5,x7
adds x13,x13,x10
mul x10,x6,x9 // h2*5*r1
adc x14,x14,x11
mul x11,x6,x7 // h2*r0
adds x13,x13,x10
adc x14,x14,x11
and x10,x14,#-4 // final reduction
and x6,x14,#3
add x10,x10,x14,lsr#2
adds x4,x12,x10
adcs x5,x13,xzr
adc x6,x6,xzr
ret
// __poly1305_splat: local helper, called with bl.
// Splits the value in x4:x5:x6 into five 26-bit limbs and writes nine
// 32-bit words at x0, 16 bytes apart: r0, r1, s1, r2, s2, r3, s3, r4, s4,
// where sN = 5 * rN. Uses x12-x16 as scratch; x0 and x4-x6 are unchanged.
__poly1305_splat:
and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
ubfx x13,x4,#26,#26
extr x14,x5,x4,#52
and x14,x14,#0x03ffffff
ubfx x15,x5,#14,#26
extr x16,x6,x5,#40
str w12,[x0,#16*0] // r0
add w12,w13,w13,lsl#2 // r1*5
str w13,[x0,#16*1] // r1
add w13,w14,w14,lsl#2 // r2*5
str w12,[x0,#16*2] // s1
str w14,[x0,#16*3] // r2
add w14,w15,w15,lsl#2 // r3*5
str w13,[x0,#16*4] // s2
str w15,[x0,#16*5] // r3
add w15,w16,w16,lsl#2 // r4*5
str w14,[x0,#16*6] // s3
str w16,[x0,#16*7] // r4
str w15,[x0,#16*8] // s4
ret
.align 5
// poly1305_blocks_neon: x0 = context, x1 = input, x2 = length in bytes,
// x3 = pad bit.
// While the hash is still in base 2^64 (is_base2_26 == 0), inputs shorter
// than 128 bytes are handed to poly1305_blocks_arm. Otherwise the hash is
// kept as five 26-bit limbs (v24-v28), the table of r^1..r^4 limbs is
// stored from context offset 48, and the main loop consumes 64 bytes per
// iteration. An 80-byte frame holds x29/x30 and the callee-saved d8-d15.
ENTRY(poly1305_blocks_neon)
ldr x17,[x0,#24]
cmp x2,#128
b.hs .Lblocks_neon
cbz x17,poly1305_blocks_arm
.Lblocks_neon:
stp x29,x30,[sp,#-80]!
add x29,sp,#0
ands x2,x2,#-16
b.eq .Lno_data_neon
cbz x17,.Lbase2_64_neon
ldp w10,w11,[x0] // load hash value base 2^26
ldp w12,w13,[x0,#8]
ldr w14,[x0,#16]
tst x2,#31
b.eq .Leven_neon
// Length is an odd number of 16-byte blocks: process one block with the
// scalar multiplier so that the remainder is a multiple of 32 bytes.
ldp x7,x8,[x0,#32] // load key value
add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
lsr x5,x12,#12
adds x4,x4,x12,lsl#52
add x5,x5,x13,lsl#14
adc x5,x5,xzr
lsr x6,x14,#24
adds x5,x5,x14,lsl#40
adc x14,x6,xzr // can be partially reduced...
ldp x12,x13,[x1],#16 // load input
sub x2,x2,#16
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
and x10,x14,#-4 // ... so reduce
and x6,x14,#3
add x10,x10,x14,lsr#2
adds x4,x4,x10
adcs x5,x5,xzr
adc x6,x6,xzr
#ifdef __ARMEB__
rev x12,x12
rev x13,x13
#endif
adds x4,x4,x12 // accumulate input
adcs x5,x5,x13
adc x6,x6,x3
bl __poly1305_mult
ldr x30,[sp,#8]
// With a zero pad bit the result is written back in base 2^64 form.
cbz x3,.Lstore_base2_64_neon
and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,x4,#26,#26
extr x12,x5,x4,#52
and x12,x12,#0x03ffffff
ubfx x13,x5,#14,#26
extr x14,x6,x5,#40
cbnz x2,.Leven_neon
stp w10,w11,[x0] // store hash value base 2^26
stp w12,w13,[x0,#8]
str w14,[x0,#16]
b .Lno_data_neon
.align 4
.Lstore_base2_64_neon:
stp x4,x5,[x0] // store hash value base 2^64
stp x6,xzr,[x0,#16] // note that is_base2_26 is zeroed
b .Lno_data_neon
.align 4
// First NEON call for this context: the hash is still in base 2^64.
.Lbase2_64_neon:
ldp x7,x8,[x0,#32] // load key value
ldp x4,x5,[x0] // load hash value base 2^64
ldr x6,[x0,#16]
tst x2,#31
b.eq .Linit_neon
ldp x12,x13,[x1],#16 // load input
sub x2,x2,#16
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
#ifdef __ARMEB__
rev x12,x12
rev x13,x13
#endif
adds x4,x4,x12 // accumulate input
adcs x5,x5,x13
adc x6,x6,x3
bl __poly1305_mult
.Linit_neon:
and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
ubfx x11,x4,#26,#26
extr x12,x5,x4,#52
and x12,x12,#0x03ffffff
ubfx x13,x5,#14,#26
extr x14,x6,x5,#40
stp d8,d9,[sp,#16] // meet ABI requirements
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
// Move the five hash limbs into the low lanes of v24-v28.
fmov d24,x10
fmov d25,x11
fmov d26,x12
fmov d27,x13
fmov d28,x14
////////////////////////////////// initialize r^n table
// Each power is written 4 bytes below the previous one, so the four
// powers of one limb share a 16-byte row of the table.
mov x4,x7 // r^1
add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
mov x5,x8
mov x6,xzr
add x0,x0,#48+12
bl __poly1305_splat
bl __poly1305_mult // r^2
sub x0,x0,#4
bl __poly1305_splat
bl __poly1305_mult // r^3
sub x0,x0,#4
bl __poly1305_splat
bl __poly1305_mult // r^4
sub x0,x0,#4
bl __poly1305_splat
ldr x30,[sp,#8]
// x16 is the pointer used for inp[2:3]; it is redirected to .Lzeros when
// fewer than 64 bytes remain.
add x16,x1,#32
adr x17,.Lzeros
subs x2,x2,#64
csel x16,x17,x16,lo
mov x4,#1
str x4,[x0,#-24] // set is_base2_26
sub x0,x0,#48 // restore original x0
b .Ldo_neon
.align 4
// Hash already in base 2^26 (limbs in x10-x14) and an even block count.
.Leven_neon:
add x16,x1,#32
adr x17,.Lzeros
subs x2,x2,#64
csel x16,x17,x16,lo
stp d8,d9,[sp,#16] // meet ABI requirements
stp d10,d11,[sp,#32]
stp d12,d13,[sp,#48]
stp d14,d15,[sp,#64]
fmov d24,x10
fmov d25,x11
fmov d26,x12
fmov d27,x13
fmov d28,x14
.Ldo_neon:
ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
ldp x9,x13,[x16],#48
// Shift the pad bit to bit 24 so it can be added directly to the top
// 26-bit limb of each block.
lsl x3,x3,#24
add x15,x0,#48
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
// Split two blocks into 26-bit limbs and pack limb i of both blocks into
// one 64-bit register (second block in the upper 32 bits) -> v14-v18.
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
and x5,x9,#0x03ffffff
ubfx x6,x8,#26,#26
ubfx x7,x9,#26,#26
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
extr x8,x12,x8,#52
extr x9,x13,x9,#52
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
fmov d14,x4
and x8,x8,#0x03ffffff
and x9,x9,#0x03ffffff
ubfx x10,x12,#14,#26
ubfx x11,x13,#14,#26
add x12,x3,x12,lsr#40
add x13,x3,x13,lsr#40
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
fmov d15,x6
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
fmov d16,x8
fmov d17,x10
fmov d18,x12
ldp x8,x12,[x1],#16 // inp[0:1]
ldp x9,x13,[x1],#48
// Load the nine table rows (r0,r1,s1,r2,s2,r3,s3,r4,s4) into v0-v8.
ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
ld1 {v8.4s},[x15]
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
// Same limb split and packing for inp[0:1] -> v9-v13.
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
and x5,x9,#0x03ffffff
ubfx x6,x8,#26,#26
ubfx x7,x9,#26,#26
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
extr x8,x12,x8,#52
extr x9,x13,x9,#52
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
fmov d9,x4
and x8,x8,#0x03ffffff
and x9,x9,#0x03ffffff
ubfx x10,x12,#14,#26
ubfx x11,x13,#14,#26
add x12,x3,x12,lsr#40
add x13,x3,x13,lsr#40
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
fmov d10,x6
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
// v31 = all ones shifted right by 38, i.e. the 26-bit limb mask in each
// 64-bit lane.
movi v31.2d,#-1
fmov d11,x8
fmov d12,x10
fmov d13,x12
ushr v31.2d,v31.2d,#38
// Flags are still those of the "subs x2,x2,#64" above.
b.ls .Lskip_loop
.align 4
.Loop_neon:
////////////////////////////////////////////////////////////////
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
// ___________________/
// ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
// ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
// ___________________/ ____________________/
//
// Note that we start with inp[2:3]*r^2. This is because it
// doesn't depend on reduction in previous iteration.
////////////////////////////////////////////////////////////////
// d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
// d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
// d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
// d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
// d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
//
// The scalar instructions interleaved below load and split the next
// 64 bytes of input while the vector multiplies are in flight.
subs x2,x2,#64
umull v23.2d,v14.2s,v7.s[2]
csel x16,x17,x16,lo
umull v22.2d,v14.2s,v5.s[2]
umull v21.2d,v14.2s,v3.s[2]
ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
umull v20.2d,v14.2s,v1.s[2]
ldp x9,x13,[x16],#48
umull v19.2d,v14.2s,v0.s[2]
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
umlal v23.2d,v15.2s,v5.s[2]
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
umlal v22.2d,v15.2s,v3.s[2]
and x5,x9,#0x03ffffff
umlal v21.2d,v15.2s,v1.s[2]
ubfx x6,x8,#26,#26
umlal v20.2d,v15.2s,v0.s[2]
ubfx x7,x9,#26,#26
umlal v19.2d,v15.2s,v8.s[2]
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
umlal v23.2d,v16.2s,v3.s[2]
extr x8,x12,x8,#52
umlal v22.2d,v16.2s,v1.s[2]
extr x9,x13,x9,#52
umlal v21.2d,v16.2s,v0.s[2]
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
umlal v20.2d,v16.2s,v8.s[2]
fmov d14,x4
umlal v19.2d,v16.2s,v6.s[2]
and x8,x8,#0x03ffffff
umlal v23.2d,v17.2s,v1.s[2]
and x9,x9,#0x03ffffff
umlal v22.2d,v17.2s,v0.s[2]
ubfx x10,x12,#14,#26
umlal v21.2d,v17.2s,v8.s[2]
ubfx x11,x13,#14,#26
umlal v20.2d,v17.2s,v6.s[2]
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
umlal v19.2d,v17.2s,v4.s[2]
fmov d15,x6
add v11.2s,v11.2s,v26.2s
add x12,x3,x12,lsr#40
umlal v23.2d,v18.2s,v0.s[2]
add x13,x3,x13,lsr#40
umlal v22.2d,v18.2s,v8.s[2]
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
umlal v21.2d,v18.2s,v6.s[2]
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
umlal v20.2d,v18.2s,v4.s[2]
fmov d16,x8
umlal v19.2d,v18.2s,v2.s[2]
fmov d17,x10
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4 and accumulate
add v9.2s,v9.2s,v24.2s
fmov d18,x12
umlal v22.2d,v11.2s,v1.s[0]
ldp x8,x12,[x1],#16 // inp[0:1]
umlal v19.2d,v11.2s,v6.s[0]
ldp x9,x13,[x1],#48
umlal v23.2d,v11.2s,v3.s[0]
umlal v20.2d,v11.2s,v8.s[0]
umlal v21.2d,v11.2s,v0.s[0]
#ifdef __ARMEB__
rev x8,x8
rev x12,x12
rev x9,x9
rev x13,x13
#endif
add v10.2s,v10.2s,v25.2s
umlal v22.2d,v9.2s,v5.s[0]
umlal v23.2d,v9.2s,v7.s[0]
and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
umlal v21.2d,v9.2s,v3.s[0]
and x5,x9,#0x03ffffff
umlal v19.2d,v9.2s,v0.s[0]
ubfx x6,x8,#26,#26
umlal v20.2d,v9.2s,v1.s[0]
ubfx x7,x9,#26,#26
add v12.2s,v12.2s,v27.2s
add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
umlal v22.2d,v10.2s,v3.s[0]
extr x8,x12,x8,#52
umlal v23.2d,v10.2s,v5.s[0]
extr x9,x13,x9,#52
umlal v19.2d,v10.2s,v8.s[0]
add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
umlal v21.2d,v10.2s,v1.s[0]
fmov d9,x4
umlal v20.2d,v10.2s,v0.s[0]
and x8,x8,#0x03ffffff
add v13.2s,v13.2s,v28.2s
and x9,x9,#0x03ffffff
umlal v22.2d,v12.2s,v0.s[0]
ubfx x10,x12,#14,#26
umlal v19.2d,v12.2s,v4.s[0]
ubfx x11,x13,#14,#26
umlal v23.2d,v12.2s,v1.s[0]
add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
umlal v20.2d,v12.2s,v6.s[0]
fmov d10,x6
umlal v21.2d,v12.2s,v8.s[0]
add x12,x3,x12,lsr#40
umlal v22.2d,v13.2s,v8.s[0]
add x13,x3,x13,lsr#40
umlal v19.2d,v13.2s,v2.s[0]
add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
umlal v23.2d,v13.2s,v0.s[0]
add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
umlal v20.2d,v13.2s,v4.s[0]
fmov d11,x8
umlal v21.2d,v13.2s,v6.s[0]
fmov d12,x10
fmov d13,x12
/////////////////////////////////////////////////////////////////
// lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
// and P. Schwabe
//
// [see discussion in poly1305-armv4 module]
//
// The 64-bit accumulators v19-v23 are carried and narrowed back into the
// 32-bit hash limbs v24-v28.
ushr v29.2d,v22.2d,#26
xtn v27.2s,v22.2d
ushr v30.2d,v19.2d,#26
and v19.16b,v19.16b,v31.16b
add v23.2d,v23.2d,v29.2d // h3 -> h4
bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
add v20.2d,v20.2d,v30.2d // h0 -> h1
ushr v29.2d,v23.2d,#26
xtn v28.2s,v23.2d
ushr v30.2d,v20.2d,#26
xtn v25.2s,v20.2d
bic v28.2s,#0xfc,lsl#24
add v21.2d,v21.2d,v30.2d // h1 -> h2
add v19.2d,v19.2d,v29.2d
shl v29.2d,v29.2d,#2
shrn v30.2s,v21.2d,#26
xtn v26.2s,v21.2d
add v19.2d,v19.2d,v29.2d // h4 -> h0
bic v25.2s,#0xfc,lsl#24
add v27.2s,v27.2s,v30.2s // h2 -> h3
bic v26.2s,#0xfc,lsl#24
shrn v29.2s,v19.2d,#26
xtn v24.2s,v19.2d
ushr v30.2s,v27.2s,#26
bic v27.2s,#0xfc,lsl#24
bic v24.2s,#0xfc,lsl#24
add v25.2s,v25.2s,v29.2s // h0 -> h1
add v28.2s,v28.2s,v30.2s // h3 -> h4
// Flags are still those of the "subs x2,x2,#64" at the top of the loop;
// none of the instructions in between set them.
b.hi .Loop_neon
.Lskip_loop:
dup v16.2d,v16.d[0]
add v11.2s,v11.2s,v26.2s
////////////////////////////////////////////////////////////////
// multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
adds x2,x2,#32
b.ne .Long_tail
// Only inp[0:1] remain: fold the hash into them and use them as the
// operand of the r^2:r^1 multiplication.
dup v16.2d,v11.d[0]
add v14.2s,v9.2s,v24.2s
add v17.2s,v12.2s,v27.2s
add v15.2s,v10.2s,v25.2s
add v18.2s,v13.2s,v28.2s
.Long_tail:
dup v14.2d,v14.d[0]
umull2 v19.2d,v16.4s,v6.4s
umull2 v22.2d,v16.4s,v1.4s
umull2 v23.2d,v16.4s,v3.4s
umull2 v21.2d,v16.4s,v0.4s
umull2 v20.2d,v16.4s,v8.4s
dup v15.2d,v15.d[0]
umlal2 v19.2d,v14.4s,v0.4s
umlal2 v21.2d,v14.4s,v3.4s
umlal2 v22.2d,v14.4s,v5.4s
umlal2 v23.2d,v14.4s,v7.4s
umlal2 v20.2d,v14.4s,v1.4s
dup v17.2d,v17.d[0]
umlal2 v19.2d,v15.4s,v8.4s
umlal2 v22.2d,v15.4s,v3.4s
umlal2 v21.2d,v15.4s,v1.4s
umlal2 v23.2d,v15.4s,v5.4s
umlal2 v20.2d,v15.4s,v0.4s
dup v18.2d,v18.d[0]
umlal2 v22.2d,v17.4s,v0.4s
umlal2 v23.2d,v17.4s,v1.4s
umlal2 v19.2d,v17.4s,v4.4s
umlal2 v20.2d,v17.4s,v6.4s
umlal2 v21.2d,v17.4s,v8.4s
umlal2 v22.2d,v18.4s,v8.4s
umlal2 v19.2d,v18.4s,v2.4s
umlal2 v23.2d,v18.4s,v0.4s
umlal2 v20.2d,v18.4s,v4.4s
umlal2 v21.2d,v18.4s,v6.4s
// Flags are still those of the "adds x2,x2,#32" above.
b.eq .Lshort_tail
////////////////////////////////////////////////////////////////
// (hash+inp[0:1])*r^4:r^3 and accumulate
add v9.2s,v9.2s,v24.2s
umlal v22.2d,v11.2s,v1.2s
umlal v19.2d,v11.2s,v6.2s
umlal v23.2d,v11.2s,v3.2s
umlal v20.2d,v11.2s,v8.2s
umlal v21.2d,v11.2s,v0.2s
add v10.2s,v10.2s,v25.2s
umlal v22.2d,v9.2s,v5.2s
umlal v19.2d,v9.2s,v0.2s
umlal v23.2d,v9.2s,v7.2s
umlal v20.2d,v9.2s,v1.2s
umlal v21.2d,v9.2s,v3.2s
add v12.2s,v12.2s,v27.2s
umlal v22.2d,v10.2s,v3.2s
umlal v19.2d,v10.2s,v8.2s
umlal v23.2d,v10.2s,v5.2s
umlal v20.2d,v10.2s,v0.2s
umlal v21.2d,v10.2s,v1.2s
add v13.2s,v13.2s,v28.2s
umlal v22.2d,v12.2s,v0.2s
umlal v19.2d,v12.2s,v4.2s
umlal v23.2d,v12.2s,v1.2s
umlal v20.2d,v12.2s,v6.2s
umlal v21.2d,v12.2s,v8.2s
umlal v22.2d,v13.2s,v8.2s
umlal v19.2d,v13.2s,v2.2s
umlal v23.2d,v13.2s,v0.2s
umlal v20.2d,v13.2s,v4.2s
umlal v21.2d,v13.2s,v6.2s
.Lshort_tail:
////////////////////////////////////////////////////////////////
// horizontal add
// (the two 64-bit lanes of each accumulator are summed; the callee-saved
// d8-d15 are reloaded in between)
addp v22.2d,v22.2d,v22.2d
ldp d8,d9,[sp,#16] // meet ABI requirements
addp v19.2d,v19.2d,v19.2d
ldp d10,d11,[sp,#32]
addp v23.2d,v23.2d,v23.2d
ldp d12,d13,[sp,#48]
addp v20.2d,v20.2d,v20.2d
ldp d14,d15,[sp,#64]
addp v21.2d,v21.2d,v21.2d
////////////////////////////////////////////////////////////////
// lazy reduction, but without narrowing
ushr v29.2d,v22.2d,#26
and v22.16b,v22.16b,v31.16b
ushr v30.2d,v19.2d,#26
and v19.16b,v19.16b,v31.16b
add v23.2d,v23.2d,v29.2d // h3 -> h4
add v20.2d,v20.2d,v30.2d // h0 -> h1
ushr v29.2d,v23.2d,#26
and v23.16b,v23.16b,v31.16b
ushr v30.2d,v20.2d,#26
and v20.16b,v20.16b,v31.16b
add v21.2d,v21.2d,v30.2d // h1 -> h2
add v19.2d,v19.2d,v29.2d
shl v29.2d,v29.2d,#2
ushr v30.2d,v21.2d,#26
and v21.16b,v21.16b,v31.16b
add v19.2d,v19.2d,v29.2d // h4 -> h0
add v22.2d,v22.2d,v30.2d // h2 -> h3
ushr v29.2d,v19.2d,#26
and v19.16b,v19.16b,v31.16b
ushr v30.2d,v22.2d,#26
and v22.16b,v22.16b,v31.16b
add v20.2d,v20.2d,v29.2d // h0 -> h1
add v23.2d,v23.2d,v30.2d // h3 -> h4
////////////////////////////////////////////////////////////////
// write the result, can be partially reduced
// (five 32-bit limbs stored at context offsets 0..19)
st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
st1 {v23.s}[0],[x0]
.Lno_data_neon:
// Restore the frame pointer and release the 80-byte frame; x30 was
// reloaded after each bl above.
ldr x29,[sp],#80
ret
ENDPROC(poly1305_blocks_neon)
.align 5
// poly1305_emit_neon: x0 = context, x1 = output (16 bytes), x2 = nonce.
// When the is_base2_26 word (context offset 24) is zero the work is handed
// to poly1305_emit_arm. Otherwise the five 26-bit hash limbs are recombined
// into base 2^64, reduced, and finalized exactly as in the scalar emit.
ENTRY(poly1305_emit_neon)
ldr x17,[x0,#24]
cbz x17,poly1305_emit_arm
ldp w10,w11,[x0] // load hash value base 2^26
ldp w12,w13,[x0,#8]
ldr w14,[x0,#16]
add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
lsr x5,x12,#12
adds x4,x4,x12,lsl#52
add x5,x5,x13,lsl#14
adc x5,x5,xzr
lsr x6,x14,#24
adds x5,x5,x14,lsl#40
adc x6,x6,xzr // can be partially reduced...
ldp x10,x11,[x2] // load nonce
// Fold the bits at and above 2^130 back into the low word, multiplied
// by 5: (x6 & ~3) + (x6 >> 2) equals 5 * (x6 >> 2).
and x12,x6,#-4 // ... so reduce
add x12,x12,x6,lsr#2
and x6,x6,#3
adds x4,x4,x12
adcs x5,x5,xzr
adc x6,x6,xzr
// Compute h + 5; if it reaches bit 130 the low 128 bits of h + 5 are
// used, otherwise h itself is kept.
adds x12,x4,#5 // compare to modulus
adcs x13,x5,xzr
adc x14,x6,xzr
tst x14,#-4 // see if it's carried/borrowed
csel x4,x4,x12,eq
csel x5,x5,x13,eq
#ifdef __ARMEB__
ror x10,x10,#32 // flip nonce words
ror x11,x11,#32
#endif
adds x4,x4,x10 // accumulate nonce
adc x5,x5,x11
#ifdef __ARMEB__
rev x4,x4 // flip output bytes
rev x5,x5
#endif
stp x4,x5,[x1] // write result
ret
ENDPROC(poly1305_emit_neon)
.align 5
// 32 bytes of zeros. poly1305_blocks_neon points its inp[2:3] load pointer
// here (via csel) when fewer than 64 bytes of input remain.
.Lzeros:
.long 0,0,0,0,0,0,0,0

0
crypto/poly1305_x64_gas.s Normal file → Executable file
View file

View file

@ -7,6 +7,7 @@
#include "tunsafe_types.h"
#include <string.h>
#if defined(COMPILER_MSVC)
#include <intrin.h>
#endif // defined(COMPILER_MSVC)

BIN
downarrow.bmp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 142 B

View file

@ -1,3 +1,26 @@
2018-08-11 - TunSafe v1.4-rc1
1.Subfolders in the Config/ directory now show up as submenus.
2.Added a way to run TunSafe as a Windows Service.
Foreground Mode: The service will disconnect when TunSafe closes.
Background Mode: The service will stay connected in the background.
No longer required to run the TunSafe client as Admin as long as
the service is running.
3.New config setting [Interface].ExcludedIPs to configure IPs that
should not be routed through TunSafe.
4.Can now automatically start TunSafe when Windows starts
5.New UI with tabs and graphs
6.Cache DNS queries to ensure DNS will succeed if connection fails
7.Recreate tray icon when explorer.exe restarts
8.Renamed window title to TunSafe instead of TunSafe VPN Client
9.Main window is now resizable
10.Disallow roaming endpoint when using AllowedIPs=0.0.0.0/0
Only the original endpoint is added in the routing table so
this would result in an endless loop of packets.
11.Display approximate WireGuard framing overhead in stats
12.Preparations for protocol handling with multiple threads
13.Delete the routes we made when disconnecting
14.Fix error message about unable to delete a route when connecting
2018-06-20 - TunSafe v1.3-rc3
Changes:

419
installer/servicelib.nsh Normal file
View file

@ -0,0 +1,419 @@
; NSIS SERVICE LIBRARY - servicelib.nsh
; Version 1.8.1 - Jun 21st, 2013
; Questions/Comments - dselkirk@hotmail.com
;
; Description:
; Provides an interface to Windows services
;
; Inputs:
; action - systemlib action ie. create, delete, start, stop, pause,
; continue, installed, running, status
; name - name of service to manipulate
; param - action parameters; usage: var1=value1;var2=value2;...etc.
; (don't forget to add a ';' after the last value!)
;
; Actions:
; create - creates a new windows service
; Parameters:
; path - path to service executable
; autostart - automatically start with system ie. 1|0
; interact - interact with the desktop ie. 1|0
; depend - service dependencies
; user - user that runs the service
; password - password of the above user
; display - display name in service's console
; description - Description of service
; starttype - start type (supersedes autostart)
; servicetype - service type (supersedes interact)
;
; delete - deletes a windows service
; start - start a stopped windows service
; stop - stops a running windows service
; pause - pauses a running windows service
; continue - continues a paused windows service
; installed - is the provided service installed
; Parameters:
; action - if true then invokes the specified action
; running - is the provided service running
; Parameters:
; action - if true then invokes the specified action
; status - check the status of the provided service
; processid - get the process id of the provided service
;
; Usage:
; Method 1:
; Push "action"
; Push "name"
; Push "param"
; Call Service
; Pop $0 ;response
;
; Method 2:
; !insertmacro SERVICE "action" "name" "param"
;
; History:
; 1.0 - 09/15/2003 - Initial release
; 1.1 - 09/16/2003 - Changed &l to i, thx brainsucker
; 1.2 - 02/29/2004 - Fixed documentation.
; 1.3 - 01/05/2006 - Fixed interactive flag and pop order (Kichik)
; 1.4 - 12/07/2006 - Added display and depend, fixed datatypes (Vitoco)
; 1.5 - 06/25/2008 - Added description of service.(DeSafe.com/liuqixing#gmail.com)
; 1.5.1 - 06/12/2009 - Added use of __UNINSTALL__
; 1.6 - 08/02/2010 - Fixed description implementation (Anders)
; 1.7 - 04/11/2010 - Added get running service process id (Nico)
; 1.8 - 24/03/2011 - Added starttype and servicetype (Sergius)
; 1.8.1 - 21/06/2013 - Added dynamic ASCII & Unicode support (Zinthose)
!ifndef SERVICELIB
!define SERVICELIB
; Access mask passed to OpenSCManager.
!define SC_MANAGER_ALL_ACCESS 0x3F
; Info level passed to QueryServiceStatusEx.
!define SC_STATUS_PROCESS_INFO 0x0
; Access mask passed to OpenService and CreateService.
!define SERVICE_ALL_ACCESS 0xF01FF
; Control codes passed to ControlService.
!define SERVICE_CONTROL_STOP 1
!define SERVICE_CONTROL_PAUSE 2
!define SERVICE_CONTROL_CONTINUE 3
; Service states, compared against the second field returned by
; QueryServiceStatus.
!define SERVICE_STOPPED 0x1
!define SERVICE_START_PENDING 0x2
!define SERVICE_STOP_PENDING 0x3
!define SERVICE_RUNNING 0x4
!define SERVICE_CONTINUE_PENDING 0x5
!define SERVICE_PAUSE_PENDING 0x6
!define SERVICE_PAUSED 0x7
; Service type values (the "servicetype" create parameter).
!define SERVICE_KERNEL_DRIVER 0x00000001
!define SERVICE_FILE_SYSTEM_DRIVER 0x00000002
!define SERVICE_WIN32_OWN_PROCESS 0x00000010
!define SERVICE_WIN32_SHARE_PROCESS 0x00000020
!define SERVICE_INTERACTIVE_PROCESS 0x00000100
; Start type values (the "starttype" create parameter).
!define SERVICE_BOOT_START 0x00000000
!define SERVICE_SYSTEM_START 0x00000001
!define SERVICE_AUTO_START 0x00000002
!define SERVICE_DEMAND_START 0x00000003
!define SERVICE_DISABLED 0x00000004
## Added by Zinthose for Native Unicode Support
; APITAG is the suffix appended to API names that have separate wide and
; ANSI entry points: "W" in a Unicode NSIS build, "A" otherwise.
!ifdef NSIS_UNICODE
!define APITAG "W"
!else
!define APITAG "A"
!endif
; SERVICE ACTION NAME PARAM
; Convenience wrapper: pushes the three arguments and calls the Service
; function (un.Service when compiled inside the uninstaller). The response
; string is left on the stack; this macro does not pop it.
!macro SERVICE ACTION NAME PARAM
Push '${ACTION}'
Push '${NAME}'
Push '${PARAM}'
!ifdef __UNINSTALL__
Call un.Service
!else
Call Service
!endif
!macroend
; FUNC_GETPARAM - body of the GetParam / un.GetParam functions.
; Stack in:  name (top), source string of the form "var1=value1;var2=value2;"
; Stack out: the text between "name=" and the next ';', or "" when the
;            name is not found or its value is not terminated by ';'.
; Registers $0-$7 are saved on entry and restored on exit.
; NOTE(review): the search matches "name=" at any position in the source,
; so a name that is the tail of another key would also match - confirm
; this is acceptable for the keys in use.
!macro FUNC_GETPARAM
Push $0
Push $1
Push $2
Push $3
Push $4
Push $5
Push $6
Push $7
; Bring the two arguments, now buried under the eight saved registers,
; to the top of the stack one at a time.
Exch 8
Pop $1 ;name
Exch 8
Pop $2 ;source
StrCpy $0 ""
StrLen $7 $2
StrCpy $3 0
; Outer scan: $3 is the current index into the source, $7 its length.
lbl_loop:
IntCmp $3 $7 0 0 lbl_done
StrLen $4 "$1="
StrCpy $5 $2 $4 $3
StrCmp $5 "$1=" 0 lbl_next
; Found "name=": $5 becomes the index of the first value character.
IntOp $5 $3 + $4
StrCpy $3 $5
; Inner scan: advance $3 until the terminating ';'.
lbl_loop2:
IntCmp $3 $7 0 0 lbl_done
StrCpy $6 $2 1 $3
StrCmp $6 ";" 0 lbl_next2
IntOp $6 $3 - $5
StrCpy $0 $2 $6 $5
Goto lbl_done
lbl_next2:
IntOp $3 $3 + 1
Goto lbl_loop2
lbl_next:
IntOp $3 $3 + 1
Goto lbl_loop
lbl_done:
; Restore the saved registers and leave the result in their place.
Pop $5
Pop $4
Pop $3
Pop $2
Pop $1
Exch 2
Pop $6
Pop $7
Exch $0
!macroend
; CALL_GETPARAM VAR NAME DEFAULT LABEL
; Looks up NAME in the parameter string held in $1 and stores the value in
; VAR. When the lookup returns an empty string, VAR receives DEFAULT and
; execution jumps to LABEL; otherwise it continues after the macro.
; Overwrites $6. Relies on ${UN} being defined where the macro is inserted
; (it is a parameter of FUNC_SERVICE).
!macro CALL_GETPARAM VAR NAME DEFAULT LABEL
Push $1
Push ${NAME}
Call ${UN}GETPARAM
Pop $6
StrCpy ${VAR} "${DEFAULT}"
StrCmp $6 "" "${LABEL}" 0
StrCpy ${VAR} $6
!macroend
; FUNC_SERVICE UN - body of the Service / un.Service functions.
; Stack in:  param (top), name, action
; Stack out: response string: "true"/"false" for most actions, a state
;            name for "status", the process id for "processid".
; Registers $0-$7 are saved on entry and restored on exit.
;
; Register use inside the macro:
;   $0 response   $1 param   $2 service name   $3 action
;   $4 service control manager handle   $5 service handle   $6/$7 scratch
!macro FUNC_SERVICE UN
Push $0
Push $1
Push $2
Push $3
Push $4
Push $5
Push $6
Push $7
Exch 8
Pop $1 ;param
Exch 8
Pop $2 ;name
Exch 8
Pop $3 ;action
;$0 return
;$4 OpenSCManager
;$5 OpenService
StrCpy $0 "false"
; No service handle yet. Without this, lbl_done would test whatever value
; the caller left in $5 on the paths that never call OpenService
; (OpenSCManager failure and "create").
StrCpy $5 0
System::Call 'advapi32::OpenSCManager${APITAG}(n, n, i ${SC_MANAGER_ALL_ACCESS}) i.r4'
IntCmp $4 0 lbl_done
StrCmp $3 "create" lbl_create
System::Call 'advapi32::OpenService${APITAG}(i r4, t r2, i ${SERVICE_ALL_ACCESS}) i.r5'
IntCmp $5 0 lbl_done
lbl_select:
StrCmp $3 "delete" lbl_delete
StrCmp $3 "start" lbl_start
StrCmp $3 "stop" lbl_stop
StrCmp $3 "pause" lbl_pause
StrCmp $3 "continue" lbl_continue
StrCmp $3 "installed" lbl_installed
StrCmp $3 "running" lbl_running
StrCmp $3 "status" lbl_status
StrCmp $3 "processid" lbl_processid
Goto lbl_done
; create service
lbl_create:
Push $R1 ;depend
Push $R2 ;user
Push $R3 ;password
Push $R4 ;servicetype/interact
Push $R5 ;starttype/autostart
Push $R6 ;path
Push $R7 ;display
Push $R8 ;description
; Optional string arguments are spliced into the CreateService call text:
; either the literal "n" (null) or 't "<value>"'.
!insertmacro CALL_GETPARAM $R1 "depend" "n" "lbl_depend"
StrCpy $R1 't "$R1"'
lbl_depend:
StrCmp $R1 "n" 0 lbl_machine ;old name of depend param
!insertmacro CALL_GETPARAM $R1 "machine" "n" "lbl_machine"
StrCpy $R1 't "$R1"'
lbl_machine:
!insertmacro CALL_GETPARAM $R2 "user" "n" "lbl_user"
StrCpy $R2 't "$R2"'
lbl_user:
!insertmacro CALL_GETPARAM $R3 "password" "n" "lbl_password"
StrCpy $R3 't "$R3"'
lbl_password:
; "interact" selects own-process, optionally with the interactive flag;
; an explicit "servicetype" overrides it.
!insertmacro CALL_GETPARAM $R4 "interact" "${SERVICE_WIN32_OWN_PROCESS}" "lbl_interact"
StrCpy $6 ${SERVICE_WIN32_OWN_PROCESS}
IntCmp $R4 0 +2
IntOp $6 $6 | ${SERVICE_INTERACTIVE_PROCESS}
StrCpy $R4 $6
lbl_interact:
!insertmacro CALL_GETPARAM $R4 "servicetype" "$R4" "lbl_servicetype"
lbl_servicetype:
; "autostart" selects demand or auto start; an explicit "starttype"
; overrides it.
!insertmacro CALL_GETPARAM $R5 "autostart" "${SERVICE_DEMAND_START}" "lbl_autostart"
StrCpy $6 ${SERVICE_DEMAND_START}
IntCmp $R5 0 +2
StrCpy $6 ${SERVICE_AUTO_START}
StrCpy $R5 $6
lbl_autostart:
!insertmacro CALL_GETPARAM $R5 "starttype" "$R5" "lbl_starttype"
lbl_starttype:
!insertmacro CALL_GETPARAM $R6 "path" "n" "lbl_path"
lbl_path:
!insertmacro CALL_GETPARAM $R7 "display" "$2" "lbl_display"
lbl_display:
!insertmacro CALL_GETPARAM $R8 "description" "$2" "lbl_description"
lbl_description:
System::Call 'advapi32::CreateService${APITAG}(i r4, t r2, t R7, i ${SERVICE_ALL_ACCESS}, \
i R4, i R5, i 0, t R6, n, n, $R1, $R2, $R3) i.r6'
; write description of service (SERVICE_CONFIG_DESCRIPTION)
System::Call 'advapi32::ChangeServiceConfig2${APITAG}(ir6,i1,*t "$R8")i.R7'
strcmp $R7 "error" 0 lbl_descriptioncomplete
WriteRegStr HKLM "SYSTEM\CurrentControlSet\Services\$2" "Description" $R8
lbl_descriptioncomplete:
; Hand the new service handle (0 on failure) to $5 so that lbl_done closes
; it; previously the handle returned by CreateService was never closed.
StrCpy $5 $6
Pop $R8
Pop $R7
Pop $R6
Pop $R5
Pop $R4
Pop $R3
Pop $R2
Pop $R1
StrCmp $6 0 lbl_done lbl_good
; delete service
lbl_delete:
System::Call 'advapi32::DeleteService(i r5) i.r6'
StrCmp $6 0 lbl_done lbl_good
; start service
lbl_start:
System::Call 'advapi32::StartService${APITAG}(i r5, i 0, i 0) i.r6'
StrCmp $6 0 lbl_done lbl_good
; stop service
; (for stop/pause/continue the ControlService result is now captured in $6;
; before, $6 was never written and the test below used a stale value)
lbl_stop:
Push $R1
System::Call '*(i,i,i,i,i,i,i) i.R1'
System::Call 'advapi32::ControlService(i r5, i ${SERVICE_CONTROL_STOP}, i $R1) i.r6'
System::Free $R1
Pop $R1
StrCmp $6 0 lbl_done lbl_good
; pause service
lbl_pause:
Push $R1
System::Call '*(i,i,i,i,i,i,i) i.R1'
System::Call 'advapi32::ControlService(i r5, i ${SERVICE_CONTROL_PAUSE}, i $R1) i.r6'
System::Free $R1
Pop $R1
StrCmp $6 0 lbl_done lbl_good
; continue service
lbl_continue:
Push $R1
System::Call '*(i,i,i,i,i,i,i) i.R1'
System::Call 'advapi32::ControlService(i r5, i ${SERVICE_CONTROL_CONTINUE}, i $R1) i.r6'
System::Free $R1
Pop $R1
StrCmp $6 0 lbl_done lbl_good
; is installed
; (reaching this point means OpenService succeeded; an optional "action"
; parameter is then dispatched as the new action)
lbl_installed:
!insertmacro CALL_GETPARAM $7 "action" "" "lbl_good"
StrCpy $3 $7
Goto lbl_select
; is service running
lbl_running:
Push $R1
System::Call '*(i,i,i,i,i,i,i) i.R1'
System::Call 'advapi32::QueryServiceStatus(i r5, i $R1) i'
System::Call '*$R1(i, i.r6)'
System::Free $R1
Pop $R1
; Format the state like the SERVICE_* defines ("0x4") for the string compare.
IntFmt $6 "0x%X" $6
StrCmp $6 ${SERVICE_RUNNING} 0 lbl_done
!insertmacro CALL_GETPARAM $7 "action" "" "lbl_good"
StrCpy $3 $7
Goto lbl_select
; report the current state as a string
lbl_status:
Push $R1
System::Call '*(i,i,i,i,i,i,i) i.R1'
System::Call 'advapi32::QueryServiceStatus(i r5, i $R1) i'
System::Call '*$R1(i, i .r6)'
System::Free $R1
Pop $R1
IntFmt $6 "0x%X" $6
StrCpy $0 "running"
IntCmp $6 ${SERVICE_RUNNING} lbl_done
StrCpy $0 "stopped"
IntCmp $6 ${SERVICE_STOPPED} lbl_done
StrCpy $0 "start_pending"
IntCmp $6 ${SERVICE_START_PENDING} lbl_done
StrCpy $0 "stop_pending"
IntCmp $6 ${SERVICE_STOP_PENDING} lbl_done
StrCpy $0 "continue_pending"
IntCmp $6 ${SERVICE_CONTINUE_PENDING} lbl_done
StrCpy $0 "pause_pending"
IntCmp $6 ${SERVICE_PAUSE_PENDING} lbl_done
StrCpy $0 "paused"
IntCmp $6 ${SERVICE_PAUSED} lbl_done
StrCpy $0 "unknown"
Goto lbl_done
; return the eighth field of the 36-byte status block in $0
lbl_processid:
Push $R1
Push $R2
System::Call '*(i,i,i,i,i,i,i,i,i) i.R1'
System::Call '*(i 0) i.R2'
System::Call "advapi32::QueryServiceStatusEx(i r5, i ${SC_STATUS_PROCESS_INFO}, i $R1, i 36, i $R2) i"
System::Call "*$R1(i,i,i,i,i,i,i, i .r0)"
System::Free $R2
System::Free $R1
Pop $R2
Pop $R1
Goto lbl_done
lbl_good:
StrCpy $0 "true"
lbl_done:
; Close whichever handles were opened, then restore the saved registers
; and leave the response on the stack.
IntCmp $5 0 +2
System::Call 'advapi32::CloseServiceHandle(i r5) n'
IntCmp $4 0 +2
System::Call 'advapi32::CloseServiceHandle(i r4) n'
Pop $4
Pop $3
Pop $2
Pop $1
Exch 3
Pop $5
Pop $7
Pop $6
Exch $0
!macroend
; Installer and uninstaller instances of the two library functions. NSIS
; requires functions called from the uninstaller to carry the "un." prefix,
; so each macro body is instantiated twice.
Function Service
!insertmacro FUNC_SERVICE ""
FunctionEnd
Function un.Service
!insertmacro FUNC_SERVICE "un."
FunctionEnd
; Parameter-string parser used by the Service functions.
Function GetParam
!insertmacro FUNC_GETPARAM
FunctionEnd
Function un.GetParam
!insertmacro FUNC_GETPARAM
FunctionEnd
!undef APITAG
!endif

View file

@ -9,6 +9,7 @@ SetCompressor /SOLID lzma
!include "x64.nsh"
!define MULTIUSER_EXECUTIONLEVEL Admin
!include "MultiUser.nsh"
!include "servicelib.nsh"
!insertmacro GetParameters
!insertmacro GetOptions
@ -130,6 +131,7 @@ again:
Sleep 500
Goto again
done:
!insertmacro SERVICE stop TunSafeService ""
FunctionEnd
Function .onInit
@ -198,6 +200,10 @@ Function un.onInit
FunctionEnd
Section "Uninstall"
!insertmacro SERVICE stop "TunSafeService" ""
!insertmacro SERVICE delete "TunSafeService" ""
Delete "$INSTDIR\TunSafe.exe"
Delete "$INSTDIR\License.txt"
Delete "$INSTDIR\ChangeLog.txt"

97
ip_to_peer_map.cpp Normal file
View file

@ -0,0 +1,97 @@
// SPDX-License-Identifier: AGPL-1.0-only
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
#include "stdafx.h"
#include "ip_to_peer_map.h"
#include "bit_ops.h"
#include <string.h>
// Creates an empty map; both entry vectors start out empty.
IpToPeerMap::IpToPeerMap() {
}
// Nothing to release explicitly: the entry vectors clean up after themselves
// and the stored peer pointers are never freed by this class.
IpToPeerMap::~IpToPeerMap() {
}
// Adds the IPv4 network |addr|/|cidr| to the table and associates it with |peer|.
//   addr - points to the 4 address bytes, read as a big-endian 32-bit value.
//   cidr - prefix length in bits, 0..32.
//   peer - opaque pointer returned by later lookups that match this network.
// Returns false (and inserts nothing) if |cidr| is out of range, true otherwise.
bool IpToPeerMap::InsertV4(const void *addr, int cidr, void *peer) {
  // Shifting a 32-bit value by a negative count or by 32 or more bits is
  // undefined behavior, so reject prefix lengths that cannot describe an
  // IPv4 network instead of computing a garbage mask.
  if (cidr < 0 || cidr > 32)
    return false;
  // /32 is special-cased because the general formula would shift by 32 bits.
  uint32 mask = cidr == 32 ? 0xffffffff : ~(0xffffffff >> cidr);
  // Store the address already masked so lookups only need one AND + compare.
  Entry4 e = {ReadBE32(addr) & mask, mask, peer};
  ipv4_.push_back(e);
  return true;
}
bool IpToPeerMap::InsertV6(const void *addr, int cidr, void *peer) {
Entry6 e;
e.cidr_len = cidr;
e.peer = peer;
memcpy(e.ip, addr, 16);
ipv6_.push_back(e);
return true;
}
// Finds the peer whose IPv4 network contains |ip|.
// Every entry is scanned; among the matching ones the entry with the
// numerically largest mask wins, and on equal masks the entry inserted last
// wins. Returns NULL when no entry matches.
void *IpToPeerMap::LookupV4(uint32 ip) {
  void *winner = NULL;
  uint32 winner_mask = 0;
  for (const Entry4 &entry : ipv4_) {
    // Skip networks that do not contain the address.
    if ((ip & entry.mask) != entry.ip)
      continue;
    // Skip networks that are less specific than the current winner.
    if (entry.mask < winner_mask)
      continue;
    winner_mask = entry.mask;
    winner = entry.peer;
  }
  return winner;
}
void *IpToPeerMap::LookupV4DefaultPeer() {
for (auto it = ipv4_.begin(); it != ipv4_.end(); ++it) {
if (it->mask == 0)
return it->peer;
}
return NULL;
}
void *IpToPeerMap::LookupV6DefaultPeer() {
for (auto it = ipv6_.begin(); it != ipv6_.end(); ++it) {
if (it->cidr_len == 0)
return it->peer;
}
return NULL;
}
static int CalculateIPv6CommonPrefix(const uint8 *a, const uint8 *b) {
uint64 x = ToBE64(*(uint64*)&a[0] ^ *(uint64*)&b[0]);
uint64 y = ToBE64(*(uint64*)&a[8] ^ *(uint64*)&b[8]);
return x ? 64 - FindHighestSetBit64(x) : 128 - FindHighestSetBit64(y);
}
// Finds the peer whose IPv6 network contains the 16-byte address |addr|.
// An entry matches when the address shares at least |cidr_len| leading bits
// with the entry's stored address. Among the matching entries the one with
// the longest prefix length wins; on equal prefix lengths the entry inserted
// last wins (same tie rule as LookupV4). Returns NULL when nothing matches.
void *IpToPeerMap::LookupV6(const void *addr) {
  int best_cidr = 0;
  void *best_peer = NULL;
  for (auto it = ipv6_.begin(); it != ipv6_.end(); ++it) {
    int len = CalculateIPv6CommonPrefix((const uint8*)addr, it->ip);
    // Rank by the entry's own prefix length, not by how many bits the address
    // happens to share with the stored address: two matching entries with the
    // same address bytes (e.g. a /32 and a /64) share the same number of bits,
    // which would otherwise let the less specific one win.
    if (len >= it->cidr_len && it->cidr_len >= best_cidr) {
      best_cidr = it->cidr_len;
      best_peer = it->peer;
    }
  }
  return best_peer;
}
// Removes every IPv4 and IPv6 entry that points to |peer|.
// The surviving entries keep their relative order, so the "last inserted
// wins" tie rule of the lookups is unaffected.
void IpToPeerMap::RemovePeer(void *peer) {
  // Both tables are compacted in place by index. Indexing (rather than taking
  // &vec[0] up front) keeps this well-defined when a table is empty.
  {
    size_t kept = 0;
    for (size_t i = 0, n = ipv4_.size(); i != n; i++) {
      if (ipv4_[i].peer != peer) {
        ipv4_[kept] = ipv4_[i];
        kept++;
      }
    }
    ipv4_.resize(kept);
  }
  {
    size_t kept = 0;
    for (size_t i = 0, n = ipv6_.size(); i != n; i++) {
      if (ipv6_[i].peer != peer) {
        ipv6_[kept] = ipv6_[i];
        kept++;
      }
    }
    ipv6_.resize(kept);
  }
}

41
ip_to_peer_map.h Normal file
View file

@ -0,0 +1,41 @@
// SPDX-License-Identifier: AGPL-1.0-only
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
#pragma once
#include "tunsafe_types.h"
#include <vector>
// Maps CIDR addresses to a peer, always returning the longest match
// Slow O(n) implementation
// Entries live in two plain vectors (one per address family) that every
// lookup scans from start to end. Peers are stored as opaque void pointers;
// the map never dereferences or frees them.
class IpToPeerMap {
public:
IpToPeerMap();
~IpToPeerMap();
// Inserts an IP address of a given CIDR length into the lookup table, pointing to peer.
// |addr| points to the raw address bytes (4 for V4, 16 for V6) and |cidr| is
// the prefix length in bits. Returns true on success.
bool InsertV4(const void *addr, int cidr, void *peer);
bool InsertV6(const void *addr, int cidr, void *peer);
// Lookup the peer matching the IP Address
// LookupV4 takes the address as a 32-bit integer whose most significant byte
// is the first octet; LookupV6 takes a pointer to the 16 address bytes.
// Both return NULL when no entry matches.
void *LookupV4(uint32 ip);
void *LookupV6(const void *addr);
// Return the peer of the first /0 entry of the respective family, or NULL
// when there is none.
void *LookupV4DefaultPeer();
void *LookupV6DefaultPeer();
// Remove a peer from the table
// (drops every V4 and V6 entry that points to |peer|).
void RemovePeer(void *peer);
private:
// One IPv4 network. Member order matters: InsertV4 fills this struct with
// positional aggregate initialization.
struct Entry4 {
// Network address, already ANDed with |mask|.
uint32 ip;
// Netmask derived from the prefix length (0 for /0, 0xffffffff for /32).
uint32 mask;
void *peer;
};
// One IPv6 network.
struct Entry6 {
// The 16 address bytes exactly as passed to InsertV6.
uint8 ip[16];
// Prefix length in bits.
uint8 cidr_len;
void *peer;
};
std::vector<Entry4> ipv4_;
std::vector<Entry6> ipv6_;
};

View file

@ -1 +1,2 @@
// this is a placeholder for a packet compression algorithm not yet released.
#include "stdafx.h"
// this is a placeholder for a packet compression algorithm not yet released.

1
ipzip2/ipzip2.h Normal file
View file

@ -0,0 +1 @@
// this is a placeholder for a packet compression algorithm not yet released.

View file

@ -121,6 +121,9 @@ public:
// This holds all cidr addresses to add as additional routing entries
std::vector<WgCidrAddr> extra_routes;
// This holds all the ips to exclude
std::vector<WgCidrAddr> excluded_ips;
// This holds the pre/post commands
PrePostCommands pre_post_commands;
};

View file

@ -41,6 +41,11 @@
#include <linux/rtnetlink.h>
#endif
// Fatal-error helper: writes |msg| followed by a newline to stderr and
// terminates the process with exit status 1. Does not return.
void tunsafe_die(const char *msg) {
fprintf(stderr, "%s\n", msg);
exit(1);
}
void SetThreadName(const char *name) {
#if defined(OS_LINUX)
prctl(PR_SET_NAME, name, 0, 0, 0);
@ -438,11 +443,11 @@ static void ComputeIpv6DefaultRoute(const uint8 *ipv6_address, uint8 ipv6_cidr,
default_route_v6[15] ^= 3;
}
void TunsafeBackendBsd::AddRoute(uint32 ip, uint32 cidr, uint32 gw) {
void TunsafeBackendBsd::AddRoute(uint32 ip, uint32 cidr, uint32 gw, const char *dev) {
uint32 ip_be, gw_be;
WriteBE32(&ip_be, ip);
WriteBE32(&gw_be, gw);
AddRoute(AF_INET, &ip_be, cidr, &gw_be);
AddRoute(AF_INET, &ip_be, cidr, &gw_be, dev);
}
static void AddOrRemoveRoute(const RouteInfo &cd, bool remove) {
@ -452,13 +457,12 @@ static void AddOrRemoveRoute(const RouteInfo &cd, bool remove) {
print_ip_prefix(buf2, cd.family, cd.gw, -1);
#if defined(OS_LINUX)
const char *cmd = remove ? "delete" : "add";
if (cd.family == AF_INET) {
const char *net_or_host = (cd.cidr == 32) ? "-host" : "-net";
RunCommand("/sbin/route %s %s %s gw %s", cmd, net_or_host, buf1, buf2);
const char *cmd = remove ? "del" : "add";
const char *proto = (cd.family == AF_INET) ? NULL : "-6";
if (cd.dev.empty()) {
RunCommand("/sbin/ip %s route %s %s via %s", proto, cmd, buf1, buf2);
} else {
const char *net_or_host = (cd.cidr == 128) ? "-host" : "-net";
RunCommand("/sbin/route %s %s inet6 %s gw %s", cmd, net_or_host, buf1, buf2);
RunCommand("/sbin/ip %s route %s %s dev %s", proto, cmd, buf1, cd.dev.c_str());
}
#elif defined(OS_MACOSX) || defined(OS_FREEBSD)
const char *cmd = remove ? "delete" : "add";
@ -470,9 +474,10 @@ static void AddOrRemoveRoute(const RouteInfo &cd, bool remove) {
#endif
}
bool TunsafeBackendBsd::AddRoute(int family, const void *dest, int dest_prefix, const void *gateway) {
bool TunsafeBackendBsd::AddRoute(int family, const void *dest, int dest_prefix, const void *gateway, const char *dev) {
RouteInfo c;
c.dev = dev ? dev : "";
c.family = family;
size_t len = (family == AF_INET) ? 4 : 16;
memcpy(c.ip, dest, len);
@ -493,7 +498,6 @@ static bool IsIpv6AddressSet(const void *p) {
// Called to initialize tun
bool TunsafeBackendBsd::Initialize(const TunConfig &&config, TunConfigOut *out) override {
char def_iface[12];
char devname[16];
if (!RunPrePostCommand(config.pre_post_commands.pre_up)) {
@ -513,20 +517,24 @@ bool TunsafeBackendBsd::Initialize(const TunConfig &&config, TunConfigOut *out)
uint32 default_route_v4 = ComputeIpv4DefaultRoute(config.ip, netmask);
RunCommand("/sbin/ifconfig %s %A mtu %d %A netmask %A up", devname, config.ip, config.mtu, config.ip, netmask);
AddRoute(config.ip & netmask, config.cidr, config.ip);
AddRoute(config.ip & netmask, config.cidr, config.ip, devname);
if (config.use_ipv4_default_route) {
if (config.default_route_endpoint_v4) {
uint32 gw;
if (!GetDefaultRoute(def_iface, sizeof(def_iface), &gw)) {
uint32 ipv4_default_gw;
char default_iface[16];
if (!GetDefaultRoute(default_iface, sizeof(default_iface), &ipv4_default_gw)) {
RERROR("Unable to determine default interface.");
return false;
}
AddRoute(config.default_route_endpoint_v4, 32, gw);
AddRoute(config.default_route_endpoint_v4, 32, ipv4_default_gw, NULL);
for (auto it = config.excluded_ips.begin(); it != config.excluded_ips.end(); ++it) {
if (it->size == 32)
AddRoute(ReadBE32(it->addr), it->cidr, ipv4_default_gw, default_iface);
}
}
AddRoute(0x00000000, 1, default_route_v4);
AddRoute(0x80000000, 1, default_route_v4);
AddRoute(0x00000000, 1, default_route_v4, devname);
AddRoute(0x80000000, 1, default_route_v4, devname);
}
uint8 default_route_v6[16];
@ -537,23 +545,23 @@ bool TunsafeBackendBsd::Initialize(const TunConfig &&config, TunConfigOut *out)
ComputeIpv6DefaultRoute(config.ipv6_address, config.ipv6_cidr, default_route_v6);
RunCommand("/sbin/ifconfig %s inet6 %s", devname, print_ip_prefix(buf, AF_INET6, config.ipv6_address, config.ipv6_cidr));
RunCommand("/sbin/ifconfig %s inet6 add %s", devname, print_ip_prefix(buf, AF_INET6, config.ipv6_address, config.ipv6_cidr));
if (config.use_ipv6_default_route) {
if (IsIpv6AddressSet(config.default_route_endpoint_v6)) {
RERROR("default_route_endpoint_v6 not supported");
}
AddRoute(AF_INET6, matchall_1_route + 1, 1, default_route_v6);
AddRoute(AF_INET6, matchall_1_route + 0, 1, default_route_v6);
AddRoute(AF_INET6, matchall_1_route + 1, 1, default_route_v6, devname);
AddRoute(AF_INET6, matchall_1_route + 0, 1, default_route_v6, devname);
}
}
// Add all the extra routes
for (auto it = config.extra_routes.begin(); it != config.extra_routes.end(); ++it) {
if (it->size == 32) {
AddRoute(ReadBE32(it->addr), it->cidr, default_route_v4);
AddRoute(ReadBE32(it->addr), it->cidr, default_route_v4, devname);
} else if (it->size == 128 && config.ipv6_cidr) {
AddRoute(AF_INET6, it->addr, it->cidr, default_route_v6);
AddRoute(AF_INET6, it->addr, it->cidr, default_route_v6, devname);
}
}
@ -688,34 +696,38 @@ void InitCpuFeatures();
void Benchmark();
uint32 g_ui_ip;
// Writes |ip| into |buf| as dotted-decimal text ("a.b.c.d") and returns |buf|.
// The most significant byte of |ip| is printed first. At most kSizeOfAddress
// bytes (including the terminating NUL) are written.
const char *print_ip(char buf[kSizeOfAddress], in_addr_t ip) {
  // Split the address into its four octets, most significant first.
  const int octet0 = (ip >> 24) & 0xff;
  const int octet1 = (ip >> 16) & 0xff;
  const int octet2 = (ip >> 8) & 0xff;
  const int octet3 = ip & 0xff;
  snprintf(buf, kSizeOfAddress, "%d.%d.%d.%d", octet0, octet1, octet2, octet3);
  return buf;
}
class MyProcessorDelegate : public ProcessorDelegate {
public:
virtual void OnConnected(in_addr_t my_ip) {
if (my_ip != g_ui_ip) {
if (my_ip) {
char buf[kSizeOfAddress];
print_ip(buf, my_ip);
RINFO("Connection established. IP %s", buf);
}
g_ui_ip = my_ip;
MyProcessorDelegate() {
wg_processor_ = NULL;
is_connected_ = false;
}
virtual void OnConnected() override {
if (!is_connected_) {
uint32 ipv4_ip = ReadBE32(wg_processor_->tun_addr().addr);
char buf[kSizeOfAddress];
RINFO("Connection established. IP %s", print_ip(buf, ipv4_ip));
is_connected_ = true;
}
}
virtual void OnDisconnected() {
MyProcessorDelegate::OnConnected(0);
virtual void OnConnectionRetry(uint32 attempts) override {
if (is_connected_ && attempts >= 3) {
is_connected_ = false;
RINFO("Reconnecting...");
}
}
WireguardProcessor *wg_processor_;
bool is_connected_;
};
int main(int argc, char **argv) {
bool exit_flag = false;
InitCpuFeatures();
if (argc == 2 && strcmp(argv[1], "--benchmark") == 0) {
@ -739,9 +751,12 @@ int main(int argc, char **argv) {
MyProcessorDelegate my_procdel;
TunsafeBackendBsd *socket_loop = CreateTunsafeBackendBsd();
WireguardProcessor wg(socket_loop, socket_loop, &my_procdel);
my_procdel.wg_processor_ = &wg;
socket_loop->SetProcessor(&wg);
if (!ParseWireGuardConfigFile(&wg, argv[1], &exit_flag)) return 1;
DnsResolver dns_resolver(NULL);
if (!ParseWireGuardConfigFile(&wg, argv[1], &dns_resolver)) return 1;
if (!wg.Start()) return 1;
socket_loop->RunLoop();

View file

@ -6,12 +6,14 @@
#include "netapi.h"
#include "wireguard.h"
#include "wireguard_config.h"
#include <string>
// Describes one routing-table entry added by the BSD/Linux backend, with
// enough detail to add it and to remove it again later.
struct RouteInfo {
// Address family of |ip| and |gw|; AF_INET selects 4-byte addresses,
// anything else is treated as 16-byte (IPv6) addresses.
uint8 family;
// Prefix length of the destination network, in bits.
uint8 cidr;
// Destination network address; only the first 4 bytes are used for AF_INET.
uint8 ip[16];
// Gateway address, same layout as |ip|.
uint8 gw[16];
// Interface name to route through. When empty the route is added via |gw|
// instead of via a device.
std::string dev;
};
class TunsafeBackendBsd : public TunInterface, public UdpInterface {
@ -34,9 +36,9 @@ protected:
virtual bool InitializeTun(char devname[16]) = 0;
virtual void RunLoopInner() = 0;
void AddRoute(uint32 ip, uint32 cidr, uint32 gw);
void AddRoute(uint32 ip, uint32 cidr, uint32 gw, const char *dev);
void DelRoute(const RouteInfo &cd);
bool AddRoute(int family, const void *dest, int dest_prefix, const void *gateway);
bool AddRoute(int family, const void *dest, int dest_prefix, const void *gateway, const char *dev);
bool RunPrePostCommand(const std::vector<std::string> &vec);
WireguardProcessor *processor_;

View file

@ -1,8 +1,11 @@
// SPDX-License-Identifier: AGPL-1.0-only
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
// Note: This is an experimental implementation that doesn't work: there's no way
// for the alarm signal to interrupt the tunsafe main thread.
#include "network_bsd_common.h"
#include "tunsafe_endian.h"
#include "tunsafe_config.h"
#include "tunsafe_threading.h"
#include "util.h"
#include <stdio.h>
@ -91,7 +94,7 @@ private:
bool shutting_down_;
bool got_sig_alarm_;
pthread_mutex_t lock_;
Mutex lock_;
pthread_cond_t cond_;
};
@ -120,7 +123,7 @@ private:
bool shutting_down_;
pthread_mutex_t lock_;
Mutex lock_;
pthread_cond_t cond_;
};
@ -147,7 +150,7 @@ private:
WorkerLoop *worker_;
pthread_t read_tid_, write_tid_;
Packet *queue_, **queue_end_;
pthread_mutex_t lock_;
Mutex lock_;
pthread_cond_t cond_;
};
@ -158,12 +161,11 @@ WorkerLoop::WorkerLoop() {
shutting_down_ = false;
got_sig_alarm_ = false;
processor_ = NULL;
pthread_mutex_init(&lock_, NULL);
pthread_cond_init(&cond_, NULL);
if (pthread_cond_init(&cond_, NULL) != 0)
tunsafe_die("pthread_cond_init failed");
}
WorkerLoop::~WorkerLoop() {
pthread_mutex_destroy(&lock_);
pthread_cond_destroy(&cond_);
}
@ -174,13 +176,14 @@ bool WorkerLoop::Initialize(WireguardProcessor *processor) {
void WorkerLoop::StartThread() {
assert(tid_ == 0);
pthread_create(&tid_, NULL, &ThreadMainStatic, this);
if (pthread_create(&tid_, NULL, &ThreadMainStatic, this) != 0)
tunsafe_die("pthread_create failed");
}
void WorkerLoop::StopThread() {
pthread_mutex_lock(&lock_);
lock_.Acquire();
shutting_down_ = true;
pthread_mutex_unlock(&lock_);
lock_.Release();
if (tid_) {
void *x;
@ -198,16 +201,16 @@ void WorkerLoop::NotifyStop() {
void WorkerLoop::HandlePacket(Packet *packet, int target) {
// RINFO("WorkerLoop::HandlePacket");
packet->post_target = target;
pthread_mutex_lock(&lock_);
lock_.Acquire();
Packet *old_queue = queue_;
*queue_end_ = packet;
queue_end_ = &packet->next;
packet->next = NULL;
if (old_queue == NULL) {
pthread_mutex_unlock(&lock_);
lock_.Release();
pthread_cond_signal(&cond_);
} else {
pthread_mutex_unlock(&lock_);
lock_.Release();
}
}
@ -218,19 +221,19 @@ void *WorkerLoop::ThreadMainStatic(void *x) {
void *WorkerLoop::ThreadMain() {
Packet *packet_queue;
pthread_mutex_lock(&lock_);
lock_.Acquire();
for (;;) {
// Grab the whole list
for (;;) {
while (got_sig_alarm_) {
got_sig_alarm_ = false;
pthread_mutex_unlock(&lock_);
lock_.Release();
processor_->SecondLoop();
pthread_mutex_lock(&lock_);
lock_.Acquire();
}
if (shutting_down_ || queue_ != NULL)
break;
pthread_cond_wait(&cond_, &lock_);
pthread_cond_wait(&cond_, lock_.impl());
}
if (shutting_down_)
break;
@ -238,7 +241,7 @@ void *WorkerLoop::ThreadMain() {
queue_ = NULL;
queue_end_ = &queue_;
pthread_mutex_unlock(&lock_);
lock_.Release();
// And send all items in the list
while (packet_queue != NULL) {
Packet *next = packet_queue->next;
@ -249,9 +252,9 @@ void *WorkerLoop::ThreadMain() {
}
packet_queue = next;
}
pthread_mutex_lock(&lock_);
lock_.Acquire();
}
pthread_mutex_unlock(&lock_);
lock_.Release();
return NULL;
}
@ -265,14 +268,13 @@ UdpLoop::UdpLoop() {
worker_ = NULL;
queue_ = NULL;
queue_end_ = &queue_;
pthread_mutex_init(&lock_, NULL);
pthread_cond_init(&cond_, NULL);
if (pthread_cond_init(&cond_, NULL) != 0)
tunsafe_die("pthread_cond_init failed");
}
UdpLoop::~UdpLoop() {
if (fd_ != -1)
close(fd_);
pthread_mutex_destroy(&lock_);
pthread_cond_destroy(&cond_);
}
@ -286,16 +288,18 @@ bool UdpLoop::Initialize(int listen_port, WorkerLoop *worker) {
}
void UdpLoop::Start() {
pthread_create(&read_tid_, NULL, &ReaderMainStatic, this);
pthread_create(&write_tid_, NULL, &WriterMainStatic, this);
if (pthread_create(&read_tid_, NULL, &ReaderMainStatic, this) != 0)
tunsafe_die("pthread_create failed");
if (pthread_create(&write_tid_, NULL, &WriterMainStatic, this) != 0)
tunsafe_die("pthread_create failed");
}
void UdpLoop::Stop() {
void *x;
pthread_mutex_lock(&lock_);
lock_.Acquire();
shutting_down_ = true;
pthread_mutex_unlock(&lock_);
lock_.Release();
pthread_cond_signal(&cond_);
pthread_kill(read_tid_, SIGUSR1);
@ -345,17 +349,17 @@ void *UdpLoop::ReaderMain() {
void *UdpLoop::WriterMain() {
Packet *queue;
pthread_mutex_lock(&lock_);
lock_.Acquire();
for (;;) {
// Grab the whole list
while (!shutting_down_ && queue_ == NULL)
pthread_cond_wait(&cond_, &lock_);
pthread_cond_wait(&cond_, lock_.impl());
if (shutting_down_)
break;
queue = queue_;
queue_ = NULL;
queue_end_ = &queue_;
pthread_mutex_unlock(&lock_);
lock_.Release();
// And send all items in the list
while (queue != NULL) {
int r = sendto(fd_, queue->data, queue->size, 0,
@ -370,9 +374,9 @@ void *UdpLoop::WriterMain() {
queue = queue->next;
FreePacket(to_free);
}
pthread_mutex_lock(&lock_);
lock_.Acquire();
}
pthread_mutex_unlock(&lock_);
lock_.Release();
return NULL;
}
@ -380,15 +384,15 @@ void UdpLoop::WriteUdpPacket(Packet *packet) {
// RINFO("write udp packet to queue!");
packet->next = NULL;
pthread_mutex_lock(&lock_);
lock_.Acquire();
Packet *old_queue = queue_;
*queue_end_ = packet;
queue_end_ = &packet->next;
if (old_queue == NULL) {
pthread_mutex_unlock(&lock_);
lock_.Release();
pthread_cond_signal(&cond_);
} else {
pthread_mutex_unlock(&lock_);
lock_.Release();
}
}
@ -400,14 +404,13 @@ TunLoop::TunLoop() {
write_tid_ = 0;
queue_ = NULL;
queue_end_ = &queue_;
pthread_mutex_init(&lock_, NULL);
pthread_cond_init(&cond_, NULL);
if (pthread_cond_init(&cond_, NULL) != 0)
tunsafe_die("pthread_cond_init failed");
}
TunLoop::~TunLoop() {
if (fd_ != -1)
close(fd_);
pthread_mutex_destroy(&lock_);
pthread_cond_destroy(&cond_);
}
@ -421,16 +424,18 @@ bool TunLoop::Initialize(char devname[16], WorkerLoop *worker) {
}
void TunLoop::Start() {
pthread_create(&read_tid_, NULL, &ReaderMainStatic, this);
pthread_create(&write_tid_, NULL, &WriterMainStatic, this);
if (pthread_create(&read_tid_, NULL, &ReaderMainStatic, this) != 0)
tunsafe_die("pthread_create failed");
if (pthread_create(&write_tid_, NULL, &WriterMainStatic, this) != 0)
tunsafe_die("pthread_create failed");
}
void TunLoop::Stop() {
void *x;
pthread_mutex_lock(&lock_);
lock_.Acquire();
shutting_down_ = true;
pthread_mutex_unlock(&lock_);
lock_.Release();
pthread_kill(read_tid_, SIGUSR1);
pthread_kill(write_tid_, SIGUSR1);
@ -469,18 +474,18 @@ void *TunLoop::ReaderMain() {
void *TunLoop::WriterMain() {
Packet *queue;
pthread_mutex_lock(&lock_);
lock_.Acquire();
for (;;) {
// Grab the whole list
while (!shutting_down_ && queue_ == NULL) {
pthread_cond_wait(&cond_, &lock_);
pthread_cond_wait(&cond_, lock_.impl());
}
if (shutting_down_)
break;
queue = queue_;
queue_ = NULL;
queue_end_ = &queue_;
pthread_mutex_unlock(&lock_);
lock_.Release();
// And send all items in the list
while (queue != NULL) {
if (TUN_PREFIX_BYTES)
@ -494,24 +499,24 @@ void *TunLoop::WriterMain() {
queue = queue->next;
FreePacket(to_free);
}
pthread_mutex_lock(&lock_);
lock_.Acquire();
}
pthread_mutex_unlock(&lock_);
lock_.Release();
return NULL;
}
void TunLoop::WriteTunPacket(Packet *packet) {
packet->next = NULL;
pthread_mutex_lock(&lock_);
lock_.Acquire();
Packet *old_queue = queue_;
*queue_end_ = packet;
queue_end_ = &packet->next;
if (old_queue == NULL) {
pthread_mutex_unlock(&lock_);
lock_.Release();
pthread_cond_signal(&cond_);
} else {
pthread_mutex_unlock(&lock_);
lock_.Release();
}
}

File diff suppressed because it is too large Load diff

View file

@ -6,14 +6,18 @@
#include "tunsafe_types.h"
#include "netapi.h"
#include "network_win32_api.h"
#include "network_win32_dnsblock.h"
#include "wireguard_config.h"
#include "tunsafe_threading.h"
#include <functional>
struct Packet;
class WireguardProcessor;
class TunsafeBackendWin32;
class ThreadedPacketQueue {
public:
explicit ThreadedPacketQueue(WireguardProcessor *wg, NetworkStats *stats);
explicit ThreadedPacketQueue(WireguardProcessor *wg, TunsafeBackendWin32 *backend);
~ThreadedPacketQueue();
enum {
@ -39,7 +43,7 @@ private:
Packet **last_ptr_;
uint32 packets_in_queue_;
uint32 need_notify_;
CRITICAL_SECTION mutex_;
Mutex mutex_;
HANDLE event_;
HANDLE timer_handle_;
@ -47,7 +51,7 @@ private:
WireguardProcessor *wg_;
bool exit_flag_;
bool timer_interrupt_;
NetworkStats *stats_;
TunsafeBackendWin32 *backend_;
};
// Encapsulates a UDP socket, optionally listening for incoming packets
@ -74,7 +78,7 @@ private:
// All packets queued for writing. Locked by |mutex_|
Packet *wqueue_, **wqueue_end_;
CRITICAL_SECTION mutex_;
Mutex mutex_;
ThreadedPacketQueue *packet_handler_;
SOCKET socket_;
@ -85,22 +89,26 @@ private:
bool exit_thread_;
};
class DnsBlocker;
class TunWin32Adapter {
public:
TunWin32Adapter();
TunWin32Adapter(DnsBlocker *dns_blocker);
~TunWin32Adapter();
bool OpenAdapter(bool *exit_thread, DWORD open_flags);
bool OpenAdapter(unsigned int *exit_thread, DWORD open_flags);
bool InitAdapter(const TunInterface::TunConfig &&config, TunInterface::TunConfigOut *out);
void CloseAdapter();
HANDLE handle() { return handle_; }
void DisassociateDnsBlocker() { dns_blocker_ = NULL; }
private:
bool RunPrePostCommand(const std::vector<std::string> &vec);
HANDLE handle_;
HANDLE current_dns_block_;
DnsBlocker *dns_blocker_;
std::vector<MIB_IPFORWARD_ROW2> routes_to_undo_;
uint8 mac_adress_[6];
@ -113,7 +121,7 @@ private:
// Implementation of TUN interface handling using IO Completion Ports
class TunWin32Iocp : public TunInterface {
public:
explicit TunWin32Iocp();
explicit TunWin32Iocp(DnsBlocker *blocker, TunsafeBackendWin32 *backend);
~TunWin32Iocp();
void SetPacketHandler(ThreadedPacketQueue *packet_handler) { packet_handler_ = packet_handler; }
@ -125,6 +133,8 @@ public:
virtual bool Initialize(const TunConfig &&config, TunConfigOut *out) override;
virtual void WriteTunPacket(Packet *packet) override;
TunWin32Adapter &adapter() { return adapter_; }
private:
void CloseTun();
void ThreadMain();
@ -134,20 +144,21 @@ private:
HANDLE completion_port_handle_;
HANDLE thread_;
CRITICAL_SECTION mutex_;
Mutex mutex_;
bool exit_thread_;
// All packets queued for writing
Packet *wqueue_, **wqueue_end_;
TunsafeBackendWin32 *backend_;
TunWin32Adapter adapter_;
};
// Implementation of TUN interface handling using Overlapped IO
class TunWin32Overlapped : public TunInterface {
public:
explicit TunWin32Overlapped();
explicit TunWin32Overlapped(DnsBlocker *blocker, TunsafeBackendWin32 *backend);
~TunWin32Overlapped();
void SetPacketHandler(ThreadedPacketQueue *packet_handler) { packet_handler_ = packet_handler; }
@ -167,7 +178,7 @@ private:
ThreadedPacketQueue *packet_handler_;
HANDLE thread_;
CRITICAL_SECTION mutex_;
Mutex mutex_;
HANDLE read_event_, write_event_, wake_event_;
@ -176,4 +187,111 @@ private:
Packet *wqueue_, **wqueue_end_;
TunWin32Adapter adapter_;
TunsafeBackendWin32 *backend_;
};
// Win32 implementation of TunsafeBackend. It also acts as the
// ProcessorDelegate, so it receives the OnConnected / OnConnectionRetry
// callbacks declared below. The packet queue and both TUN
// implementations are friends and hold a back pointer to this object.
class TunsafeBackendWin32 : public TunsafeBackend, public ProcessorDelegate {
friend class ThreadedPacketQueue;
friend class TunWin32Iocp;
friend class TunWin32Overlapped;
public:
// |delegate| receives the backend's notifications (see TunsafeBackend::Delegate).
TunsafeBackendWin32(Delegate *delegate);
~TunsafeBackendWin32();
// -- from TunsafeBackend
virtual bool Initialize() override;
virtual void Teardown() override;
virtual void Start(const char *config_file) override;
virtual void Stop() override;
virtual void RequestStats(bool enable) override;
virtual void ResetStats() override;
virtual InternetBlockState GetInternetBlockState(bool *is_activated) override;
virtual void SetInternetBlockState(InternetBlockState s) override;
virtual void SetServiceStartupFlags(uint32 flags) override;
virtual LinearizedGraph *GetGraph(int type) override;
virtual std::string GetConfigFileName() override;
// -- from ProcessorDelegate
virtual void OnConnected() override;
virtual void OnConnectionRetry(uint32 attempts) override;
// NOTE(review): presumably copies |key| into public_key_ of the base class - confirm in the .cpp.
void SetPublicKey(const uint8 key[32]);
// NOTE(review): presumably called by the TUN classes when the adapter stops
// working (see MODE_TUN_FAILED below) - confirm in the .cpp.
void TunAdapterFailed();
private:
void StopInner(bool is_restart);
// Thread entry point; |x| is the thread argument passed to the Win32 thread API.
static DWORD WINAPI WorkerThread(void *x);
void PushStats();
HANDLE worker_thread_;
// Values stored in |stop_mode_|.
enum {
MODE_NONE = 0,
MODE_EXIT = 1,
MODE_RESTART = 2,
MODE_TUN_FAILED = 3,
};
bool want_periodic_stats_;
// One of the MODE_* values above.
unsigned int stop_mode_;
Delegate *delegate_;
char *config_file_;
DnsBlocker dns_blocker_;
DnsResolver dns_resolver_;
WireguardProcessor *wg_processor_;
uint32 last_tun_adapter_failed_;
StatsCollector stats_collector_;
// NOTE(review): presumably guards |stats_| - confirm in the .cpp.
Mutex stats_mutex_;
WgProcessorStats stats_;
};
// This class ensures that all callbacks get rescheduled to another thread
class TunsafeBackendDelegateThreaded : public TunsafeBackend::Delegate {
public:
TunsafeBackendDelegateThreaded(TunsafeBackend::Delegate *delegate, const std::function<void(void)> &callback);
~TunsafeBackendDelegateThreaded();
private:
virtual void OnGetStats(const WgProcessorStats &stats);
virtual void OnGraphAvailable();
virtual void OnStateChanged();
virtual void OnClearLog();
virtual void OnLogLine(const char **s);
virtual void OnStatusCode(TunsafeBackend::StatusCode status);
virtual void DoWork();
enum Which {
Id_OnGetStats,
Id_OnStateChanged,
Id_OnClearLog,
Id_OnLogLine,
Id_OnUpdateUI,
Id_OnStatusCode,
Id_OnGraphAvailable,
};
void AddEntry(Which which, intptr_t lparam = 0, uint32 wparam = 0);
TunsafeBackend::Delegate *delegate_;
std::function<void(void)> callback_;
struct Entry {
uint8 which;
uint32 wparam;
intptr_t lparam;
Entry(uint8 which, uint32 wparam, intptr_t lparam) : which(which), wparam(wparam), lparam(lparam) {}
};
static void FreeEntry(Entry *e);
Mutex mutex_;
std::vector<Entry> incoming_entry_;
std::vector<Entry> processing_entry_;
};

View file

@ -6,44 +6,115 @@
#include "tunsafe_types.h"
#include "wireguard.h"
struct NetworkStats {
bool reset_stats;
CRITICAL_SECTION mutex;
ProcessorStats packet_stats;
};
#include <functional>
class TunsafeBackendWin32 {
struct StatsCollector {
public:
TunsafeBackendWin32();
~TunsafeBackendWin32();
void Start(ProcessorDelegate *procdel, const char *config_file);
void Stop();
ProcessorStats GetStats();
void ResetStats() { stats_.reset_stats = true; }
bool is_started() const { return worker_thread_ != NULL; }
enum {
CHANNELS = 2,
TIMEVALS = 4,
};
StatsCollector() { Init(); }
void AddSamples(float data[CHANNELS]);
struct TimeSeries {
float *data;
int size;
int shift;
};
const TimeSeries *GetTimeSeries(int channel, int timeval) { return &accum_[channel][timeval].data; }
private:
static DWORD WINAPI WorkerThread(void *x);
NetworkStats stats_;
HANDLE worker_thread_;
bool exit_flag_;
ProcessorDelegate *procdel_;
char *config_file_;
struct Accumulator {
float acc;
int acc_count;
int acc_max;
bool dirty;
TimeSeries data;
};
void Init();
static void AddToGraphDataSource(StatsCollector::TimeSeries *ts, float value);
static void AddToAccumulators(StatsCollector::Accumulator *acc, float rval);
Accumulator accum_[CHANNELS][TIMEVALS];
};
// Fixed-size header describing a graph returned by TunsafeBackend::GetGraph.
// The four members add up to 16 bytes.
// NOTE(review): presumably followed in memory by the chart data, with
// |total_size| covering header plus data - confirm against the producer.
struct LinearizedGraph {
uint32 total_size;
uint32 graph_type;
uint8 num_charts;
// Unused; pads the header to 16 bytes.
uint8 reserved[7];
};
class TunsafeBackend {
public:
// All codes < 0 are permanent errors
enum StatusCode {
kStatusStopped = 0,
kStatusInitializing = 1,
kStatusConnecting = 2,
kStatusConnected = 3,
kStatusReconnecting = 4,
kStatusTunRetrying = 10,
InternetBlockState GetInternetBlockState(bool *is_activated);
kErrorInitialize = -1,
kErrorTunPermanent = -2,
kErrorServiceLost = -3,
};
// Returns if reconnect is needed
void SetInternetBlockState(InternetBlockState s);
static bool IsPermanentError(StatusCode status) {
return (int32)status < 0;
}
class Delegate {
public:
virtual ~Delegate();
virtual void OnGetStats(const WgProcessorStats &stats) = 0;
virtual void OnGraphAvailable() = 0;
virtual void OnStateChanged() = 0;
virtual void OnClearLog() = 0;
virtual void OnLogLine(const char **s) = 0;
virtual void OnStatusCode(TunsafeBackend::StatusCode status) = 0;
// This function is needed for CreateTunsafeBackendDelegateThreaded.
// It's expected to be called on the main thread, and then all callbacks will arrive
// on the right thread.
virtual void DoWork();
};
TunsafeBackend();
virtual ~TunsafeBackend();
// Setup/teardown the connection to the local service (if any)
virtual bool Initialize() = 0;
virtual void Teardown() = 0;
virtual void Start(const char *config_file) = 0;
virtual void Stop() = 0;
virtual void RequestStats(bool enable) = 0;
virtual void ResetStats() = 0;
virtual InternetBlockState GetInternetBlockState(bool *is_activated) = 0;
virtual void SetInternetBlockState(InternetBlockState s) = 0;
virtual void SetServiceStartupFlags(uint32 flags) = 0;
virtual std::string GetConfigFileName() = 0;
virtual LinearizedGraph *GetGraph(int type) = 0;
bool is_started() { return is_started_; }
bool is_remote() { return is_remote_; }
const uint8 *public_key() { return public_key_; }
StatusCode status() { return status_; }
uint32 GetIP() { return ipv4_ip_; }
protected:
bool is_started_;
bool is_remote_;
StatusCode status_;
uint32 ipv4_ip_;
uint8 public_key_[32];
};
TunsafeBackend *CreateNativeTunsafeBackend(TunsafeBackend::Delegate *delegate);
TunsafeBackend::Delegate *CreateTunsafeBackendDelegateThreaded(TunsafeBackend::Delegate *delegate, const std::function<void(void)> &callback);
extern int tpq_last_qsize;
extern int g_tun_reads, g_tun_writes;

View file

@ -5,6 +5,7 @@
#include "network_win32_dnsblock.h"
#include <fwpmu.h>
#include <fwpmtypes.h>
#include <string.h>
#pragma comment (lib, "Fwpuclnt.lib")
@ -43,11 +44,19 @@ static inline bool FwpmFilterAddCheckedAleConnect(HANDLE handle, FWPM_FILTER0 *f
return false;
}
}
return true;
}
HANDLE BlockDnsExceptOnAdapter(const NET_LUID &luid, bool also_ipv6) {
// Starts out inactive: no engine handle and IPv6 blocking off.
// |luid_| is left unset; it is only read while |handle_| is non-NULL.
DnsBlocker::DnsBlocker() {
also_ipv6_ = false;
handle_ = NULL;
}
// Lifts any block that is still active when the object goes away.
DnsBlocker::~DnsBlocker() {
RestoreDns();
}
bool DnsBlocker::BlockDnsExceptOnAdapter(const NET_LUID &luid, bool also_ipv6) {
FWPM_SUBLAYER0 *sublayer = NULL;
FWP_BYTE_BLOB *fwp_appid = NULL;
@ -56,6 +65,14 @@ HANDLE BlockDnsExceptOnAdapter(const NET_LUID &luid, bool also_ipv6) {
DWORD err;
HANDLE handle = NULL;
// Check if it already matches
if (handle_ != NULL) {
if (memcmp(&luid, &luid_, sizeof(luid)) == 0 && also_ipv6_)
return true;
FwpmEngineClose0(handle_);
handle_ = NULL;
}
{
FWPM_SESSION0 session = {0};
session.flags = FWPM_SESSION_FLAG_DYNAMIC;
@ -69,7 +86,7 @@ HANDLE BlockDnsExceptOnAdapter(const NET_LUID &luid, bool also_ipv6) {
{
FWPM_SUBLAYER0 sublayer = {0};
sublayer.subLayerKey = TUNSAFE_DNS_SUBLAYER;
sublayer.displayData.name = L"TunSafe";
sublayer.displayData.name = L"TunSafe DNS Block";
sublayer.weight = 0x100;
err = FwpmSubLayerAdd0(handle, &sublayer, NULL);
if (err != 0) {
@ -96,7 +113,7 @@ HANDLE BlockDnsExceptOnAdapter(const NET_LUID &luid, bool also_ipv6) {
filter.filterCondition = filter_condition;
filter.numFilterConditions = 2;
filter.subLayerKey = TUNSAFE_DNS_SUBLAYER;
filter.displayData.name = L"TunSafe";
filter.displayData.name = L"TunSafe DNS Block";
filter.weight.type = FWP_UINT8;
filter.weight.uint8 = 15;
filter.action.type = FWP_ACTION_PERMIT;
@ -127,15 +144,21 @@ getout:
success:
if (fwp_appid)
FwpmFreeMemory0((void **)&fwp_appid);
return handle;
handle_ = handle;
also_ipv6_ = also_ipv6;
luid_ = luid;
return handle != NULL;
}
void RestoreDnsExceptOnAdapter(HANDLE h) {
if (h)
void DnsBlocker::RestoreDns() {
HANDLE h = handle_;
if (h) {
handle_ = NULL;
FwpmEngineClose0(h);
}
}
static bool RemovePersistentInternetBlockingInner(HANDLE handle) {
FWPM_FILTER_ENUM_TEMPLATE0 enum_template = {0};
HANDLE enum_handle = NULL;
@ -337,6 +360,10 @@ getout:
return false;
}
// Forgets the cached firewall blocking state. GetInternetFwBlockingState()
// returns the cached value only while it is non-zero, so after this call it
// no longer takes that early return.
void ClearInternetFwBlockingStateCache() {
internet_fw_blocking_state = 0;
}
uint8 GetInternetFwBlockingState() {
if (internet_fw_blocking_state != 0)
return internet_fw_blocking_state;

View file

@ -2,13 +2,25 @@
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
#pragma once
HANDLE BlockDnsExceptOnAdapter(const NET_LUID &luid, bool also_ipv6 );
void RestoreDnsExceptOnAdapter(HANDLE h);
// Owns the Windows Filtering Platform engine handle used to block DNS on
// every adapter except one. The destructor lifts the block by calling
// RestoreDns().
class DnsBlocker {
public:
DnsBlocker();
~DnsBlocker();
// Installs the block so that only the adapter identified by |luid| is
// exempt; |also_ipv6| is recorded in |also_ipv6_|. Returns true when a
// block is in place afterwards.
bool BlockDnsExceptOnAdapter(const NET_LUID &luid, bool also_ipv6);
// Closes the engine handle (if any) and marks the blocker inactive.
void RestoreDns();
// True while an engine handle is held.
bool IsActive() { return handle_ != NULL; }
// Current state
// Adapter that is exempt from the block; only meaningful while |handle_| is non-NULL.
NET_LUID luid_;
// Filtering engine handle, or NULL when no block is active.
HANDLE handle_;
// The |also_ipv6| value passed to the last successful BlockDnsExceptOnAdapter call.
bool also_ipv6_;
};
bool AddPersistentInternetBlocking(const NET_LUID *default_interface, const NET_LUID &luid_to_allow, bool also_ipv6);
enum {
IBS_UNKOWN,
IBS_INACTIVE,
@ -18,3 +30,4 @@ enum {
void SetInternetFwBlockingState(bool want);
uint8 GetInternetFwBlockingState();
void ClearInternetFwBlockingStateCache();

Binary file not shown.

1179
service_win32.cpp Normal file

File diff suppressed because it is too large Load diff

171
service_win32.h Normal file
View file

@ -0,0 +1,171 @@
// SPDX-License-Identifier: AGPL-1.0-only
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
#pragma once
#include "service_win32_api.h"
#include <strsafe.h>
#include "util.h"
#include "network_win32_api.h"
#include "tunsafe_threading.h"
#include <algorithm>
#include <string>
#include <assert.h>
// Fixed-layout status record; the STATIC_ASSERT below pins it to 128 bytes.
// NOTE(review): presumably exchanged between the service and its client over
// the pipe -- confirm in service_win32.cpp before changing the layout.
struct ServiceState {
// Two one-bit flags sharing the first byte.
uint8 is_started : 1;
uint8 internet_block_state_active : 1;
uint8 internet_block_state;
// Padding that keeps the total size at 128 bytes.
uint8 reserved[26+64];
uint32 ipv4_ip;
uint8 public_key[32];
};
STATIC_ASSERT(sizeof(ServiceState) == 128, ServiceState_wrong_size);
// Message transport over a named pipe, usable as either the server or the
// client end (see is_server_pipe). Incoming events are reported through the
// Delegate interface; outgoing packets are queued in a linked list guarded by
// packets_mutex_.
class PipeMessageHandler {
public:
// Callbacks implemented by the owner of the pipe.
class Delegate {
public:
virtual bool HandleMessage(int type, uint8 *data, size_t size) = 0;
virtual bool HandleNotify() = 0;
virtual bool HandleNewConnection() = 0;
virtual void HandleDisconnect() = 0;
};
PipeMessageHandler(const char *pipe_name, bool is_server_pipe, Delegate *delegate);
~PipeMessageHandler();
bool StartThread();
void StopThread();
bool WritePacket(int type, const uint8 *data, size_t data_size);
// The second entry of wait_handles_ doubles as the notify handle.
HANDLE notify_handle() { return wait_handles_[1]; }
HANDLE pipe_handle() { return pipe_; }
bool VerifyThread();
void FlushWrites(int delay);
bool is_connected() { return connection_established_; }
private:
bool InitializeServerPipe();
bool InitializeClientPipe();
void ClosePipe();
DWORD ThreadMain();
void SendNextQueuedWrite();
uint8 *ReadNamedPipeAsync(size_t *packet_size);
bool ConnectNamedPipeAsync();
bool WaitAndHandleWrites(int delay);
static DWORD WINAPI StaticThreadMain(void *x);
Delegate *delegate_;
HANDLE pipe_;
HANDLE thread_;
HANDLE wait_handles_[3];
OVERLAPPED write_overlapped_;
bool write_overlapped_active_;
bool exit_;
bool is_server_pipe_;
bool connection_established_;
char *pipe_name_;
// Node of the outgoing queue; `data` is a zero-length trailing array (a
// compiler extension), so the payload is allocated together with the node.
struct OutgoingPacket {
OutgoingPacket *next;
uint32 size;
uint8 data[0];
};
// Queue head and, presumably, the address of the last `next` link for O(1)
// append -- TODO confirm in service_win32.cpp.
OutgoingPacket *packets_, **packets_end_;
Mutex packets_mutex_;
DWORD thread_id_;
};
// Service-side object: it receives backend notifications (as a
// TunsafeBackend::Delegate) and pipe events (as a PipeMessageHandler::Delegate).
// It also keeps a fixed-size array of LOGLINE_COUNT historical log lines.
class TunsafeServiceImpl : public TunsafeBackend::Delegate, public PipeMessageHandler::Delegate {
public:
TunsafeServiceImpl();
virtual ~TunsafeServiceImpl();
// -- from TunsafeBackend::Delegate
virtual void OnGetStats(const WgProcessorStats &stats);
virtual void OnClearLog();
virtual void OnLogLine(const char **s);
virtual void OnStateChanged();
virtual void OnStatusCode(TunsafeBackend::StatusCode status);
virtual void OnGraphAvailable();
// -- from PipeMessageHandler::Delegate
virtual bool HandleMessage(int type, uint8 *data, size_t size);
virtual bool HandleNotify();
virtual bool HandleNewConnection();
virtual void HandleDisconnect();
// virtual methods
virtual unsigned OnStart(int argc, wchar_t **argv);
virtual void OnStop();
virtual void OnShutdown();
TunsafeBackend::Delegate *delegate() { return thread_delegate_; }
private:
void SendQueuedLogLines();
bool AuthenticateUser();
bool did_send_getstate_;
bool did_authenticate_user_;
uint32 want_graph_type_;
HKEY hkey_;
TunsafeBackend *backend_;
TunsafeBackend::Delegate *thread_delegate_;
PipeMessageHandler message_handler_;
// Bookkeeping for historical_log_lines_ below.
uint32 historical_log_lines_pos_;
uint32 historical_log_lines_count_;
uint32 last_line_sent_;
std::string current_filename_;
enum {
LOGLINE_COUNT = 256
};
char *historical_log_lines_[LOGLINE_COUNT];
};
// TunsafeBackend implementation that owns a PipeMessageHandler and receives its
// events. NOTE(review): presumably each backend call is forwarded to the
// service over that pipe -- confirm in service_win32.cpp.
class TunsafeServiceClient : public TunsafeBackend, public PipeMessageHandler::Delegate {
public:
TunsafeServiceClient(TunsafeBackend::Delegate *delegate);
virtual ~TunsafeServiceClient();
virtual bool Initialize();
virtual void Teardown();
virtual void Start(const char *config_file);
virtual void Stop();
virtual void RequestStats(bool enable);
virtual void ResetStats();
virtual InternetBlockState GetInternetBlockState(bool *is_activated);
virtual void SetInternetBlockState(InternetBlockState s);
virtual std::string GetConfigFileName();
virtual void SetServiceStartupFlags(uint32 flags);
virtual LinearizedGraph *GetGraph(int type);
// -- from PipeMessageHandler::Delegate
virtual bool HandleMessage(int type, uint8 *data, size_t size);
virtual bool HandleNotify();
virtual bool HandleNewConnection();
virtual void HandleDisconnect();
protected:
TunsafeBackend::Delegate *delegate_;
uint8 want_stats_;
bool got_state_from_control_;
// Most recent ServiceState held by the client.
ServiceState service_state_;
std::string config_file_;
PipeMessageHandler message_handler_;
LinearizedGraph *cached_graph_;
uint32 last_graph_type_;
Mutex mutex_;
};

24
service_win32_api.h Normal file
View file

@ -0,0 +1,24 @@
// SPDX-License-Identifier: AGPL-1.0-only
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
#pragma once
#include "network_win32_api.h"
// Startup option bits. Each value is a distinct power of two, so they can be
// OR-ed into one mask.
// NOTE(review): presumably the mask passed to SetServiceStartupFlags() -- confirm.
enum StartupFlags {
kStartupFlag_ForegroundService = 1,
kStartupFlag_BackgroundService = 2,
kStartupFlag_ConnectWhenWindowsStarts = 4,
kStartupFlag_MinimizeToTrayWhenWindowsStarts = 8,
};
BOOL RunProcessAsTunsafeServiceProcess();
void StopTunsafeService();
bool IsTunSafeServiceInstalled();
bool IsTunsafeServiceRunning();
void InstallTunSafeWindowsService();
bool UninstallTunSafeWindowsService();
TunsafeBackend *CreateTunsafeServiceClient(TunsafeBackend::Delegate *delegate);

View file

@ -21,7 +21,7 @@
#include <ws2ipdef.h>
#include <iphlpapi.h>
#include <mstcpip.h>
#include <Windowsx.h>
#include <tchar.h>
#else
@ -31,3 +31,5 @@
#include <stdio.h>
#include <stddef.h>
#undef min

View file

@ -1,6 +1,8 @@
// SPDX-License-Identifier: AGPL-1.0-only
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
#pragma once
#define TUNSAFE_VERSION_STRING "TunSafe 1.3-rc3"
#define TUNSAFE_VERSION_STRING "TunSafe 1.4-rc1"
#define WITH_HANDSHAKE_EXT 0
#define WITH_SHORT_HEADERS 0

View file

@ -10,6 +10,16 @@
#include <string.h>
// Bounded append helper. Copies the NUL-terminated string `copy` (terminator
// included) to `dst`, where `end` points one past the last byte of the buffer.
// Returns the position of the written terminator so calls can be chained.
// If `copy` plus its terminator does not fit, nothing is written and `end`
// is returned, which makes every later chained call a no-op too.
static char *strcpy_e(char *dst, char *end, const char *copy) {
  const size_t room = (size_t)(end - dst);
  const size_t copy_len = strlen(copy);
  if (copy_len < room) {
    memcpy(dst, copy, copy_len + 1);
    return dst + copy_len;
  }
  return end;
}
#if defined(ARCH_CPU_X86_FAMILY)
uint32 x86_pcap[3];
#if !defined(COMPILER_MSVC)
@ -22,6 +32,7 @@ static inline void __cpuid(int info[4], int func) {
}
#endif
void InitCpuFeatures() {
unsigned nIds, nExIds;
@ -45,13 +56,6 @@ void InitCpuFeatures() {
}
}
static char *strcpy_e(char *dst, char *end, const char *copy) {
size_t len = strlen(copy);
if (len >= (size_t)(end - dst)) return end;
memcpy(dst, copy, len + 1);
return dst + len;
}
void PrintCpuFeatures() {
char capbuf[2048], *end = capbuf + 2048, *s = capbuf;
@ -66,3 +70,22 @@ void PrintCpuFeatures() {
RINFO("Using:%s", capbuf);
}
#endif // defined(ARCH_CPU_X86_FAMILY)
#if defined(ARCH_CPU_ARM_FAMILY)
uint32 arm_pcap[1];
// ARM variant: performs no detection and simply sets every capability bit,
// so ARM_PCAP_NEON (bit 0 of arm_pcap[0]) always tests true.
void InitCpuFeatures() {
arm_pcap[0] = 0xffffffff;
}
// ARM variant: logs the list of CPU features in use.
void PrintCpuFeatures() {
  char capbuf[2048], *end = capbuf + 2048, *s = capbuf;
  // Start from an empty string: if no feature bit is set nothing is appended,
  // and the buffer must still be a valid C string for the %s below.
  capbuf[0] = 0;
  if (ARM_PCAP_NEON) s = strcpy_e(s, end, " neon");
  RINFO("Using:%s", capbuf);
}
#endif // defined(ARCH_CPU_ARM_FAMILY)

View file

@ -5,6 +5,9 @@
#include "tunsafe_types.h"
#if defined(ARCH_CPU_X86_FAMILY)
extern uint32 x86_pcap[3];
// cpuid 1, edx
@ -22,8 +25,19 @@ extern uint32 x86_pcap[3];
#define X86_PCAP_AVX512F (x86_pcap[2] & (1 << 16))
#define X86_PCAP_AVX512VL (x86_pcap[2] & (1 << 31))
#endif // defined(ARCH_CPU_X86_FAMILY)
#if defined(ARCH_CPU_ARM_FAMILY)
extern uint32 arm_pcap[1];
#define ARM_PCAP_NEON (arm_pcap[0] & (1 << 0))
#endif // defined(ARCH_CPU_ARM_FAMILY)
void InitCpuFeatures();
void PrintCpuFeatures();
#endif // TUNSAFE_CPU_H_
#endif // TUNSAFE_CPU_H_

View file

@ -70,6 +70,7 @@
#define ReadBE32Aligned(pt) ToBE32(*(uint32*)(pt))
#define WriteBE32Aligned(ct, st) (*(uint32*)(ct) = ToBE32(st))
// todo: these need to support unaligned pointers
#define ReadBE16(pt) ToBE16(*(uint16*)(pt))
#define WriteBE16(ct, st) (*(uint16*)(ct) = ToBE16(st))
#define ReadBE32(pt) ToBE32(*(uint32*)(pt))

57
tunsafe_threading.cpp Normal file
View file

@ -0,0 +1,57 @@
// SPDX-License-Identifier: AGPL-1.0-only
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
#include "stdafx.h"
#include "tunsafe_threading.h"
#include <stdlib.h>
// Starts with no participating threads and no checkpoint table; until
// Initialize() is called, Add() runs the delete callback immediately.
MultithreadedDelayedDelete::MultithreadedDelayedDelete()
    : num_threads_(0),
      table_(NULL) {
}
// Releases the checkpoint table.
// NOTE(review): entries still queued in curr_ / next_ are not run here; the
// visible code does not show whether callers drain them first -- confirm.
MultithreadedDelayedDelete::~MultithreadedDelayedDelete() {
free(table_);
}
// Allocates one zeroed checkpoint slot per participating thread.
// Calling it again replaces the previous table. With num_threads == 0 the
// object stays in "delete immediately" mode (see Add()).
void MultithreadedDelayedDelete::Initialize(uint32 num_threads) {
  // Drop any table left over from an earlier call so it is not leaked.
  free(table_);
  table_ = NULL;
  num_threads_ = 0;
  if (num_threads == 0)
    return;

  table_ = (CheckpointData*)calloc(num_threads, sizeof(CheckpointData));
  if (table_ == NULL) {
    // Checkpoint() and MainCheckpoint() index table_ unconditionally, so
    // there is no safe way to continue without it.
    abort();
  }
  num_threads_ = num_threads;
}
// Registers func(param) to be run later. When no threads participate
// (num_threads_ == 0) the callback is run right away instead.
void MultithreadedDelayedDelete::Add(DoDeleteFunc *func, void *param) {
  if (num_threads_ != 0) {
    lock_.Acquire();
    Entry pending = {func, param};
    curr_.push_back(pending);
    lock_.Release();
  } else {
    func(param);
  }
}
// Raises the checkpoint flag of the given thread slot.
// NOTE(review): thread_id indexes table_ directly with no bounds check; assumes
// thread_id < the num_threads passed to Initialize() -- confirm at call sites.
void MultithreadedDelayedDelete::Checkpoint(uint32 thread_id) {
table_[thread_id].value.store(1);
}
void MultithreadedDelayedDelete::MainCheckpoint() {
// Wait for all threads to signal that they reached the checkpoint
for (size_t i = 0; i < num_threads_; i++) {
if (table_[i].value.load() == 0)
return;
}
// All threads reached the checkpoint, clear the values
for (size_t i = 0; i < num_threads_; i++)
table_[i].value.store(0);
// Swap curr and next, and delete all nexts.
lock_.Acquire();
std::swap(curr_, next_);
std::swap(curr_, to_delete_);
lock_.Release();
for (auto it = to_delete_.begin(); it != to_delete_.end(); ++it) {
it->func(it->param);
}
to_delete_.clear();
}

175
tunsafe_threading.h Normal file
View file

@ -0,0 +1,175 @@
// SPDX-License-Identifier: AGPL-1.0-only
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
#pragma once
#include "tunsafe_types.h"
#include <atomic>
#include <vector>
#include <assert.h>
#if !defined(OS_WIN)
#include <pthread.h>
#endif // !defined(OS_WIN)
#if defined(OS_WIN)
// Reader/writer lock for Windows, a thin wrapper around an SRWLOCK.
class ReaderWriterLock {
public:
  ReaderWriterLock() : lock_(SRWLOCK_INIT) {}

  // Exclusive (writer) side.
  void AcquireExclusive() {
    AcquireSRWLockExclusive(&lock_);
  }
  void ReleaseExclusive() {
    ReleaseSRWLockExclusive(&lock_);
  }

  // Shared (reader) side.
  void AcquireShared() {
    AcquireSRWLockShared(&lock_);
  }
  void ReleaseShared() {
    ReleaseSRWLockShared(&lock_);
  }

private:
  SRWLOCK lock_;
};
// Mutex for Windows, implemented as an SRWLOCK that is only ever taken
// exclusively.
class Mutex {
public:
// In _DEBUG builds the mutex tracks whether it is held so IsLocked() can be
// used in assertions; in other builds IsLocked() always returns false and
// Mutex_SETLOCKED expands to nothing.
#if defined(_DEBUG)
bool locked_;
bool IsLocked() { return locked_; }
#define Mutex_SETLOCKED(x) locked_ = x;
#else
bool IsLocked() { return false; }
#define Mutex_SETLOCKED(x)
#endif
Mutex() : lock_(SRWLOCK_INIT) { Mutex_SETLOCKED(false); }
~Mutex() { }
void Acquire() {
AcquireSRWLockExclusive(&lock_);
// The flag is only touched while the lock is held.
Mutex_SETLOCKED(true);
}
void Release() {
Mutex_SETLOCKED(false);
ReleaseSRWLockExclusive(&lock_);
}
private:
SRWLOCK lock_;
};
typedef uint32 ThreadId;
// Returns true when `thread_id` identifies the calling thread.
static inline bool CurrentThreadIdEquals(ThreadId thread_id) {
  const ThreadId self = GetCurrentThreadId();
  return self == thread_id;
}
#else // defined(OS_WIN)
// Reader/writer lock for non-Windows builds, backed by pthread_rwlock_t.
// Failure to initialise is fatal; lock/unlock failures are only checked by
// assert().
class ReaderWriterLock {
public:
  ReaderWriterLock() {
    const int init_rv = pthread_rwlock_init(&lock_, NULL);
    if (init_rv != 0)
      tunsafe_die("pthread_rwlock_init failed");
  }

  ~ReaderWriterLock() {
    pthread_rwlock_destroy(&lock_);
  }

  void AcquireExclusive() {
    int rv = pthread_rwlock_wrlock(&lock_);
    assert(rv == 0);
  }

  void AcquireShared() {
    int rv = pthread_rwlock_rdlock(&lock_);
    assert(rv == 0);
  }

  // pthreads uses the same unlock call for both modes.
  void ReleaseExclusive() {
    int rv = pthread_rwlock_unlock(&lock_);
    assert(rv == 0);
  }

  void ReleaseShared() {
    int rv = pthread_rwlock_unlock(&lock_);
    assert(rv == 0);
  }

private:
  pthread_rwlock_t lock_;
};
// Mutex for non-Windows builds, backed by pthread_mutex_t with default
// attributes. Failure to initialise is fatal; lock/unlock failures are only
// checked by assert().
class Mutex {
public:
// In _DEBUG builds the mutex tracks whether it is held so IsLocked() can be
// used in assertions; in other builds IsLocked() always returns false and
// Mutex_SETLOCKED expands to nothing.
#if defined(_DEBUG)
bool locked_;
bool IsLocked() { return locked_; }
#define Mutex_SETLOCKED(x) locked_ = x;
#else
bool IsLocked() { return false; }
#define Mutex_SETLOCKED(x)
#endif
Mutex() {
if (pthread_mutex_init(&lock_, NULL) != 0)
tunsafe_die("pthread_mutex_init failed");
Mutex_SETLOCKED(false);
}
~Mutex() {
pthread_mutex_destroy(&lock_);
}
void Acquire() {
int rv = pthread_mutex_lock(&lock_);
assert(rv == 0);
// The flag is only touched while the lock is held.
Mutex_SETLOCKED(true);
}
void Release() {
Mutex_SETLOCKED(false);
int rv = pthread_mutex_unlock(&lock_);
assert(rv == 0);
}
// Exposes the underlying pthread mutex.
pthread_mutex_t *impl() { return &lock_; }
private:
pthread_mutex_t lock_;
};
// Thread identity helpers for non-Windows builds. They mirror the Win32 names
// so shared code can use the same calls on every platform.
typedef pthread_t ThreadId;

// Returns true when `thread_id` identifies the calling thread.
static inline bool CurrentThreadIdEquals(ThreadId thread_id) {
  const pthread_t self = pthread_self();
  return pthread_equal(thread_id, self) != 0;
}

// Returns the identifier of the calling thread.
static inline ThreadId GetCurrentThreadId() {
  const ThreadId self = pthread_self();
  return self;
}
#endif // !defined(OS_WIN)
// Scope guard that holds `lock` in shared (reader) mode: acquired in the
// constructor, released in the destructor.
class ScopedLockShared {
public:
  ScopedLockShared(ReaderWriterLock *lock) : lock_(lock) { lock_->AcquireShared(); }
  ~ScopedLockShared() { lock_->ReleaseShared(); }

  // Not copyable: a copy would release the same lock a second time.
  ScopedLockShared(const ScopedLockShared &) = delete;
  ScopedLockShared &operator=(const ScopedLockShared &) = delete;

private:
  ReaderWriterLock *lock_;
};
// Scope guard that holds `lock` in exclusive (writer) mode: acquired in the
// constructor, released in the destructor.
class ScopedLockExclusive {
public:
  ScopedLockExclusive(ReaderWriterLock *lock) : lock_(lock) { lock_->AcquireExclusive(); }
  ~ScopedLockExclusive() { lock_->ReleaseExclusive(); }

  // Not copyable: a copy would release the same lock a second time.
  ScopedLockExclusive(const ScopedLockExclusive &) = delete;
  ScopedLockExclusive &operator=(const ScopedLockExclusive &) = delete;

private:
  ReaderWriterLock *lock_;
};
// Scope guard for a Mutex: acquired in the constructor, released in the
// destructor.
class ScopedLock {
public:
  ScopedLock(Mutex *lock) : lock_(lock) { lock_->Acquire(); }
  ~ScopedLock() { lock_->Release(); }

  // Not copyable: a copy would release the same mutex a second time.
  ScopedLock(const ScopedLock &) = delete;
  ScopedLock &operator=(const ScopedLock &) = delete;

private:
  Mutex *lock_;
};
// Defers deletion of objects until every participating thread has passed a
// checkpoint. Worker threads call Checkpoint(); the main thread calls
// MainCheckpoint(), which advances the queues once all threads checked in and
// then runs the delete callbacks that have waited a full round.
class MultithreadedDelayedDelete {
public:
MultithreadedDelayedDelete();
~MultithreadedDelayedDelete();
// Callback that destroys the object passed as `x`.
typedef void DoDeleteFunc(void *x);
// Queues func(param); runs it immediately when no threads were configured.
void Add(DoDeleteFunc *func, void *param);
// Allocates one checkpoint slot per thread.
void Initialize(uint32 num_threads);
// Marks the slot `thread_id` as having reached the checkpoint.
void Checkpoint(uint32 thread_id);
void MainCheckpoint();
private:
struct Entry {
DoDeleteFunc *func;
void *param;
};
// One flag per thread, padded with 60 extra bytes.
// NOTE(review): presumably sized to a 64-byte cache line to avoid false
// sharing between threads -- confirm.
struct CheckpointData {
std::atomic<uint32> value;
uint8 align[60];
};
uint32 num_threads_;
// curr_ collects new entries, next_ holds the previous round, and to_delete_
// is what MainCheckpoint() is currently running.
std::vector<Entry> curr_, next_, to_delete_;
CheckpointData *table_;
// Taken by Add() and by the queue rotation in MainCheckpoint().
Mutex lock_;
};

View file

@ -68,6 +68,6 @@ static inline uint32 rol32(uint32 x, int8_t r) {
void RERROR(const char *msg, ...);
void RINFO(const char *msg, ...);
void tunsafe_die(const char *msg);
#endif // TINYVPN_TYPES_H_

File diff suppressed because it is too large Load diff

View file

@ -17,6 +17,7 @@
#include <arpa/inet.h>
#endif
#include <algorithm>
#include "tunsafe_types.h"
static char base64_alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
@ -133,6 +134,7 @@ int RunCommand(const char *fmt, ...) {
char *args[33];
char *envp[1] = {NULL};
int nargs = 0;
bool didadd = false;
va_start(va, fmt);
for (;;) {
c = *fmt++;
@ -140,13 +142,14 @@ int RunCommand(const char *fmt, ...) {
c = *fmt++;
if (c == 0) goto ZERO;
if (c == 's') {
tmp += va_arg(va, char*);
char *arg = va_arg(va, char*);
if (arg != NULL) {
tmp += arg;
didadd = true;
}
} else if (c == 'd') {
snprintf(buf, 32, "%d", va_arg(va, int));
tmp += buf;
} else if (c == 'u') {
snprintf(buf, 32, "%u", va_arg(va, int));
tmp += buf;
} else if (c == '%') {
tmp += '%';
} else if (c == 'A') {
@ -156,9 +159,12 @@ int RunCommand(const char *fmt, ...) {
}
} else if (c == ' ' || c == 0) {
ZERO:
args[nargs++] = _strdup(tmp.c_str());
tmp.clear();
if (nargs == 32 || c == 0) break;
if (!tmp.empty() || didadd) {
args[nargs++] = _strdup(tmp.c_str());
tmp.clear();
if (nargs == 32 || c == 0) break;
}
didadd = false;
} else {
tmp += c;
}
@ -187,7 +193,7 @@ ZERO:
#endif
if (ret != 0)
RERROR("Command %s failed %d!", fmt_org, ret);
RERROR("Command failed %d!", ret);
return ret;
}
@ -265,3 +271,29 @@ void RINFO(const char *msg, ...) {
fputs("\n", stderr);
}
}
// Returns a malloc'ed copy of the `size` bytes at `p`, or NULL when the
// allocation fails. The caller frees the result with free().
void *memdup(const void *p, size_t size) {
  void *copy = malloc(size);
  if (copy == NULL)
    return NULL;
  memcpy(copy, p, size);
  return copy;
}
// Returns a malloc'ed, NUL-terminated copy of exactly `size` bytes from `p`,
// or NULL when the allocation fails. Unlike strndup() it does not stop at an
// embedded NUL, so `p` must provide at least `size` readable bytes.
char *my_strndup(const char *p, size_t size) {
  char *copy = (char*)malloc(size + 1);
  if (copy == NULL)
    return NULL;
  memcpy(copy, p, size);
  copy[size] = 0;
  return copy;
}
// strlcpy-style copy: writes at most dstsize - 1 bytes of `src` to `dst` and
// always NUL-terminates when dstsize is non-zero. Returns strlen(src), so a
// result >= dstsize tells the caller the copy was truncated. With
// dstsize == 0 nothing is written.
size_t my_strlcpy(char *dst, size_t dstsize, const char *src) {
  const size_t src_len = strlen(src);
  if (dstsize == 0)
    return src_len;

  const size_t capacity = dstsize - 1;
  const size_t copy_len = (src_len < capacity) ? src_len : capacity;
  memcpy(dst, src, copy_len);
  dst[copy_len] = 0;
  return src_len;
}

11
util.h
View file

@ -12,3 +12,14 @@ typedef void Logger(const char *msg);
extern Logger *g_logger;
void *memdup(const void *p, size_t size);
char *my_strndup(const char *p, size_t size);
size_t my_strlcpy(char *dst, size_t dstsize, const char *src);
// Generalised post-increment: adds `v` to `x` and returns the value `x` had
// before the addition (like x++, but with an arbitrary step).
template<typename T, typename U>
static inline T postinc(T &x, U v) {
  T before = x;
  x += v;
  return before;
}

378
util_win32.cpp Normal file
View file

@ -0,0 +1,378 @@
// SPDX-License-Identifier: AGPL-1.0-only
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
#include "stdafx.h"
#include "util_win32.h"
#include <stdlib.h>
#include <string.h>
#include <shldisp.h>
#include <shlobj.h>
#include <exdisp.h>
#include <atlbase.h>
// Returns a pointer to the part of `s` that follows the last '\\' or '/'.
// When `s` contains no separator at all, an empty string literal is returned
// (not a pointer into `s`).
const char *FindFilenameComponent(const char *s) {
  // Walk backwards from the terminator; `p` always points one past the
  // character being examined.
  for (const char *p = s + strlen(s); p != s; --p) {
    const char c = p[-1];
    if (c == '\\' || c == '/')
      return p;
  }
  return "";
}
// Replaces the heap string owned by *x with a fresh copy of `s`.
// The copy is made before the old string is freed, so `s` may point into the
// current *x. A NULL `s` simply clears *x.
void str_set(char **x, const char *s) {
  char *replacement = s ? _strdup(s) : NULL;
  free(*x);
  *x = replacement;
}
// Concatenates the `n` NUL-terminated strings in `a` into one malloc'ed
// string. Returns NULL when n exceeds 32 or when the allocation fails; the
// caller frees the result with free().
char *str_cat_alloc(const char * const *a, size_t n) {
  if (n > 32) return NULL;

  // Measure every piece once so the copy loop need not call strlen again.
  size_t len[32], totlen = 0;
  for (size_t i = 0; i < n; i++) {
    len[i] = strlen(a[i]);
    totlen += len[i];
  }

  char *r = (char *)malloc(totlen + 1);
  if (r == NULL)
    return NULL;

  char *out = r;
  for (size_t i = 0; i < n; i++) {
    memcpy(out, a[i], len[i]);
    out += len[i];
  }
  *out = 0;
  return r;
}

// Two-string convenience overload.
char *str_cat_alloc(const char *a, const char *b) {
  const char * x[2] = {a, b};
  return str_cat_alloc(x, 2);
}

// Three-string convenience overload.
char *str_cat_alloc(const char *a, const char *b, const char *c) {
  const char * x[3] = {a, b, c};
  return str_cat_alloc(x, 3);
}
// Reads the REG_DWORD value `key` under `hkey`. Returns `def` when the value
// is missing, cannot be read, or is not a 4-byte REG_DWORD, so data of another
// type can never partially overwrite the default.
int RegReadInt(HKEY hkey, const char *key, int def) {
  DWORD value = 0, type = 0, n = sizeof(value);
  if (RegQueryValueEx(hkey, key, NULL, &type, (BYTE*)&value, &n) != ERROR_SUCCESS)
    return def;
  if (type != REG_DWORD || n != sizeof(value))
    return def;
  return (int)value;
}
// Stores `value` as a REG_DWORD named `key` under `hkey`. The result of the
// registry call is not reported to the caller.
void RegWriteInt(HKEY hkey, const char *key, int value) {
  const BYTE *raw = (const BYTE*)&value;
  RegSetValueEx(hkey, key, NULL, REG_DWORD, raw, sizeof(value));
}
// Reads the REG_SZ value `key` under `hkey` and returns it as a newly
// allocated C string. If the value is missing, unreadable (including when it
// does not fit the 1023-byte buffer) or not REG_SZ, a copy of `def` is
// returned instead, or NULL when `def` is NULL. The caller frees the result.
char *RegReadStr(HKEY hkey, const char *key, const char *def) {
  char value[1024];
  DWORD value_type = 0;
  DWORD value_len = sizeof(value) - 1;  // keep one byte for our own terminator

  if (RegQueryValueEx(hkey, key, NULL, &value_type, (BYTE*)value, &value_len) == ERROR_SUCCESS &&
      value_type == REG_SZ) {
    // The stored data may or may not include its terminating NUL.
    if (value_len != 0 && value[value_len - 1] == 0)
      value_len--;
    value[value_len] = 0;
    return _strdup(value);
  }
  return def ? _strdup(def) : NULL;
}
// Stores `v` (including its terminating NUL) as a REG_SZ named `key` under
// `hkey`. The result of the registry call is not reported to the caller.
void RegWriteStr(HKEY hkey, const char *key, const char *v) {
  const DWORD bytes_with_nul = (DWORD)strlen(v) + 1;
  RegSetValueEx(hkey, key, NULL, REG_SZ, (BYTE*)v, bytes_with_nul);
}
// Queries the elevation type of the current process token.
// On success stores it in *pOutElevationType and returns true; on any failure
// returns false and leaves *pOutElevationType at TokenElevationTypeDefault.
bool GetProcessElevationType(TOKEN_ELEVATION_TYPE *pOutElevationType) {
  *pOutElevationType = TokenElevationTypeDefault;

  HANDLE token = NULL;
  if (!::OpenProcessToken(::GetCurrentProcess(), TOKEN_QUERY, &token))
    return false;

  TOKEN_ELEVATION_TYPE queried = TokenElevationTypeDefault;
  DWORD returned = 0;
  bool ok = false;
  if (::GetTokenInformation(token, TokenElevationType, &queried, sizeof(queried), &returned) &&
      returned == sizeof(queried)) {
    *pOutElevationType = queried;
    ok = true;
  }
  ::CloseHandle(token);
  return ok;
}
// Returns TRUE when the calling process's token is a member of the local
// Administrators group (BUILTIN\Administrators), FALSE otherwise or when the
// check itself fails. CheckTokenMembership() is called with a NULL token
// handle.
BOOL IsUserAdmin(VOID) {
  SID_IDENTIFIER_AUTHORITY nt_authority = SECURITY_NT_AUTHORITY;
  PSID admins_sid;
  if (!AllocateAndInitializeSid(&nt_authority,
                                2,
                                SECURITY_BUILTIN_DOMAIN_RID,
                                DOMAIN_ALIAS_RID_ADMINS,
                                0, 0, 0, 0, 0, 0,
                                &admins_sid))
    return FALSE;

  BOOL is_member = FALSE;
  if (!CheckTokenMembership(NULL, admins_sid, &is_member))
    is_member = FALSE;
  FreeSid(admins_sid);
  return is_member;
}
// Skips the program-name token at the start of a command line.
// A name that starts with a double quote ends at the next double quote; any
// other name ends at the first space. The returned pointer is just past that
// terminating character, or at the string's NUL when no terminator is found.
const wchar_t *SkipAppNameInCommandLineArgs(const wchar_t *s) {
  wchar_t terminator = L' ';
  if (*s == L'\"') {
    terminator = L'\"';
    s++;  // step over the opening quote
  }
  while (*s != 0) {
    if (*s++ == terminator)
      return s;
  }
  return s;
}
// Loads a small text file (under 64 KiB) that passes SanityCheckBuf() and
// returns a malloc'ed, NUL-terminated copy with every line ending normalised
// to "\r\n". *size receives the length of the result without the terminator.
// Returns NULL (leaving *size untouched) when the file cannot be opened or
// read, is too large, changed size while being read, or fails the sanity
// check. The caller frees the result with free().
uint8* LoadFileSane(const char *name, size_t *size) {
  FILE *f = fopen(name, "rb");
  if (!f)
    return NULL;

  uint8 *file = NULL, *new_file = NULL;

  // Determine the file length by seeking to the end and back.
  long x = -1;
  if (fseek(f, 0, SEEK_END) == 0) {
    x = ftell(f);
    if (fseek(f, 0, SEEK_SET) != 0)
      x = -1;
  }

  if (x >= 0 && x < 65536)
    file = (uint8*)malloc((size_t)x + 1);

  if (file) {
    // Ask for one byte more than measured so a file that grew is detected.
    const size_t n = fread(file, 1, (size_t)x + 1, f);
    if (n == (size_t)x && SanityCheckBuf(file, n)) {
      // Convert the file to DOS new lines. Each '\n' may gain a '\r', so
      // reserve one extra byte per line feed.
      size_t i, j;
      for (i = j = 0; i < n; i++)
        j += (file[i] == '\n');
      new_file = (uint8*)malloc(n + 1 + j);
      if (new_file) {
        for (i = j = 0; i < n; i++) {
          const uint8 c = file[i];
          if (c == '\r')
            continue;  // dropped; re-inserted in front of every '\n'
          if (c == '\n')
            new_file[j++] = '\r';
          new_file[j++] = c;
        }
        new_file[j] = 0;
        *size = j;
      }
    }
  }

  fclose(f);
  free(file);
  return new_file;
}
// Writes `filesize` bytes from `filedata` to `filename`, replacing any
// existing file. Returns kWriteOutFile_AccessError when the file cannot be
// opened, kWriteOutFile_OtherError when the data could not be written in
// full, and kWriteOutFile_Ok otherwise.
int WriteOutFile(const char *filename, uint8 *filedata, size_t filesize) {
  FILE *f = fopen(filename, "wb");
  if (!f) return kWriteOutFile_AccessError;

  const bool wrote_all = fwrite(filedata, 1, filesize, f) == filesize;
  // stdio buffers writes, so a full disk may only show up when fclose()
  // flushes; treat that as a write failure too.
  const bool closed_ok = fclose(f) == 0;
  if (!wrote_all || !closed_ok)
    return kWriteOutFile_OtherError;
  return kWriteOutFile_Ok;
}
bool FileExists(const CHAR *fileName) {
DWORD fileAttr = GetFileAttributes(fileName);
return (0xFFFFFFFF != fileAttr);
}
// Returns the size of the file `name` in bytes, or -1 when its attributes
// cannot be read.
__int64 FileSize(const char* name) {
  WIN32_FILE_ATTRIBUTE_DATA attrs;
  if (GetFileAttributesEx(name, GetFileExInfoStandard, &attrs)) {
    // Combine the two 32-bit halves into one 64-bit value.
    LARGE_INTEGER combined;
    combined.LowPart = attrs.nFileSizeLow;
    combined.HighPart = attrs.nFileSizeHigh;
    return combined.QuadPart;
  }
  return -1;
}
// True for the four whitespace bytes accepted in config files: space, CR, LF
// and tab.
static bool is_space(uint8_t c) {
  switch (c) {
  case ' ':
  case '\r':
  case '\n':
  case '\t':
    return true;
  default:
    return false;
  }
}
// True for every byte from space (0x20) upwards, plus the control characters
// CR, LF and tab. All other control bytes are rejected.
static bool is_valid(uint8_t c) {
  if (c >= ' ')
    return true;
  switch (c) {
  case '\t':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
// Cheap plausibility check for config text: after any leading whitespace the
// first byte must be '[' or '#', and from there to the end every byte must
// satisfy is_valid(). Empty or whitespace-only input is rejected.
bool SanityCheckBuf(uint8 *buf, size_t n) {
  // Skip leading whitespace.
  size_t pos = 0;
  while (pos < n && is_space(buf[pos]))
    pos++;
  if (pos == n)
    return false;

  // The first real byte must start a section or a comment.
  if (buf[pos] != '[' && buf[pos] != '#')
    return false;

  for (; pos < n; pos++) {
    if (!is_valid(buf[pos]))
      return false;
  }
  return true;
}
// Finds the shell view of the desktop window and queries it for `riid`.
// *ppv is set to NULL first and stays NULL when any step fails, for example
// when no desktop shell window exists.
void FindDesktopFolderView(REFIID riid, void **ppv) {
  *ppv = NULL;

  CComPtr<IShellWindows> spShellWindows;
  if (FAILED(spShellWindows.CoCreateInstance(CLSID_ShellWindows)) || !spShellWindows)
    return;

  CComVariant vtLoc(CSIDL_DESKTOP);
  CComVariant vtEmpty;
  long lhwnd;
  CComPtr<IDispatch> spdisp;
  // FindWindowSW can succeed without producing a window (S_FALSE), so the
  // pointer has to be checked as well as the HRESULT.
  if (FAILED(spShellWindows->FindWindowSW(
        &vtLoc, &vtEmpty,
        SWC_DESKTOP, &lhwnd, SWFO_NEEDDISPATCH, &spdisp)) || !spdisp)
    return;

  CComQIPtr<IServiceProvider> spProvider(spdisp);
  if (!spProvider)
    return;

  CComPtr<IShellBrowser> spBrowser;
  if (FAILED(spProvider->QueryService(SID_STopLevelBrowser,
                                      IID_PPV_ARGS(&spBrowser))) || !spBrowser)
    return;

  CComPtr<IShellView> spView;
  if (FAILED(spBrowser->QueryActiveShellView(&spView)) || !spView)
    return;

  spView->QueryInterface(riid, ppv);
}
// Retrieves the automation object of the desktop folder view and queries it
// for `riid`. *ppv is set to NULL first and stays NULL when the desktop view
// or its background object cannot be obtained.
void GetDesktopAutomationObject(REFIID riid, void **ppv) {
  *ppv = NULL;

  CComPtr<IShellView> spsv;
  FindDesktopFolderView(IID_PPV_ARGS(&spsv));
  if (!spsv)
    return;

  CComPtr<IDispatch> spdispView;
  if (FAILED(spsv->GetItemObject(SVGIO_BACKGROUND, IID_PPV_ARGS(&spdispView))) || !spdispView)
    return;

  spdispView->QueryInterface(riid, ppv);
}
// Asks the desktop shell's automation object to ShellExecute `pszFile`, so the
// launch is performed by the shell process instead of this one. NULL
// optional arguments are passed as empty strings. The function returns
// nothing; when the shell objects cannot be obtained it silently does nothing.
void ShellExecuteFromExplorer(
  PCSTR pszFile,
  PCSTR pszParameters,
  PCSTR pszDirectory,
  PCSTR pszOperation,
  int nShowCmd) {
  CComPtr<IShellFolderViewDual> spFolderView;
  GetDesktopAutomationObject(IID_PPV_ARGS(&spFolderView));
  if (!spFolderView)
    return;

  CComPtr<IDispatch> spdispShell;
  if (FAILED(spFolderView->get_Application(&spdispShell)) || !spdispShell)
    return;

  CComQIPtr<IShellDispatch2> spShell(spdispShell);
  if (!spShell)
    return;

  spShell->ShellExecute(CComBSTR(pszFile),
                        CComVariant(pszParameters ? pszParameters : ""),
                        CComVariant(pszDirectory ? pszDirectory : ""),
                        CComVariant(pszOperation ? pszOperation : ""),
                        CComVariant(nShowCmd));
}
size_t GetConfigPath(char *path, size_t path_size) {
if (!GetModuleFileName(NULL, path, (DWORD)path_size)) {
*path = 0;
return 0;
}
char *last = (char *)FindFilenameComponent(path);
if (!*last || last + 8 > path + path_size) {
*path = 0;
return 0;
}
memcpy(last, "Config\\", 8 * sizeof(last[0]));
return last + 7 - path;
}
static bool ContainsDotDot(const char *path) {
for (uint8 last = 0, cur; (cur = path[0]) != '\0'; last = cur, path++)
if (cur == '.' && last == cur)
return true;
return false;
}
// Returns true when `path` names something strictly inside the Config
// directory: it must be longer than the directory prefix, start with exactly
// that prefix (byte-wise comparison), and contain no ".." after it.
bool EnsureValidConfigPath(const char *path) {
  char config_dir[1024];
  const size_t dir_len = GetConfigPath(config_dir, sizeof(config_dir));
  if (dir_len == 0)
    return false;
  if (strlen(path) <= dir_len)
    return false;
  if (memcmp(path, config_dir, dir_len) != 0)
    return false;
  return !ContainsDotDot(path + dir_len);
}
bool RunProcessAsAdminWithArgs(const char *args, bool wait_for_exit) {
SHELLEXECUTEINFO shExecInfo = {0};
char buf[1024];
if (!GetModuleFileName(NULL, buf, 1024))
return false;
shExecInfo.cbSize = sizeof(shExecInfo);
shExecInfo.lpVerb = "runas";
shExecInfo.lpFile = buf;
shExecInfo.lpParameters = args;
shExecInfo.nShow = SW_SHOW;
shExecInfo.fMask = SEE_MASK_NOASYNC | wait_for_exit * SEE_MASK_NOCLOSEPROCESS;
if (!ShellExecuteExA(&shExecInfo))
return false;
if (shExecInfo.hProcess) {
WaitForSingleObject(shExecInfo.hProcess, 10000);
CloseHandle(shExecInfo.hProcess);
}
return true;
}
bool RestartProcessAsAdministrator() {
SHELLEXECUTEINFOW shExecInfo = {0};
wchar_t buf[1024];
if (!GetModuleFileNameW(NULL, buf, 1024))
return false;
// shExecInfo.hwnd = window;
shExecInfo.cbSize = sizeof(shExecInfo);
shExecInfo.lpVerb = L"runas";
shExecInfo.lpFile = buf;
shExecInfo.lpParameters = SkipAppNameInCommandLineArgs(GetCommandLineW());
shExecInfo.nShow = SW_SHOW;
return ShellExecuteExW(&shExecInfo) != 0;
}
bool SetClipboardString(const char *string) {
bool ok = false;
if (OpenClipboard(NULL)) {
HGLOBAL hglb;
size_t len = strlen(string);
hglb = GlobalAlloc(GMEM_SHARE | GMEM_MOVEABLE, (len + 1) * sizeof(char));
LPSTR lptstr = (LPSTR)GlobalLock(hglb);
memcpy(lptstr, string, len + 1);
GlobalUnlock(hglb);
EmptyClipboard();
ok = SetClipboardData(CF_TEXT, hglb) != 0;
CloseClipboard();
}
return ok;
}
// Returns the client rectangle of `wnd` expressed in the coordinate space of
// its parent window.
RECT GetParentRect(HWND wnd) {
  RECT in_parent;
  GetClientRect(wnd, &in_parent);
  HWND parent = GetParent(wnd);
  // A RECT is laid out as two POINTs, so both corners are mapped in one call.
  MapWindowPoints(wnd, parent, (LPPOINT)&in_parent, 2);
  return in_parent;
}
// Builds a RECT from its left, top, right and bottom coordinates.
RECT MakeRect(int l, int t, int r, int b) {
  RECT rect;
  rect.left = l;
  rect.top = t;
  rect.right = r;
  rect.bottom = b;
  return rect;
}

56
util_win32.h Normal file
View file

@ -0,0 +1,56 @@
// SPDX-License-Identifier: AGPL-1.0-only
// Copyright (C) 2018 Ludvig Strigeus <info@tunsafe.com>. All Rights Reserved.
#include "tunsafe_types.h"
#pragma once
const char *FindFilenameComponent(const char *s);
void str_set(char **x, const char *s);
char *str_cat_alloc(const char * const *a, size_t n);
char *str_cat_alloc(const char *a, const char *b);
char *str_cat_alloc(const char *a, const char *b, const char *c);
int RegReadInt(HKEY hkey, const char *key, int def);
void RegWriteInt(HKEY hkey, const char *key, int value);
char *RegReadStr(HKEY hkey, const char *key, const char *def);
void RegWriteStr(HKEY hkey, const char *key, const char *v);
// TokenElevationTypeDefault -- User is not using a split token. (e.g. UAC disabled or local admin "Administrator" account which UAC may not apply to.)
// TokenElevationTypeFull -- User has a split token, and the process is running elevated.
// TokenElevationTypeLimited -- User has a split token, but the process is not running elevated.
bool GetProcessElevationType(TOKEN_ELEVATION_TYPE *pOutElevationType);
const wchar_t *SkipAppNameInCommandLineArgs(const wchar_t *s);
uint8* LoadFileSane(const char *name, size_t *size);
// Result codes returned by WriteOutFile().
enum {
kWriteOutFile_Ok = 0,
kWriteOutFile_AccessError = 1,
kWriteOutFile_OtherError = 2,
};
int WriteOutFile(const char *filename, uint8 *filedata, size_t filesize);
bool SanityCheckBuf(uint8 *buf, size_t n);
__int64 FileSize(const char* name);
bool FileExists(const CHAR *fileName);
void ShellExecuteFromExplorer(
PCSTR pszFile,
PCSTR pszParameters = nullptr,
PCSTR pszDirectory = nullptr,
PCSTR pszOperation = nullptr,
int nShowCmd = SW_SHOWNORMAL);
size_t GetConfigPath(char *path, size_t path_size);
bool EnsureValidConfigPath(const char *path);
bool RunProcessAsAdminWithArgs(const char *args, bool wait_for_exit);
bool RestartProcessAsAdministrator();
bool SetClipboardString(const char *string);
RECT GetParentRect(HWND wnd);
RECT MakeRect(int l, int t, int r, int b);

View file

@ -12,7 +12,9 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "ipzip2/ipzip2.h"
#include "wireguard.h"
#include "wireguard_config.h"
uint64 OsGetMilliseconds();
@ -35,11 +37,23 @@ WireguardProcessor::WireguardProcessor(UdpInterface *udp, TunInterface *tun, Pro
dns_blocking_ = true;
internet_blocking_ = kBlockInternet_Default;
dns6_addr_.sin.sin_family = dns_addr_.sin.sin_family = 0;
stats_last_bytes_in_ = 0;
stats_last_bytes_out_ = 0;
stats_last_ts_ = OsGetMilliseconds();
main_thread_scheduled_ = NULL;
main_thread_scheduled_last_ = &main_thread_scheduled_;
}
WireguardProcessor::~WireguardProcessor() {
}
// Stores the UDP listen port; Start() passes it to udp_->Initialize().
void WireguardProcessor::SetListenPort(int listen_port) {
listen_port_ = listen_port;
}
bool WireguardProcessor::AddDnsServer(const IpAddr &sin) {
IpAddr *target = (sin.sin.sin_family == AF_INET6) ? &dns6_addr_ : &dns_addr_;
if (target->sin.sin_family != 0)
@ -48,7 +62,6 @@ bool WireguardProcessor::AddDnsServer(const IpAddr &sin) {
return true;
}
bool WireguardProcessor::SetTunAddress(const WgCidrAddr &addr) {
WgCidrAddr *target = (addr.size == 128) ? &tun6_addr_ : &tun_addr_;
if (target->size != 0)
@ -57,9 +70,37 @@ bool WireguardProcessor::SetTunAddress(const WgCidrAddr &addr) {
return true;
}
// Appends a CIDR range to excluded_ips_; Start() copies the list into the
// tunnel configuration (config.excluded_ips).
void WireguardProcessor::AddExcludedIp(const WgCidrAddr &cidr_addr) {
excluded_ips_.push_back(cidr_addr);
}
ProcessorStats WireguardProcessor::GetStats() {
stats_.last_complete_handskake_timestamp = dev_.last_complete_handskake_timestamp();
// Sets the MTU used for the tunnel configuration. Values outside the range
// 576..10000 are ignored and the previous MTU is kept.
void WireguardProcessor::SetMtu(int mtu) {
  const bool in_range = (mtu >= 576) && (mtu <= 10000);
  if (!in_range)
    return;
  mtu_ = mtu;
}
// Stores the add-routes flag in add_routes_mode_.
void WireguardProcessor::SetAddRoutesMode(bool mode) {
add_routes_mode_ = mode;
}
// Stores the DNS-blocking preference; Start() consults it when deciding
// whether to set config.block_dns_on_adapters.
void WireguardProcessor::SetDnsBlocking(bool dns_blocking) {
dns_blocking_ = dns_blocking;
}
// Stores the internet-blocking mode; Start() copies it into
// config.internet_blocking.
void WireguardProcessor::SetInternetBlocking(InternetBlockState internet_blocking) {
internet_blocking_ = internet_blocking;
}
// Forwards the header-obfuscation key to the device object.
void WireguardProcessor::SetHeaderObfuscation(const char *key) {
dev_.SetHeaderObfuscation(key);
}
// Returns a copy of the statistics after refreshing stats_.endpoint from the
// first peer. When there is no peer the endpoint's address family is left at
// 0.
WgProcessorStats WireguardProcessor::GetStats() {
// todo: only supports one peer but i want this in the ui for now.
stats_.endpoint.sin.sin_family = 0;
WgPeer *peer = dev_.first_peer();
if (peer)
stats_.endpoint = peer->endpoint_;
return stats_;
}
@ -92,6 +133,7 @@ static bool IsWgCidrAddrSubsetOf(const WgCidrAddr &inner, const WgCidrAddr &oute
}
bool WireguardProcessor::Start() {
assert(dev_.IsMainThread());
if (!udp_->Initialize(listen_port_))
return false;
@ -101,7 +143,7 @@ bool WireguardProcessor::Start() {
}
if (tun_addr_.cidr >= 31) {
RERROR("The TAP driver is not compatible with Address using CIDR /31 or /32. Changing to /24");
RERROR("TAP is not compatible CIDR /31 or /32. Changing to /24");
tun_addr_.cidr = 24;
}
@ -110,7 +152,8 @@ bool WireguardProcessor::Start() {
config.cidr = tun_addr_.cidr;
config.mtu = mtu_;
config.pre_post_commands = pre_post_;
config.excluded_ips = excluded_ips_;
uint32 netmask = tun_addr_.cidr == 32 ? 0xffffffff : 0xffffffff << (32 - tun_addr_.cidr);
uint32 ipv4_broadcast_addr = (netmask == 0xffffffff) ? 0xffffffff : config.ip | ~netmask;
@ -130,6 +173,7 @@ bool WireguardProcessor::Start() {
config.default_route_endpoint_v4 = (peer->endpoint_.sin.sin_family == AF_INET) ? ReadBE32(&peer->endpoint_.sin.sin_addr) : 0;
// Set the default route to something
config.use_ipv4_default_route = true;
peer->allow_endpoint_change_ = false;
}
// Also configure ipv6 gw?
@ -139,6 +183,7 @@ bool WireguardProcessor::Start() {
if (peer->endpoint_.sin.sin_family == AF_INET6)
memcpy(&config.default_route_endpoint_v6, &peer->endpoint_.sin6.sin6_addr, 16);
config.use_ipv6_default_route = true;
peer->allow_endpoint_change_ = false;
}
}
@ -158,7 +203,8 @@ bool WireguardProcessor::Start() {
uint8 dhcp_options[6];
config.block_dns_on_adapters = dns_blocking_;
config.block_dns_on_adapters = dns_blocking_ && ((config.use_ipv4_default_route && dns_addr_.sin.sin_family == AF_INET) ||
(config.use_ipv6_default_route && dns6_addr_.sin6.sin6_family == AF_INET6));
config.internet_blocking = internet_blocking_;
if (dns_addr_.sin.sin_family == AF_INET) {
@ -187,7 +233,7 @@ bool WireguardProcessor::Start() {
peer->ipv4_broadcast_addr_ = ipv4_broadcast_addr;
if (peer->endpoint_.sin.sin_family != 0) {
RINFO("Sending handshake...");
SendHandshakeInitiationAndResetRetries(peer);
SendHandshakeInitiation(peer);
}
}
@ -222,10 +268,8 @@ struct ICMPv6NaPacketWithoutTarget {
uint8 reserved[3];
uint8 target[16];
};
#pragma pack (pop)
static uint16 ComputeIcmpv6Checksum(const uint8 *buf, int buf_size, const uint8 src_addr[16], const uint8 dst_addr[16]) {
uint32 sum = 0;
for (int i = 0; i < buf_size - 1; i += 2)
@ -242,28 +286,25 @@ static uint16 ComputeIcmpv6Checksum(const uint8 *buf, int buf_size, const uint8
return ((uint16)~sum);
}
bool WireguardProcessor::HandleIcmpv6NeighborSolicitation(const byte *data, size_t data_size) {
if (data_size < 48 + 16)
return false;
// Filter out neighbor solicitation
if (data[40] != kICMPv6_NeighborSolicitation || data[41] != 0)
return false;
if (!network_discovery_spoofing_)
if (data[40] != kICMPv6_NeighborSolicitation || data[41] != 0 || !network_discovery_spoofing_)
return false;
bool is_broadcast = true;
if (memcmp(data + 24, kIcmpv6NeighborMulticastPrefix, sizeof(kIcmpv6NeighborMulticastPrefix)) != 0) {
if (memcmp(data + 24, data + 48, 16) != 0)
return false;
is_broadcast = false;
}
// Target address must match a peer's range.
WG_ACQUIRE_RWLOCK_SHARED(dev_.ip_to_peer_map_lock_);
WgPeer *peer = (WgPeer*)dev_.ip_to_peer_map().LookupV6(data + 48);
WG_RELEASE_RWLOCK_SHARED(dev_.ip_to_peer_map_lock_)
if (peer == NULL)
return false;
@ -273,8 +314,7 @@ bool WireguardProcessor::HandleIcmpv6NeighborSolicitation(const byte *data, size
return false;
byte *odata = out->data;
int packet_size = is_broadcast ? sizeof(ICMPv6NaPacket) : sizeof(ICMPv6NaPacketWithoutTarget);
size_t packet_size = is_broadcast ? sizeof(ICMPv6NaPacket) : sizeof(ICMPv6NaPacketWithoutTarget);
memcpy(odata, data, 4);
WriteBE16(odata + 4, packet_size);
@ -298,10 +338,10 @@ bool WireguardProcessor::HandleIcmpv6NeighborSolicitation(const byte *data, size
// For some reason this is openvpn's 'related mac'
((ICMPv6NaPacket*)(odata + 40))->target_mac[2] += 1;
}
uint16 checksum = ComputeIcmpv6Checksum(odata + 40, packet_size, odata + 8, odata + 24);
uint16 checksum = ComputeIcmpv6Checksum(odata + 40, (int)packet_size, odata + 8, odata + 24);
WriteBE16(&((ICMPv6NaPacket*)(odata + 40))->checksum, checksum);
out->size = 40 + packet_size;
out->size = (unsigned)(40 + packet_size);
tun_->WriteTunPacket(out);
return true;
}
@ -317,9 +357,6 @@ void WireguardProcessor::HandleTunPacket(Packet *packet) {
unsigned ip_version, size_from_header;
WgPeer *peer;
stats_.tun_bytes_in += data_size;
stats_.tun_packets_in++;
// Sanity check that it looks like a valid ipv4 or ipv6 packet,
// and determine the destination peer from the ip header
if (data_size < IPV4_HEADER_SIZE)
@ -328,7 +365,9 @@ void WireguardProcessor::HandleTunPacket(Packet *packet) {
ip_version = *data >> 4;
if (ip_version == 4) {
uint32 ip = ReadBE32(data + 16);
WG_ACQUIRE_RWLOCK_SHARED(dev_.ip_to_peer_map_lock_);
peer = (WgPeer*)dev_.ip_to_peer_map().LookupV4(ip);
WG_RELEASE_RWLOCK_SHARED(dev_.ip_to_peer_map_lock_)
if (peer == NULL)
goto getout;
if ((ip >= (224 << 24) || ip == peer->ipv4_broadcast_addr_) && !peer->allow_multicast_through_peer_)
@ -346,7 +385,9 @@ void WireguardProcessor::HandleTunPacket(Packet *packet) {
if (data[6] == kIpProto_ICMPv6 && HandleIcmpv6NeighborSolicitation(data, data_size))
goto getout;
WG_ACQUIRE_RWLOCK_SHARED(dev_.ip_to_peer_map_lock_);
peer = (WgPeer*)dev_.ip_to_peer_map().LookupV6(data + 24);
WG_RELEASE_RWLOCK_SHARED(dev_.ip_to_peer_map_lock_)
if (peer == NULL)
goto getout;
@ -359,10 +400,10 @@ void WireguardProcessor::HandleTunPacket(Packet *packet) {
}
if (size_from_header > data_size)
goto getout;
if (peer->endpoint_.sin.sin_family == 0)
goto getout;
WritePacketToUdp(peer, packet);
// WriteAndEncryptPacketToUdp needs a held lock
WG_ACQUIRE_LOCK(peer->mutex_);
WriteAndEncryptPacketToUdp_WillUnlock(peer, packet);
return;
getout:
@ -370,25 +411,52 @@ getout:
FreePacket(packet);
}
void WireguardProcessor::WritePacketToUdp(WgPeer *peer, Packet *packet) {
byte *data = packet->data;
size_t size = packet->size;
// Appends |packet| to this peer's queue of packets that are waiting to be
// sent. The caller must hold the peer lock.
//
// The queue is bounded by MAX_QUEUED_PACKETS_PER_PEER. When it is full the
// oldest packets (at the head) are freed to make room, so the most recently
// queued packets are the ones that are kept.
void WgPeer::AddPacketToPeerQueue(Packet *packet) {
  assert(IsPeerLocked());
  // Evict from the head until there is room for one more packet. The head
  // check keeps the loop from dereferencing an empty list.
  while (num_queued_packets_ >= MAX_QUEUED_PACKETS_PER_PEER && first_queued_packet_ != NULL) {
    Packet *oldest = first_queued_packet_;
    first_queued_packet_ = oldest->next;
    num_queued_packets_--;
    FreePacket(oldest);
  }
  // If eviction drained the queue, the tail pointer still refers to the |next|
  // field of a packet that was just freed. Point it back at the head so the
  // append below does not write into freed memory.
  if (first_queued_packet_ == NULL)
    last_queued_packet_ptr_ = &first_queued_packet_;
  // Add the packet to the out queue that will get sent once handshake completes
  packet->next = NULL;
  *last_queued_packet_ptr_ = packet;
  last_queued_packet_ptr_ = &packet->next;
  num_queued_packets_++;
}
// This function must be called with the peer lock held. It will remove the lock
void WireguardProcessor::WriteAndEncryptPacketToUdp_WillUnlock(WgPeer *peer, Packet *packet) {
assert(peer->IsPeerLocked());
uint8 *data = packet->data, *ad;
size_t size = packet->size, ad_len, orig_size = size;
bool want_handshake;
WgKeypair *keypair;
uint64 send_ctr;
WgKeypair *keypair = peer->curr_keypair_;
if (keypair == NULL ||
keypair->send_key_state == WgKeypair::KEY_INVALID ||
keypair->send_ctr >= REJECT_AFTER_MESSAGES)
goto getout_handshake;
want_handshake = (keypair->send_ctr >= REKEY_AFTER_MESSAGES ||
keypair->send_key_state == WgKeypair::KEY_WANT_REFRESH);
// Ensure packet will fit including the biggest padding
if (size > kPacketCapacity - 15 - CHACHA20POLY1305_AUTHTAGLEN)
if (peer->endpoint_.sin.sin_family == 0 ||
size > kPacketCapacity - 15 - CHACHA20POLY1305_AUTHTAGLEN)
goto getout_discard;
if ((keypair = peer->curr_keypair_) == NULL ||
(send_ctr = keypair->send_ctr) >= REJECT_AFTER_MESSAGES) {
peer->AddPacketToPeerQueue(packet);
WG_RELEASE_LOCK(peer->mutex_);
ScheduleNewHandshake(peer);
return;
}
stats_.tun_bytes_in += size;
stats_.tun_packets_in++;
want_handshake = (send_ctr >= REKEY_AFTER_MESSAGES ||
keypair->send_key_state == WgKeypair::KEY_WANT_REFRESH);
keypair->send_ctr = send_ctr + 1;
packet->addr = peer->endpoint_;
if (size == 0) {
peer->OnKeepaliveSent();
} else {
@ -416,7 +484,6 @@ add_padding:
size += padding;
}
}
send_ctr = keypair->send_ctr++;
#if WITH_SHORT_HEADERS
if (keypair->enabled_features[WG_FEATURE_ID_SHORT_HEADER]) {
@ -434,8 +501,9 @@ add_padding:
WriteLE32(write -= 4, (uint32)next_expected_packet);
inner_tag = WG_ACK_HEADER_COUNTER_4;
} else {
WriteLE64(write -= 8, next_expected_packet);
inner_tag = WG_ACK_HEADER_COUNTER_8;
WriteLE32(write -= 4, (uint32)next_expected_packet);
WriteLE16(write -= 2, (uint16)(next_expected_packet>>32));
inner_tag = WG_ACK_HEADER_COUNTER_6;
}
if (keypair->broadcast_short_key != 0) {
inner_tag += keypair->addr_entry_slot;
@ -448,6 +516,7 @@ add_padding:
*--write = keypair->addr_entry_slot;
tag += WG_SHORT_HEADER_ACK;
}
byte *write_after_ack_header = write;
// Determine the distance from the most recently acked packet,
// be conservative when picking a suitable packet length to send.
@ -471,61 +540,54 @@ add_padding:
WriteLE32(write -= 4, keypair->remote_key_id);
*--write = tag;
// Not using any fields from now on
WG_RELEASE_LOCK(peer->mutex_);
header_size = data - write;
stats_.compression_wg_saved_out += (int64)16 - header_size;
packet->data = data - header_size;
packet->size = (int)(size + header_size + keypair->auth_tag_length);
WgKeypairEncryptPayload(data, size, write, data - write, send_ctr, keypair);
// todo: figure out what to actually use as ad.
ad = write_after_ack_header;
ad_len = data - write_after_ack_header;
} else {
need_big_packet:
#else
{
#endif // #if WITH_SHORT_HEADERS
// Not using any fields from now on
WG_RELEASE_LOCK(peer->mutex_);
((MessageData*)data)[-1].type = ToLE32(MESSAGE_DATA);
((MessageData*)data)[-1].receiver_id = keypair->remote_key_id;
((MessageData*)data)[-1].counter = ToLE64(send_ctr);
packet->data = data - sizeof(MessageData);
packet->size = (int)(size + sizeof(MessageData) + keypair->auth_tag_length);
WgKeypairEncryptPayload(data, size, NULL, 0, send_ctr, keypair);
ad = NULL;
ad_len = 0;
}
packet->addr = peer->endpoint_;
WgKeypairEncryptPayload(data, size, ad, ad_len, send_ctr, keypair);
DoWriteUdpPacket(packet);
if (want_handshake)
SendHandshakeInitiationAndResetRetries(peer);
ScheduleNewHandshake(peer);
return;
getout_discard:
WG_RELEASE_LOCK(peer->mutex_);
FreePacket(packet);
return;
getout_handshake:
// Keep only the first MAX_QUEUED_PACKETS packets.
while (peer->num_queued_packets_ >= MAX_QUEUED_PACKETS_PER_PEER) {
Packet *packet = peer->first_queued_packet_;
peer->first_queued_packet_ = packet->next;
peer->num_queued_packets_--;
FreePacket(packet);
}
// Add the packet to the out queue that will get sent once handshake completes
*peer->last_queued_packet_ptr_ = packet;
peer->last_queued_packet_ptr_ = &packet->next;
packet->next = NULL;
peer->num_queued_packets_++;
SendHandshakeInitiationAndResetRetries(peer);
}
// This scrambles the initial 16 bytes of the packet with the
// trailing 8 bytes of the packet.
// next 8 bytes of the packet as a seed.
static void ScrambleUnscramblePacket(Packet *packet, ScramblerSiphashKeys *keys) {
uint8 *data = packet->data;
size_t data_size = packet->size;
if (data_size < 8)
if (data_size <= 8)
return;
uint64 last_uint64 = ReadLE64(data_size >= 24 ? data + 16 : data + data_size - 8);
@ -537,10 +599,12 @@ static void ScrambleUnscramblePacket(Packet *packet, ScramblerSiphashKeys *keys)
((uint64*)data)[0] ^= a;
((uint64*)data)[1] ^= b;
} else {
struct { uint64 a, b; } scramblers = {a, b};
uint8 *s = (uint8*)&scramblers;
union {
uint64 d[2];
uint8 s[16];
} scrambler = {{a,b}};
for (size_t i = 0; i < data_size - 8; i++)
data[i] ^= s[i];
data[i] ^= scrambler.s[i];
}
}
@ -560,38 +624,81 @@ void WireguardProcessor::DoWriteUdpPacket(Packet *packet) {
ScrambleUnscrambleAndWrite(packet, &dev_.header_obfuscation_key_, udp_);
}
void WireguardProcessor::SendHandshakeInitiationAndResetRetries(WgPeer *peer) {
peer->handshake_attempts_ = 0;
SendHandshakeInitiation(peer);
// Requests that a new handshake is started for |peer|. The handshake itself
// is not sent here: the request is recorded as a flag on the peer, and
// RunAllMainThreadScheduled later resets the attempt counter and calls
// SendHandshakeInitiation.
void WireguardProcessor::ScheduleNewHandshake(WgPeer *peer) {
// fetch_or returns the previous flags. Only the caller that turns the flags
// from zero to nonzero links the peer into the list, so a peer is queued at
// most once per drain.
if (peer->main_thread_scheduled_.fetch_or(WgPeer::kMainThreadScheduled_ScheduleHandshake) == 0) {
peer->main_thread_scheduled_next_ = NULL;
// Append to the tail of the singly linked list under the list lock.
WG_ACQUIRE_LOCK(main_thread_scheduled_lock_);
*main_thread_scheduled_last_ = peer;
main_thread_scheduled_last_ = &peer->main_thread_scheduled_next_;
WG_RELEASE_LOCK(main_thread_scheduled_lock_);
// todo: in multithreaded impl need to trigger |RunAllMainThreadScheduled| to get called
}
}
// Drains the list of peers queued by ScheduleNewHandshake and performs the
// work that was requested for each of them. Asserts that it runs on the
// main thread.
void WireguardProcessor::RunAllMainThreadScheduled() {
assert(dev_.IsMainThread());
// Early-out without taking the lock when nothing is queued.
if (main_thread_scheduled_ == NULL)
return;
// Detach the whole list while holding the lock, leaving behind an empty list
// whose tail pointer refers back to the head. The detached peers are then
// processed without the lock held.
WG_ACQUIRE_LOCK(main_thread_scheduled_lock_);
WgPeer *peer = main_thread_scheduled_;
main_thread_scheduled_ = NULL;
main_thread_scheduled_last_ = &main_thread_scheduled_;
WG_RELEASE_LOCK(main_thread_scheduled_lock_);
while (peer) {
// todo: for the multithreaded use case figure out whether to use atomic_thread_fence here.
// Read the link before clearing the flags: once the flags are zero,
// ScheduleNewHandshake may queue this peer again and overwrite the link.
WgPeer *next = peer->main_thread_scheduled_next_;
uint32 ev = peer->main_thread_scheduled_.exchange(0);
if (ev & WgPeer::kMainThreadScheduled_ScheduleHandshake) {
// A newly requested handshake starts over with a zero attempt count.
peer->handshake_attempts_ = 0;
SendHandshakeInitiation(peer);
}
peer = next;
}
}
void WireguardProcessor::SendHandshakeInitiation(WgPeer *peer) {
// Send out a handshake init packet to trigger the handshake procedure
assert(dev_.IsMainThread());
if (!peer->CheckHandshakeRateLimit())
return;
stats_.handshakes_out++;
Packet *packet = AllocPacket();
if (!packet)
return;
peer->CreateMessageHandshakeInitiation(packet);
if (packet) {
peer->CreateMessageHandshakeInitiation(packet);
WG_ACQUIRE_LOCK(peer->mutex_);
int attempts = ++peer->total_handshake_attempts_;
if (procdel_)
procdel_->OnConnectionRetry(attempts);
peer->OnHandshakeInitSent();
packet->addr = peer->endpoint_;
WG_RELEASE_LOCK(peer->mutex_);
DoWriteUdpPacket(packet);
if (attempts > 1 && attempts <= 20)
RINFO("Retrying handshake, attempt %d...%s", attempts, (attempts == 20) ? " (last notice)" : "");
}
}
packet->addr = peer->endpoint_;
DoWriteUdpPacket(packet);
peer->OnHandshakeInitSent();
// Classifies an incoming UDP packet by its first byte. Returns true for an
// empty packet and for any packet that is neither a MESSAGE_DATA packet nor
// marked with WG_SHORT_HEADER_BIT; returns false for those two data formats.
bool WireguardProcessor::IsMainThreadPacket(Packet *packet) {
  // TODO(ludde): Support header obfuscation
  if (packet->size == 0)
    return true;
  const auto first_byte = packet->data[0];
  if (first_byte == MESSAGE_DATA)
    return false;
  return (first_byte & WG_SHORT_HEADER_BIT) == 0;
}
// Handles an incoming WireGuard packet from the UDP side, decrypt etc.
void WireguardProcessor::HandleUdpPacket(Packet *packet, bool overload) {
uint32 type;
stats_.udp_bytes_in += packet->size;
stats_.udp_packets_in++;
// Unscramble incoming packets
#if WITH_HEADER_OBFUSCATION
if (dev_.header_obfuscation_)
ScrambleUnscramblePacket(packet, &dev_.header_obfuscation_key_);
#endif // WITH_HEADER_OBFUSCATION
stats_.udp_bytes_in += packet->size;
stats_.udp_packets_in++;
if (packet->size < sizeof(uint32))
goto invalid_size;
type = ReadLE32((uint32*)packet->data);
@ -604,22 +711,23 @@ void WireguardProcessor::HandleUdpPacket(Packet *packet, bool overload) {
HandleShortHeaderFormatPacket(type, packet);
#endif // WITH_SHORT_HEADERS
} else if (type == MESSAGE_HANDSHAKE_COOKIE) {
assert(dev_.IsMainThread());
if (packet->size != sizeof(MessageHandshakeCookie))
goto invalid_size;
HandleHandshakeCookiePacket(packet);
} else if (type == MESSAGE_HANDSHAKE_INITIATION) {
assert(dev_.IsMainThread());
if (WITH_HANDSHAKE_EXT ? (packet->size < sizeof(MessageHandshakeInitiation)) : (packet->size != sizeof(MessageHandshakeInitiation)))
goto invalid_size;
if (!CheckIncomingHandshakeRateLimit(packet, overload))
return;
HandleHandshakeInitiationPacket(packet);
stats_.handshakes_in++;
if (CheckIncomingHandshakeRateLimit(packet, overload))
HandleHandshakeInitiationPacket(packet);
} else if (type == MESSAGE_HANDSHAKE_RESPONSE) {
assert(dev_.IsMainThread());
if (WITH_HANDSHAKE_EXT ? (packet->size < sizeof(MessageHandshakeResponse)) : (packet->size != sizeof(MessageHandshakeResponse)))
goto invalid_size;
if (!CheckIncomingHandshakeRateLimit(packet, overload))
return;
HandleHandshakeResponsePacket(packet);
if (CheckIncomingHandshakeRateLimit(packet, overload))
HandleHandshakeResponsePacket(packet);
} else {
// unknown packet
invalid_size:
@ -628,7 +736,7 @@ invalid_size:
}
// Returns nonzero if two endpoints are different.
static uint32 CompareEndpoint(const IpAddr *a, const IpAddr *b) {
static uint32 CompareIpAddr(const IpAddr *a, const IpAddr *b) {
uint32 rv = b->sin.sin_family ^ a->sin.sin_family;
if (b->sin.sin_family != AF_INET6) {
rv |= b->sin.sin_addr.s_addr ^ a->sin.sin_addr.s_addr;
@ -642,9 +750,10 @@ static uint32 CompareEndpoint(const IpAddr *a, const IpAddr *b) {
return rv;
}
void WgPeer::CopyEndpointToPeer(WgKeypair *keypair, const IpAddr *addr) {
void WgPeer::CopyEndpointToPeer_Locked(WgKeypair *keypair, const IpAddr *addr) {
// Remember how to send packets to this peer
if (CompareEndpoint(&keypair->peer->endpoint_, addr)) {
if (keypair->peer->allow_endpoint_change_ &&
CompareIpAddr(&keypair->peer->endpoint_, addr)) {
#if WITH_SHORT_HEADERS
// When the endpoint changes, forget about using the short key.
keypair->broadcast_short_key = 0;
@ -660,28 +769,21 @@ void WireguardProcessor::HandleShortHeaderFormatPacket(uint32 tag, Packet *packe
size_t bytes_left = packet->size - 1;
WgKeypair *keypair;
uint64 counter, acked_counter;
uint8 ack_tag;
uint8 ack_tag, *ack_start;
if ((tag & WG_SHORT_HEADER_KEY_ID_MASK) == 0x00) {
// The key_id is explicitly included in the packet.
if (bytes_left < 4) goto getout;
uint32 key_id = ReadLE32(data);
data += 4, bytes_left -= 4;
auto it = dev_.key_id_lookup().find(key_id);
if (it == dev_.key_id_lookup().end()) goto getout;
keypair = it->second.second;
keypair = dev_.LookupKeypairByKeyId(key_id);
} else {
// Lookup the packet source ip and port in the address mapping
uint64 addr_id = packet->addr.sin.sin_addr.s_addr | ((uint64)packet->addr.sin.sin_port << 32);
auto it = dev_.addr_entry_map().find(addr_id);
if (it == dev_.addr_entry_map().end())
goto getout;
WgAddrEntry *addr_entry = it->second;
keypair = addr_entry->keys[((tag / WG_SHORT_HEADER_KEY_ID) & 3) - 1];
keypair = dev_.LookupKeypairInAddrEntryMap(addr_id, ((tag / WG_SHORT_HEADER_KEY_ID) & 3) - 1);
}
if (!keypair || keypair->recv_key_state == WgKeypair::KEY_INVALID ||
!keypair->enabled_features[WG_FEATURE_ID_SHORT_HEADER])
if (!keypair || !keypair->enabled_features[WG_FEATURE_ID_SHORT_HEADER])
goto getout;
// Pick the closest possible counter value with the same low bits.
@ -709,11 +811,13 @@ void WireguardProcessor::HandleShortHeaderFormatPacket(uint32 tag, Packet *packe
acked_counter = 0;
ack_tag = 0;
ack_start = data;
// If the acknowledge header is present, then parse it so we may
// get an ack for the highest seen packet.
if (tag & WG_SHORT_HEADER_ACK) {
if (bytes_left == 0) goto getout;
ack_tag = *data;
if (ack_tag & 0xF0) goto getout; // undefined bits
data += 1, bytes_left -= 1;
switch (ack_tag & WG_ACK_HEADER_COUNTER_MASK) {
@ -727,83 +831,104 @@ void WireguardProcessor::HandleShortHeaderFormatPacket(uint32 tag, Packet *packe
acked_counter = ReadLE32(data);
data += 4, bytes_left -= 4;
break;
case WG_ACK_HEADER_COUNTER_8:
if (bytes_left < 8) goto getout;
acked_counter = ReadLE64(data);
data += 8, bytes_left -= 8;
case WG_ACK_HEADER_COUNTER_6:
if (bytes_left < 6) goto getout;
acked_counter = ReadLE32(data) | ((uint64)ReadLE16(data + 4) << 32);
data += 6, bytes_left -= 6;
break;
default:
break;
goto getout;
}
}
if (counter >= REJECT_AFTER_MESSAGES)
goto getout;
// Authenticate the packet before we can apply the state changes.
if (!WgKeypairDecryptPayload(data, bytes_left, packet->data, data - packet->data, counter, keypair))
if (!WgKeypairDecryptPayload(data, bytes_left, ack_start, data - ack_start, counter, keypair))
goto getout;
WG_ACQUIRE_LOCK(keypair->peer->mutex_);
if (keypair->recv_key_state == WgKeypair::KEY_INVALID)
goto getout_unlock;
if (!keypair->replay_detector.CheckReplay(counter))
goto getout;
goto getout_unlock;
stats_.compression_wg_saved_in += 16 - (data - packet->data);
keypair->send_ctr_acked = std::max<uint64>(keypair->send_ctr_acked, acked_counter);
keypair->incoming_packet_count++;
WgPeer::CopyEndpointToPeer(keypair, &packet->addr);
WgPeer::CopyEndpointToPeer_Locked(keypair, &packet->addr);
// Periodically broadcast out the short key
if ((tag & WG_SHORT_HEADER_KEY_ID_MASK) == 0x00 && !keypair->did_attempt_remember_ip_port) {
keypair->did_attempt_remember_ip_port = true;
if (keypair->enabled_features[WG_FEATURE_ID_SKIP_KEYID_IN]) {
uint64 addr_id = packet->addr.sin.sin_addr.s_addr | ((uint64)packet->addr.sin.sin_port << 32);
dev_.UpdateKeypairAddrEntry(addr_id, keypair);
dev_.UpdateKeypairAddrEntry_Locked(addr_id, keypair);
}
}
// Ack header may also signal that we can omit the key id in packets from now on.
if (tag & WG_SHORT_HEADER_ACK)
keypair->can_use_short_key_for_outgoing = (ack_tag & WG_ACK_HEADER_KEY_MASK) * WG_SHORT_HEADER_KEY_ID;
HandleAuthenticatedDataPacket(keypair, packet, data, bytes_left - keypair->auth_tag_length);
HandleAuthenticatedDataPacket_WillUnlock(keypair, packet, data, bytes_left - keypair->auth_tag_length);
return;
getout_unlock:
WG_RELEASE_LOCK(keypair->peer->mutex_);
getout:
FreePacket(packet);
return;
}
#endif // WITH_SHORT_HEADERS
void WireguardProcessor::HandleAuthenticatedDataPacket(WgKeypair *keypair, Packet *packet, uint8 *data, size_t data_size) {
void WireguardProcessor::NotifyHandshakeComplete() {
uint64 now = OsGetMilliseconds();
// todo: should lock something
stats_.last_complete_handshake_timestamp = now;
if (stats_.first_complete_handshake_timestamp == 0)
stats_.first_complete_handshake_timestamp = now;
if (procdel_)
procdel_->OnConnected();
}
void WireguardProcessor::HandleAuthenticatedDataPacket_WillUnlock(WgKeypair *keypair, Packet *packet, uint8 *data, size_t data_size) {
WgPeer *peer = keypair->peer;
assert(peer->IsPeerLocked());
// Promote the next key to the current key when we receive a data packet,
// the handshake is now complete.
if (peer->CheckSwitchToNextKey(keypair)) {
if (procdel_) {
procdel_->OnConnected(ReadBE32(tun_addr_.addr));
}
if (peer->CheckSwitchToNextKey_Locked(keypair)) {
stats_.handshakes_in_success++;
peer->OnHandshakeFullyComplete();
SendQueuedPackets(peer);
NotifyHandshakeComplete();
SendQueuedPackets_Locked(peer);
}
// Refresh when current key gets too old
if (peer->curr_keypair_ && peer->curr_keypair_->recv_key_state == WgKeypair::KEY_WANT_REFRESH) {
peer->curr_keypair_->recv_key_state = WgKeypair::KEY_DID_REFRESH;
SendHandshakeInitiationAndResetRetries(peer);
WgKeypair *curr_keypair = peer->curr_keypair_;
if (curr_keypair && curr_keypair->recv_key_state == WgKeypair::KEY_WANT_REFRESH) {
curr_keypair->recv_key_state = WgKeypair::KEY_DID_REFRESH;
ScheduleNewHandshake(peer);
}
if (data_size == 0) {
peer->OnKeepaliveReceived();
WG_RELEASE_LOCK(peer->mutex_);
goto getout;
}
peer->OnDataReceived();
WG_RELEASE_LOCK(peer->mutex_);
#if WITH_HANDSHAKE_EXT
// Unpack the packet headers using ipzip
if (keypair->enabled_features[WG_FEATURE_ID_IPZIP]) {
uint32 rv = IpzipDecompress(data, (uint32)data_size, &keypair->ipzip_state_, IPZIP_RECV_BY_CLIENT);
if (rv == (uint32)-1)
goto getout; // ipzip failed decompress
goto getout;
stats_.compression_hdr_saved_in += (int64)rv - data_size;
data -= (int64)rv - data_size, data_size = rv;
}
@ -816,36 +941,30 @@ void WireguardProcessor::HandleAuthenticatedDataPacket(WgKeypair *keypair, Packe
ip_version = *data >> 4;
if (ip_version == 4) {
if (data_size < IPV4_HEADER_SIZE) {
// too small ipv4 header
goto getout;
}
if (data_size < IPV4_HEADER_SIZE)
goto getout_error_header;
WG_ACQUIRE_RWLOCK_SHARED(dev_.ip_to_peer_map_lock_);
peer_from_header = (WgPeer*)dev_.ip_to_peer_map().LookupV4(ReadBE32(data + 12));
WG_RELEASE_RWLOCK_SHARED(dev_.ip_to_peer_map_lock_)
size_from_header = ReadBE16(data + 2);
if (size_from_header < IPV4_HEADER_SIZE) {
// too small packet?
goto getout;
goto getout_error_header;
}
} else if (ip_version == 6) {
if (data_size < IPV6_HEADER_SIZE) {
// too small ipv6 header
goto getout;
}
if (data_size < IPV6_HEADER_SIZE)
goto getout_error_header;
WG_ACQUIRE_RWLOCK_SHARED(dev_.ip_to_peer_map_lock_);
peer_from_header = (WgPeer*)dev_.ip_to_peer_map().LookupV6(data + 8);
WG_RELEASE_RWLOCK_SHARED(dev_.ip_to_peer_map_lock_)
size_from_header = IPV6_HEADER_SIZE + ReadBE16(data + 4);
} else {
// invalid ip version
goto getout;
goto getout_error_header;
}
if (size_from_header > data_size) {
// oversized packet?
goto getout;
}
if (peer_from_header != peer) {
// source address mismatch?
goto getout;
}
//RINFO("Outgoing TUN packet of size %d", (int)size_from_header);
if (peer_from_header != peer || size_from_header > data_size)
goto getout_error_header;
packet->data = data;
packet->size = size_from_header;
@ -855,9 +974,10 @@ void WireguardProcessor::HandleAuthenticatedDataPacket(WgKeypair *keypair, Packe
tun_->WriteTunPacket(packet);
return;
getout_error_header:
stats_.error_header++;
getout:
FreePacket(packet);
return;
}
void WireguardProcessor::HandleDataPacket(Packet *packet) {
@ -865,29 +985,33 @@ void WireguardProcessor::HandleDataPacket(Packet *packet) {
size_t data_size = packet->size;
uint32 key_id = ((MessageData*)data)->receiver_id;
uint64 counter = ToLE64((((MessageData*)data)->counter));
WgKeypair *keypair;
auto it = dev_.key_id_lookup().find(key_id);
if (it == dev_.key_id_lookup().end() ||
(keypair = it->second.second) == NULL ||
keypair->recv_key_state == WgKeypair::KEY_INVALID) {
WgKeypair *keypair = dev_.LookupKeypairByKeyId(key_id);
if (keypair == NULL || counter >= REJECT_AFTER_MESSAGES) {
stats_.error_key_id++;
getout:
FreePacket(packet);
return;
}
if (counter >= REJECT_AFTER_MESSAGES)
goto getout;
if (!WgKeypairDecryptPayload(data + sizeof(MessageData), data_size - sizeof(MessageData),
NULL, 0, counter, keypair)) {
NULL, 0, counter, keypair)) {
stats_.error_mac++;
goto getout;
}
if (!keypair->replay_detector.CheckReplay(counter))
goto getout;
WgPeer::CopyEndpointToPeer(keypair, &packet->addr);
HandleAuthenticatedDataPacket(keypair, packet, data + sizeof(MessageData), data_size - sizeof(MessageData) - keypair->auth_tag_length);
WG_ACQUIRE_LOCK(keypair->peer->mutex_);
if (keypair->recv_key_state == WgKeypair::KEY_INVALID) {
stats_.error_key_id++;
WG_RELEASE_LOCK(keypair->peer->mutex_);
goto getout;
} else if (!keypair->replay_detector.CheckReplay(counter)) {
stats_.error_duplicate++;
WG_RELEASE_LOCK(keypair->peer->mutex_);
goto getout;
} else {
WgPeer::CopyEndpointToPeer_Locked(keypair, &packet->addr);
HandleAuthenticatedDataPacket_WillUnlock(keypair, packet, data + sizeof(MessageData), data_size - sizeof(MessageData) - keypair->auth_tag_length);
}
}
static uint64 GetIpForRateLimit(Packet *packet) {
@ -899,54 +1023,55 @@ static uint64 GetIpForRateLimit(Packet *packet) {
}
bool WireguardProcessor::CheckIncomingHandshakeRateLimit(Packet *packet, bool overload) {
assert(dev_.IsMainThread());
WgRateLimit::RateLimitResult rr = dev_.rate_limiter()->CheckRateLimit(GetIpForRateLimit(packet));
if ((overload && rr.is_rate_limited()) || !dev_.CheckCookieMac1(packet)) {
FreePacket(packet);
return false;
}
dev_.rate_limiter()->CommitResult(rr);
if (overload && !rr.is_first_ip() && !dev_.CheckCookieMac2(packet)) {
dev_.rate_limiter()->CommitResult(rr);
dev_.CreateCookieMessage((MessageHandshakeCookie*)packet->data, packet, ((MessageHandshakeInitiation*)packet->data)->sender_key_id);
packet->size = sizeof(MessageHandshakeCookie);
DoWriteUdpPacket(packet);
return false;
}
dev_.rate_limiter()->CommitResult(rr);
return true;
}
// server receives this when client wants to setup a session
void WireguardProcessor::HandleHandshakeInitiationPacket(Packet *packet) {
assert(dev_.IsMainThread());
WgPeer *peer = WgPeer::ParseMessageHandshakeInitiation(&dev_, packet);
if (!peer) {
if (peer) {
DoWriteUdpPacket(packet);
} else {
FreePacket(packet);
return;
}
peer->OnHandshakeAuthComplete();
DoWriteUdpPacket(packet);
}
// client receives this after session is established
void WireguardProcessor::HandleHandshakeResponsePacket(Packet *packet) {
assert(dev_.IsMainThread());
WgPeer *peer = WgPeer::ParseMessageHandshakeResponse(&dev_, packet);
if (!peer) {
FreePacket(packet);
return;
if (peer) {
stats_.handshakes_out_success++;
WG_SCOPED_LOCK(peer->mutex_);
if (peer->allow_endpoint_change_)
peer->endpoint_ = packet->addr;
peer->OnHandshakeAuthComplete();
peer->OnHandshakeFullyComplete();
NotifyHandshakeComplete();
SendKeepalive_Locked(peer);
}
peer->endpoint_ = packet->addr;
FreePacket(packet);
peer->OnHandshakeAuthComplete();
peer->OnHandshakeFullyComplete();
if (procdel_)
procdel_->OnConnected(ReadBE32(tun_addr_.addr));
SendKeepalive(peer);
}
void WireguardProcessor::SendKeepalive(WgPeer *peer) {
void WireguardProcessor::SendKeepalive_Locked(WgPeer *peer) {
assert(dev_.IsMainThread() && peer->IsPeerLocked());
// can't send keepalive if no endpoint is configured
if (peer->endpoint_.sin.sin_family == 0)
return;
// If nothing is queued, insert a keepalive packet
if (peer->first_queued_packet_ == NULL) {
Packet *packet = AllocPacket();
@ -956,43 +1081,70 @@ void WireguardProcessor::SendKeepalive(WgPeer *peer) {
packet->next = NULL;
peer->first_queued_packet_ = packet;
}
SendQueuedPackets(peer);
SendQueuedPackets_Locked(peer);
}
void WireguardProcessor::SendQueuedPackets(WgPeer *peer) {
// Steal the packets
void WireguardProcessor::SendQueuedPackets_Locked(WgPeer *peer) {
assert(peer->IsPeerLocked());
// Steal the queue of all packets and send them all.
Packet *packet = peer->first_queued_packet_;
peer->first_queued_packet_ = NULL;
peer->last_queued_packet_ptr_ = &peer->first_queued_packet_;
peer->num_queued_packets_ = 0;
while (packet) {
while (packet != NULL) {
Packet *next = packet->next;
WritePacketToUdp(peer, packet);
WriteAndEncryptPacketToUdp_WillUnlock(peer, packet);
packet = next;
WG_ACQUIRE_LOCK(peer->mutex_); // WriteAndEncryptPacketToUdp_WillUnlock releases the lock
}
}
// Hands a received cookie message to WgPeer::ParseMessageHandshakeCookie.
// Asserts that it runs on the main thread. HandleUdpPacket checks that the
// packet size equals sizeof(MessageHandshakeCookie) before calling this.
void WireguardProcessor::HandleHandshakeCookiePacket(Packet *packet) {
  assert(dev_.IsMainThread());
  MessageHandshakeCookie *cookie_msg = (MessageHandshakeCookie *)packet->data;
  WgPeer::ParseMessageHandshakeCookie(&dev_, cookie_msg);
}
// Only one thread may run the second loop
void WireguardProcessor::SecondLoop() {
assert(dev_.IsMainThread());
uint64 now = OsGetMilliseconds();
uint64 bytes_in = stats_.tun_bytes_in - stats_last_bytes_in_;
uint64 bytes_out = stats_.tun_bytes_out - stats_last_bytes_out_;
stats_last_bytes_in_ = stats_.tun_bytes_in;
stats_last_bytes_out_ = stats_.tun_bytes_out;
uint64 millis = now - stats_last_ts_;
stats_last_ts_ = now;
double f = 1000.0 / std::max<uint32>((uint32)millis, 500);
stats_.tun_bytes_in_per_second = (float)(bytes_in * f);
stats_.tun_bytes_out_per_second = (float)(bytes_out * f);
for (WgPeer *peer = dev_.first_peer(); peer; peer = peer->next_peer_) {
WgKeypair *keypair = peer->curr_keypair_;
// Allow ip/port to be remembered again for this keypair
if (peer->curr_keypair_)
peer->curr_keypair_->did_attempt_remember_ip_port = false;
if (keypair)
keypair->did_attempt_remember_ip_port = false;
uint32 mask = peer->CheckTimeouts(now);
if (mask == 0)
continue;
if (mask & WgPeer::ACTION_SEND_KEEPALIVE)
SendKeepalive(peer);
if (mask & WgPeer::ACTION_SEND_HANDSHAKE)
SendHandshakeInitiation(peer);
// Avoid taking the lock if it seems unnecessary
if (now >= peer->time_of_next_key_event_ || peer->timers_ != 0) {
uint32 mask;
{
WG_SCOPED_LOCK(peer->mutex_);
mask = peer->CheckTimeouts(now);
if (mask == 0)
continue;
if (mask & WgPeer::ACTION_SEND_KEEPALIVE)
SendKeepalive_Locked(peer);
}
if (mask & WgPeer::ACTION_SEND_HANDSHAKE)
SendHandshakeInitiation(peer);
}
}
dev_.SecondLoop(now);
}

View file

@ -5,24 +5,50 @@
#include "tunsafe_types.h"
#include "wireguard_proto.h"
struct ProcessorStats {
// Number of bytes sent/received over the physical UDP connections
int64 udp_bytes_in, udp_bytes_out;
int64 udp_packets_in, udp_packets_out;
// Number of bytes sent/received over the TUN interface
int64 tun_bytes_in, tun_bytes_out;
int64 tun_packets_in, tun_packets_out;
uint64 last_complete_handskake_timestamp;
// todo: for multithreaded use case need to use atomic ops.
// Counters and status values that WireguardProcessor accumulates in its
// |stats_| member and hands out through GetStats().
struct WgProcessorStats {
// Number of bytes sent/received over the physical UDP connection
uint64 udp_bytes_in, udp_bytes_out;
// Number of packets sent/received over the physical UDP connection
uint64 udp_packets_in, udp_packets_out;

// Number of valid packets sent/received over the TUN interface.
// The *_in counters are bumped in WriteAndEncryptPacketToUdp_WillUnlock, after
// the packet has passed the endpoint, size and keypair checks.
uint64 tun_bytes_in, tun_bytes_out;
uint64 tun_packets_in, tun_packets_out;

// Error types
// Data packet dropped: unknown key id, counter at or above
// REJECT_AFTER_MESSAGES, or the keypair's receive key is invalid.
uint32 error_key_id;
// Data packet dropped: WgKeypairDecryptPayload failed.
uint32 error_mac;
// Data packet dropped: the replay detector rejected its counter.
uint32 error_duplicate;
// NOTE(review): not updated in the code visible here; presumably counts
// packets whose source address did not match the peer - confirm.
uint32 error_source_addr;
// Decrypted payload dropped: IP header checks failed in
// HandleAuthenticatedDataPacket_WillUnlock.
uint32 error_header;

// Current speed of TUN packets, recomputed by SecondLoop from the change in
// tun_bytes_in / tun_bytes_out since its previous run.
float tun_bytes_in_per_second, tun_bytes_out_per_second;

// Timestamp of handshakes, taken from OsGetMilliseconds() in
// NotifyHandshakeComplete. The first one is written only while it is zero.
uint64 first_complete_handshake_timestamp;
uint64 last_complete_handshake_timestamp;

// How much saved from header compression
// (hdr: ipzip compression of the inner headers, wg: short WireGuard headers)
int64 compression_hdr_saved_in, compression_hdr_saved_out;
int64 compression_wg_saved_in, compression_wg_saved_out;

// Number of handshakes received and sent
uint32 handshakes_in, handshakes_out;
// Number of successful handshakes in and out
uint32 handshakes_in_success, handshakes_out_success;

// Key stuff
uint8 public_key[32];

// Address of the endpoint
IpAddr endpoint;
};
class ProcessorDelegate {
public:
virtual void OnConnected(in_addr_t my_ip) = 0;
virtual void OnDisconnected() = 0;
virtual void OnConnected() = 0;
virtual void OnConnectionRetry(uint32 attempts) = 0;
};
enum InternetBlockState {
@ -42,62 +68,46 @@ public:
WireguardProcessor(UdpInterface *udp, TunInterface *tun, ProcessorDelegate *procdel);
~WireguardProcessor();
void SetListenPort(int listen_port) {
listen_port_ = listen_port;
}
bool SetTunAddress(const WgCidrAddr &addr);
void SetListenPort(int listen_port);
bool AddDnsServer(const IpAddr &sin);
void SetMtu(int mtu) {
if (mtu >= 576 && mtu <= 10000)
mtu_ = mtu;
}
void SetAddRoutesMode(bool mode) {
add_routes_mode_ = mode;
}
void SetDnsBlocking(bool dns_blocking) {
dns_blocking_ = dns_blocking;
}
void SetInternetBlocking(InternetBlockState internet_blocking) {
internet_blocking_ = internet_blocking;
}
void SetHeaderObfuscation(const char *key) {
dev_.SetHeaderObfuscation(key);
}
bool SetTunAddress(const WgCidrAddr &addr);
void AddExcludedIp(const WgCidrAddr &cidr_addr);
void SetMtu(int mtu);
void SetAddRoutesMode(bool mode);
void SetDnsBlocking(bool dns_blocking);
void SetInternetBlocking(InternetBlockState internet_blocking);
void SetHeaderObfuscation(const char *key);
void HandleTunPacket(Packet *packet);
void HandleUdpPacket(Packet *packet, bool overload);
static bool IsMainThreadPacket(Packet *packet);
void SecondLoop();
ProcessorStats GetStats();
WgProcessorStats GetStats();
void ResetStats();
bool Start();
WgDevice &dev() { return dev_; }
TunInterface::PrePostCommands &prepost() { return pre_post_; }
const WgCidrAddr &tun_addr() { return tun_addr_; }
void RunAllMainThreadScheduled();
private:
void DoWriteUdpPacket(Packet *packet);
void WritePacketToUdp(WgPeer *peer, Packet *packet);
void WriteAndEncryptPacketToUdp_WillUnlock(WgPeer *peer, Packet *packet);
void SendHandshakeInitiation(WgPeer *peer);
void SendHandshakeInitiationAndResetRetries(WgPeer *peer);
void SendKeepalive(WgPeer *peer);
void SendQueuedPackets(WgPeer *peer);
void ScheduleNewHandshake(WgPeer *peer);
void SendKeepalive_Locked(WgPeer *peer);
void SendQueuedPackets_Locked(WgPeer *peer);
void HandleHandshakeInitiationPacket(Packet *packet);
void HandleHandshakeResponsePacket(Packet *packet);
void HandleHandshakeCookiePacket(Packet *packet);
void HandleDataPacket(Packet *packet);
void HandleAuthenticatedDataPacket(WgKeypair *keypair, Packet *packet, uint8 *data, size_t data_size);
void HandleAuthenticatedDataPacket_WillUnlock(WgKeypair *keypair, Packet *packet, uint8 *data, size_t data_size);
void HandleShortHeaderFormatPacket(uint32 tag, Packet *packet);
@ -106,6 +116,7 @@ private:
bool HandleIcmpv6NeighborSolicitation(const byte *data, size_t data_size);
void SetupCompressionHeader(WgPacketCompressionVer01 *c);
void NotifyHandshakeComplete();
int listen_port_;
@ -113,12 +124,13 @@ private:
TunInterface *tun_;
UdpInterface *udp_;
int mtu_;
ProcessorStats stats_;
WgProcessorStats stats_;
bool dns_blocking_;
uint8 internet_blocking_;
bool add_routes_mode_;
bool network_discovery_spoofing_;
bool did_have_first_handshake_;
uint8 network_discovery_mac_[6];
WgDevice dev_;
@ -129,5 +141,15 @@ private:
IpAddr dns_addr_, dns6_addr_;
TunInterface::PrePostCommands pre_post_;
// Queue of things scheduled to run on the main thread.
WG_DECLARE_LOCK(main_thread_scheduled_lock_);
WgPeer *main_thread_scheduled_, **main_thread_scheduled_last_;
uint64 stats_last_bytes_in_, stats_last_bytes_out_;
uint64 stats_last_ts_;
// IPs we want to map to the default route
std::vector<WgCidrAddr> excluded_ips_;
};

View file

@ -20,6 +20,10 @@
#include <netdb.h>
#endif
#if defined(OS_WIN)
#include "network_win32_dnsblock.h"
#endif
const char *print_ip_prefix(char buf[kSizeOfAddress], int family, const void *ip, int prefixlen) {
if (!inet_ntop(family, ip, buf, kSizeOfAddress - 8)) {
memcpy(buf, "unknown", 8);
@ -29,6 +33,17 @@ const char *print_ip_prefix(char buf[kSizeOfAddress], int family, const void *ip
return buf;
}
char *PrintIpAddr(const IpAddr &addr, char buf[kSizeOfAddress]) {
if (addr.sin.sin_family == AF_INET) {
print_ip_prefix(buf, addr.sin.sin_family, &addr.sin.sin_addr, -1);
} else if (addr.sin.sin_family == AF_INET) {
print_ip_prefix(buf, addr.sin.sin_family, &addr.sin6.sin6_addr, -1);
} else {
buf[0] = 0;
}
return buf;
}
struct Addr {
byte addr[4];
uint8 cidr;
@ -58,19 +73,71 @@ static bool ParseCidrAddr(char *s, WgCidrAddr *out) {
return false;
}
struct hostent *gethostbyname_retry_on_failure(const char * name, bool *exit_flag) {
// Stores |dns_blocker| for later use by Resolve() and starts with the abort
// flag cleared.
DnsResolver::DnsResolver(DnsBlocker *dns_blocker)
    : abort_flag_(false),
      dns_blocker_(dns_blocker) {
}
// Nothing to release explicitly; members clean up after themselves.
DnsResolver::~DnsResolver() = default;
// Forgets every cached (hostname, address) pair.
void DnsResolver::ClearCache() { cache_.clear(); }
bool DnsResolver::Resolve(const char *hostname, IpAddr *result) {
int attempt = 0;
static const uint8 retry_delays[] = {1, 2, 3, 5, 10, 20, 40, 60};
static const uint8 retry_delays[] = {1, 2, 3, 5, 10};
char buf[kSizeOfAddress];
memset(result, 0, sizeof(IpAddr));
if (inet_pton(AF_INET6, hostname, &result->sin6.sin6_addr) == 1) {
result->sin.sin_family = AF_INET6;
return true;
}
if (inet_pton(AF_INET, hostname, &result->sin.sin_addr) == 1) {
result->sin.sin_family = AF_INET;
return true;
}
// First check cache
for (auto it = cache_.begin(); it != cache_.end(); ++it) {
if (it->name == hostname) {
*result = it->ip;
RINFO("Resolved %s to %s%s", hostname, PrintIpAddr(*result, buf), " (cached)");
return true;
}
}
#if defined(OS_WIN)
// Then disable dns blocker (otherwise the windows dns client service can't resolve)
if (dns_blocker_ && dns_blocker_->IsActive()) {
RINFO("Disabling DNS blocker to resolve %s", hostname);
dns_blocker_->RestoreDns();
}
#endif // defined(OS_WIN)
for (;;) {
hostent *he = gethostbyname(name);
if (he || exit_flag == NULL || *exit_flag)
return he;
hostent *he = gethostbyname(hostname);
if (abort_flag_)
return false;
RINFO("Unable to resolve %s. Trying again in %d second(s)", name, retry_delays[attempt]);
if (he) {
result->sin.sin_family = AF_INET;
result->sin.sin_port = 0;
memcpy(&result->sin.sin_addr, he->h_addr_list[0], 4);
// add to cache
cache_.emplace_back(hostname, *result);
RINFO("Resolved %s to %s%s", hostname, PrintIpAddr(*result, buf), "");
return true;
}
RINFO("Unable to resolve %s. Trying again in %d second(s)", hostname, retry_delays[attempt]);
OsInterruptibleSleep(retry_delays[attempt] * 1000);
if (*exit_flag)
return NULL;
if (abort_flag_)
return false;
if (attempt != ARRAY_SIZE(retry_delays) - 1)
attempt++;
@ -78,7 +145,9 @@ struct hostent *gethostbyname_retry_on_failure(const char * name, bool *exit_fla
}
static bool ParseSockaddrInWithPort(char *s, IpAddr *sin, bool *exit_flag) {
static bool ParseSockaddrInWithPort(char *s, IpAddr *sin, DnsResolver *resolver) {
memset(sin, 0, sizeof(IpAddr));
if (*s == '[') {
char *end = strchr(s, ']');
@ -97,30 +166,20 @@ static bool ParseSockaddrInWithPort(char *s, IpAddr *sin, bool *exit_flag) {
char *x = strchr(s, ':');
if (!x) return false;
*x = 0;
hostent *he = gethostbyname_retry_on_failure(s, exit_flag);
if (!he) {
if (!resolver->Resolve(s, sin)) {
RERROR("Unable to resolve %s", s);
return false;
}
sin->sin.sin_family = AF_INET;
sin->sin.sin_port = htons(atoi(x + 1));
memcpy(&sin->sin.sin_addr, he->h_addr_list[0], 4);
return true;
}
static bool ParseSockaddrInWithoutPort(char *s, IpAddr *sin, bool *exit_flag) {
memset(sin, 0, sizeof(IpAddr));
if (inet_pton(AF_INET6, s, &sin->sin6.sin6_addr) == 1) {
sin->sin.sin_family = AF_INET6;
return true;
}
hostent *he = gethostbyname_retry_on_failure(s, exit_flag);
if (!he) {
static bool ParseSockaddrInWithoutPort(char *s, IpAddr *sin, DnsResolver *resolver) {
if (!resolver->Resolve(s, sin)) {
RERROR("Unable to resolve %s", s);
return false;
}
sin->sin.sin_family = AF_INET;
memcpy(&sin->sin.sin_addr, he->h_addr_list[0], 4);
return true;
}
@ -131,7 +190,7 @@ static bool ParseBase64Key(const char *s, uint8 key[32]) {
class WgFileParser {
public:
WgFileParser(WireguardProcessor *wg, bool *exit_flag) : wg_(wg), exit_flag_(exit_flag) {}
WgFileParser(WireguardProcessor *wg, DnsResolver *resolver) : wg_(wg), dns_resolver_(resolver) {}
bool ParseFlag(const char *group, const char *key, char *value);
WireguardProcessor *wg_;
@ -142,7 +201,7 @@ public:
};
Peer pi_;
WgPeer *peer_ = NULL;
bool *exit_flag_;
DnsResolver *dns_resolver_;
bool had_interface_ = false;
};
@ -271,7 +330,7 @@ bool WgFileParser::ParseFlag(const char *group, const char *key, char *value) {
} else if (strcmp(key, "DNS") == 0) {
SplitString(value, ',', &ss);
for (size_t i = 0; i < ss.size(); i++) {
if (!ParseSockaddrInWithoutPort(ss[i], &sin, exit_flag_))
if (!ParseSockaddrInWithoutPort(ss[i], &sin, dns_resolver_))
return false;
if (!wg_->AddDnsServer(sin)) {
RERROR("Multiple DNS not allowed.");
@ -315,6 +374,13 @@ bool WgFileParser::ParseFlag(const char *group, const char *key, char *value) {
wg_->prepost().pre_up.emplace_back(value);
} else if (strcmp(key, "PreDown") == 0) {
wg_->prepost().pre_down.emplace_back(value);
} else if (strcmp(key, "ExcludedIPs") == 0) {
SplitString(value, ',', &ss);
for (size_t i = 0; i < ss.size(); i++) {
if (!ParseCidrAddr(ss[i], &addr))
return false;
wg_->AddExcludedIp(addr);
}
} else {
goto err;
}
@ -344,7 +410,7 @@ bool WgFileParser::ParseFlag(const char *group, const char *key, char *value) {
return false;
}
} else if (strcmp(key, "Endpoint") == 0) {
if (!ParseSockaddrInWithPort(value, &sin, exit_flag_))
if (!ParseSockaddrInWithPort(value, &sin, dns_resolver_))
return false;
peer_->SetEndpoint(sin);
} else if (strcmp(key, "PersistentKeepalive") == 0) {
@ -384,11 +450,20 @@ err:
return true;
}
bool ParseWireGuardConfigFile(WireguardProcessor *wg, const char *filename, bool *exit_flag) {
static bool ContainsNonAsciiCharacter(const char *buf, size_t size) {
for (size_t i = 0; i < size; i++) {
uint8 c = buf[i];
if (c < 32 && ((1 << c) & (1 << '\n' | 1 << '\r' | 1 << '\t')) == 0)
return true;
}
return false;
}
bool ParseWireGuardConfigFile(WireguardProcessor *wg, const char *filename, DnsResolver *dns_resolver) {
char buf[1024];
char group[32] = {0};
WgFileParser file_parser(wg, exit_flag);
WgFileParser file_parser(wg, dns_resolver);
RINFO("Loading file: %s", filename);
@ -400,6 +475,13 @@ bool ParseWireGuardConfigFile(WireguardProcessor *wg, const char *filename, bool
while (fgets(buf, sizeof(buf), f)) {
size_t l = strlen(buf);
if (ContainsNonAsciiCharacter(buf, l)) {
RERROR("File is not a config file: %s", filename);
return false;
}
while (l && is_space(buf[l - 1]))
buf[--l] = 0;
if (buf[0] == '#' || buf[0] == '\0')

View file

@ -3,13 +3,38 @@
#ifndef TINYVPN_TINYVPN_H_
#define TINYVPN_TINYVPN_H_
class WireguardProcessor;
#include "netapi.h"
bool ParseWireGuardConfigFile(WireguardProcessor *wg, const char *filename, bool *exit_flag);
class WireguardProcessor;
class DnsBlocker;
// Resolves host names to IP addresses and remembers successful lookups in
// an in-memory list so that later calls for the same name can be answered
// from it.
class DnsResolver {
public:
  explicit DnsResolver(DnsBlocker *dns_blocker);
  ~DnsResolver();

  // Resolves |hostname| into |result|. Returns true on success and false if
  // the lookup was given up because the abort flag was set.
  bool Resolve(const char *hostname, IpAddr *result);

  // Drops all remembered lookups.
  void ClearCache();

  // Sets or clears the flag that makes Resolve() stop retrying.
  void SetAbortFlag(bool v) { abort_flag_ = v; }

private:
  // One remembered lookup: the host name and the address it resolved to.
  struct Entry {
    std::string name;
    IpAddr ip;
    Entry(const std::string &entry_name, const IpAddr &entry_ip)
        : name(entry_name), ip(entry_ip) {}
  };

  std::vector<Entry> cache_;
  bool abort_flag_;
  DnsBlocker *dns_blocker_;
};
bool ParseWireGuardConfigFile(WireguardProcessor *wg, const char *filename, DnsResolver *dns_resolver);
#define kSizeOfAddress 64
const char *print_ip_prefix(char buf[kSizeOfAddress], int family, const void *ip, int prefixlen);
char *PrintIpAddr(const IpAddr &addr, char buf[kSizeOfAddress]);
#endif // TINYVPN_TINYVPN_H_

View file

@ -11,7 +11,7 @@
#include "util.h"
#include "crypto_ops.h"
#include "bit_ops.h"
#include "tunsafe_cpu.h"
#include "tunsafe_cpu.h"
#include <algorithm>
#include <assert.h>
#include <stdlib.h>
@ -23,97 +23,6 @@ static const uint8 kWgInitHash[WG_HASH_LEN] = {0x22,0x11,0xb3,0x61,0x08,0x1a,0xc
static const uint8 kWgInitChainingKey[WG_HASH_LEN] = {0x60,0xe2,0x6d,0xae,0xf3,0x27,0xef,0xc0,0x2e,0xc3,0x35,0xe2,0xa0,0x25,0xd2,0xd0,0x16,0xeb,0x42,0x06,0xf8,0x72,0x77,0xf5,0x2d,0x38,0xd1,0x98,0x8b,0x78,0xcd,0x36};
static const uint8 kCurve25519Basepoint[32] = {9};
// Starts with both the IPv4 and the IPv6 table empty.
IpToPeerMap::IpToPeerMap() = default;
// The tables release their own storage; the stored peer pointers are not
// touched here.
IpToPeerMap::~IpToPeerMap() = default;
bool IpToPeerMap::InsertV4(const void *addr, int cidr, void *peer) {
uint32 mask = cidr == 32 ? 0xffffffff : ~(0xffffffff >> cidr);
Entry4 e = {ReadBE32(addr) & mask, mask, peer};
ipv4_.push_back(e);
return true;
}
bool IpToPeerMap::InsertV6(const void *addr, int cidr, void *peer) {
Entry6 e;
e.cidr_len = cidr;
e.peer = peer;
memcpy(e.ip, addr, 16);
ipv6_.push_back(e);
return true;
}
// Returns the peer of the matching IPv4 entry with the largest netmask, or
// NULL when no entry covers |ip|. Among entries with equal masks the one
// inserted last wins.
void *IpToPeerMap::LookupV4(uint32 ip) {
  void *winner = NULL;
  uint32 widest_mask = 0;
  for (const Entry4 &entry : ipv4_) {
    if ((ip & entry.mask) != entry.ip || entry.mask < widest_mask)
      continue;
    widest_mask = entry.mask;
    winner = entry.peer;
  }
  return winner;
}
void *IpToPeerMap::LookupV4DefaultPeer() {
for (auto it = ipv4_.begin(); it != ipv4_.end(); ++it) {
if (it->mask == 0)
return it->peer;
}
return NULL;
}
void *IpToPeerMap::LookupV6DefaultPeer() {
for (auto it = ipv6_.begin(); it != ipv6_.end(); ++it) {
if (it->cidr_len == 0)
return it->peer;
}
return NULL;
}
static int CalculateIPv6CommonPrefix(const uint8 *a, const uint8 *b) {
uint64 x = ToBE64(*(uint64*)&a[0] ^ *(uint64*)&b[0]);
uint64 y = ToBE64(*(uint64*)&a[8] ^ *(uint64*)&b[8]);
return x ? 64 - FindHighestSetBit64(x) : 128 - FindHighestSetBit64(y);
}
void *IpToPeerMap::LookupV6(const void *addr) {
int best_len = 0;
void *best_peer = NULL;
for (auto it = ipv6_.begin(); it != ipv6_.end(); ++it) {
int len = CalculateIPv6CommonPrefix((const uint8*)addr, it->ip);
if (len >= it->cidr_len && len >= best_len) {
best_len = len;
best_peer = it->peer;
}
}
return best_peer;
}
void IpToPeerMap::RemovePeer(void *peer) {
{
size_t n = ipv4_.size();
Entry4 *r = &ipv4_[0], *w = r;
for (size_t i = 0; i != n; i++, r++) {
if (r->peer != peer)
*w++ = *r;
}
ipv4_.resize(w - &ipv4_[0]);
}
{
size_t n = ipv6_.size();
Entry6 *r = &ipv6_[0], *w = r;
for (size_t i = 0; i != n; i++, r++) {
if (r->peer != peer)
*w++ = *r;
}
ipv6_.resize(w - &ipv6_[0]);
}
}
ReplayDetector::ReplayDetector() {
expected_seq_nr_ = 0;
memset(bitmap_, 0, sizeof(bitmap_));
@ -124,8 +33,9 @@ ReplayDetector::~ReplayDetector() {
bool ReplayDetector::CheckReplay(uint64 seq_nr) {
uint64 slot = seq_nr / BITS_PER_ENTRY;
if (seq_nr >= expected_seq_nr_) {
uint64 prev_slot = (expected_seq_nr_ + BITS_PER_ENTRY - 1) / BITS_PER_ENTRY - 1, n;
uint64 expected_seq_nr = expected_seq_nr_;
if (seq_nr >= expected_seq_nr) {
uint64 prev_slot = (expected_seq_nr + BITS_PER_ENTRY - 1) / BITS_PER_ENTRY - 1, n;
if ((n = slot - prev_slot) != 0) {
size_t nn = (size_t)std::min<uint64>(n, BITMAP_SIZE);
do {
@ -133,7 +43,7 @@ bool ReplayDetector::CheckReplay(uint64 seq_nr) {
} while (--nn);
}
expected_seq_nr_ = seq_nr + 1;
} else if (seq_nr + WINDOW_SIZE <= expected_seq_nr_) {
} else if (seq_nr + WINDOW_SIZE <= expected_seq_nr) {
return false;
}
uint32 mask = 1 << (seq_nr & (BITS_PER_ENTRY - 1)), prev;
@ -146,21 +56,21 @@ WgDevice::WgDevice() {
peers_ = NULL;
header_obfuscation_ = false;
next_rng_slot_ = 0;
last_complete_handskake_timestamp_ = 0;
memset(&compression_header_, 0, sizeof(compression_header_));
low_resolution_timestamp_ = cookie_secret_timestamp_ = OsGetMilliseconds();
OsGetRandomBytes(cookie_secret_, sizeof(cookie_secret_));
OsGetRandomBytes((uint8*)random_number_input_, sizeof(random_number_input_));
SetCurrentThreadAsMainThread();
}
// No explicit teardown is performed here.
WgDevice::~WgDevice() = default;
void WgDevice::SecondLoop(uint64 now) {
low_resolution_timestamp_ = now;
assert(IsMainThread());
low_resolution_timestamp_ = now;
if (rate_limiter_.is_used()) {
uint32 k[5];
for (size_t i = 0; i < ARRAY_SIZE(k); i++)
@ -170,11 +80,16 @@ void WgDevice::SecondLoop(uint64 now) {
}
uint32 WgDevice::InsertInKeyIdLookup(WgPeer *peer, WgKeypair *kp) {
assert(IsMainThread());
assert(peer);
for (;;) {
uint32 v = GetRandomNumber();
if (v == 0)
continue;
// Take the exclusive lock since we're modifying it.
WG_SCOPED_RWLOCK_EXCLUSIVE(key_id_lookup_lock_);
std::pair<WgPeer*, WgKeypair*> &peer_and_keypair = key_id_lookup_[v];
if (peer_and_keypair.first == NULL) {
peer_and_keypair = std::make_pair(peer, kp);
@ -188,7 +103,24 @@ uint32 WgDevice::InsertInKeyIdLookup(WgPeer *peer, WgKeypair *kp) {
}
}
// Finds the key-id table entry for |key_id| that has no keypair attached
// yet. Returns a pointer to the (peer, keypair) pair stored in the table, or
// NULL when the id is unknown or already has a keypair.
//
// Main thread only. No lock is taken because the main thread is the only
// one that mutates the table.
std::pair<WgPeer*, WgKeypair*> *WgDevice::LookupPeerInKeyIdLookup(uint32 key_id) {
  assert(IsMainThread());
  auto entry = key_id_lookup_.find(key_id);
  if (entry == key_id_lookup_.end())
    return NULL;
  if (entry->second.second != NULL)
    return NULL;
  return &entry->second;
}
// Returns the keypair registered under |key_id|, or NULL when the id is not
// in the table. The stored keypair itself may also be NULL.
//
// May be called from any thread, hence the shared lock around the lookup.
WgKeypair *WgDevice::LookupKeypairByKeyId(uint32 key_id) {
  WG_SCOPED_RWLOCK_SHARED(key_id_lookup_lock_);
  auto found = key_id_lookup_.find(key_id);
  if (found == key_id_lookup_.end())
    return NULL;
  return found->second.second;
}
uint32 WgDevice::GetRandomNumber() {
assert(IsMainThread());
size_t slot;
if ((slot = next_rng_slot_) == 0) {
blake2s(random_number_output_, sizeof(random_number_output_), random_number_input_, sizeof(random_number_input_), NULL, 0);
@ -232,6 +164,7 @@ void WgDevice::Initialize(const uint8 private_key[WG_PUBLIC_KEY_LEN]) {
}
WgPeer *WgDevice::AddPeer() {
assert(IsMainThread());
WgPeer *peer = new WgPeer(this);
WgPeer **pp = &peers_;
while (*pp)
@ -241,6 +174,8 @@ WgPeer *WgDevice::AddPeer() {
}
WgPeer *WgDevice::GetPeerFromPublicKey(uint8 public_key[WG_PUBLIC_KEY_LEN]) {
assert(IsMainThread());
// todo: add O(1) lookup
for (WgPeer *peer = peers_; peer; peer = peer->next_peer_) {
if (memcmp(peer->s_remote_, public_key, WG_PUBLIC_KEY_LEN) == 0)
return peer;
@ -249,15 +184,16 @@ WgPeer *WgDevice::GetPeerFromPublicKey(uint8 public_key[WG_PUBLIC_KEY_LEN]) {
}
// Verifies the first of the two trailing WG_COOKIE_LEN-byte MAC fields of
// |packet|: a keyed blake2s over everything preceding both MACs, using
// precomputed_mac1_key_, must equal the bytes stored in the packet.
// Returns true on a match. Main thread only.
bool WgDevice::CheckCookieMac1(Packet *packet) {
  assert(IsMainThread());
  const uint8 *msg = packet->data;
  size_t msg_size = packet->size;
  // Both MAC fields sit at the very end of the message.
  size_t macs_offset = msg_size - WG_COOKIE_LEN * 2;
  uint8 expected[WG_COOKIE_LEN];
  blake2s(expected, sizeof(expected), msg, macs_offset,
          precomputed_mac1_key_, sizeof(precomputed_mac1_key_));
  return memcmp_crypto(expected, msg + macs_offset, WG_COOKIE_LEN) == 0;
}
void WgDevice::MakeCookie(uint8 cookie[WG_COOKIE_LEN], Packet *packet) {
assert(IsMainThread());
blake2s_state b2s;
uint64 now = OsGetMilliseconds();
if (now - cookie_secret_timestamp_ >= COOKIE_SECRET_MAX_AGE_MS) {
@ -274,6 +210,7 @@ void WgDevice::MakeCookie(uint8 cookie[WG_COOKIE_LEN], Packet *packet) {
}
bool WgDevice::CheckCookieMac2(Packet *packet) {
assert(IsMainThread());
uint8 cookie[WG_COOKIE_LEN];
uint8 mac[WG_COOKIE_LEN];
MakeCookie(cookie, packet);
@ -282,6 +219,7 @@ bool WgDevice::CheckCookieMac2(Packet *packet) {
}
void WgDevice::CreateCookieMessage(MessageHandshakeCookie *dst, Packet *packet, uint32 remote_key_id) {
assert(IsMainThread());
dst->type = MESSAGE_HANDSHAKE_COOKIE;
dst->receiver_key_id = remote_key_id;
MakeCookie(dst->cookie_enc, packet);
@ -290,7 +228,7 @@ void WgDevice::CreateCookieMessage(MessageHandshakeCookie *dst, Packet *packet,
xchacha20poly1305_encrypt(dst->cookie_enc, dst->cookie_enc, WG_COOKIE_LEN, mac->mac1, WG_COOKIE_LEN, dst->nonce, precomputed_cookie_key_);
}
void WgDevice::EraseKeypairAddrEntry(WgKeypair *kp) {
void WgDevice::EraseKeypairAddrEntry_Locked(WgKeypair *kp) {
WgAddrEntry *ae = kp->addr_entry;
assert(ae->ref_count >= 1);
@ -308,14 +246,28 @@ void WgDevice::EraseKeypairAddrEntry(WgKeypair *kp) {
}
}
void WgDevice::UpdateKeypairAddrEntry(uint64 addr_id, WgKeypair *keypair) {
if (keypair->addr_entry != NULL && keypair->addr_entry->addr_entry_id == addr_id) {
keypair->broadcast_short_key = 1;
return;
// Looks up the address entry registered for |addr| and returns the keypair
// stored in its keys[|slot|], or NULL when |addr| has no entry.
// The lookup is done under the shared address-entry lock.
// NOTE(review): |slot| is not range-checked here -- callers must pass a
// valid index.
WgKeypair *WgDevice::LookupKeypairInAddrEntryMap(uint64 addr, uint32 slot) {
  WG_SCOPED_RWLOCK_SHARED(addr_entry_lookup_lock_);
  auto found = addr_entry_lookup_.find(addr);
  if (found != addr_entry_lookup_.end()) {
    WgAddrEntry *entry = found->second;
    return entry->keys[slot];
  }
  return NULL;
}
void WgDevice::UpdateKeypairAddrEntry_Locked(uint64 addr_id, WgKeypair *keypair) {
assert(keypair->peer->IsPeerLocked());
{
WG_SCOPED_RWLOCK_SHARED(addr_entry_lookup_lock_);
if (keypair->addr_entry != NULL && keypair->addr_entry->addr_entry_id == addr_id) {
keypair->broadcast_short_key = 1;
return;
}
}
WG_SCOPED_RWLOCK_EXCLUSIVE(addr_entry_lookup_lock_);
if (keypair->addr_entry != NULL)
EraseKeypairAddrEntry(keypair);
EraseKeypairAddrEntry_Locked(keypair);
WgAddrEntry **aep = &addr_entry_lookup_[addr_id], *ae;
@ -362,13 +314,16 @@ void WgDevice::SetHeaderObfuscation(const char *key) {
WgPeer::WgPeer(WgDevice *dev) {
assert(dev->IsMainThread());
dev_ = dev;
endpoint_.sin.sin_family = 0;
next_peer_ = NULL;
curr_keypair_ = next_keypair_ = prev_keypair_ = NULL;
expect_cookie_reply_ = false;
has_mac2_cookie_ = false;
pending_keepalive_ = false;
allow_multicast_through_peer_ = false;
allow_endpoint_change_ = true;
supports_handshake_extensions_ = true;
local_key_id_during_hs_ = 0;
last_handshake_init_timestamp_ = -1000000ll;
@ -380,20 +335,43 @@ WgPeer::WgPeer(WgDevice *dev) {
last_queued_packet_ptr_ = &first_queued_packet_;
num_queued_packets_ = 0;
handshake_attempts_ = 0;
total_handshake_attempts_ = 0;
num_ciphers_ = 0;
cipher_prio_ = 0;
main_thread_scheduled_ = 0;
memset(last_timestamp_, 0, sizeof(last_timestamp_));
ipv4_broadcast_addr_ = 0xffffffff;
memset(features_, 0, sizeof(features_));
}
WgPeer::~WgPeer() {
ClearKeys();
ClearHandshake();
ClearPacketQueue();
assert(dev_->IsMainThread());
WG_ACQUIRE_LOCK(mutex_);
ClearKeys_Locked();
ClearHandshake_Locked();
ClearPacketQueue_Locked();
WG_RELEASE_LOCK(mutex_);
}
void WgPeer::ClearPacketQueue() {
void WgPeer::ClearKeys_Locked() {
assert(dev_->IsMainThread() && IsPeerLocked());
DeleteKeypair(&curr_keypair_);
DeleteKeypair(&next_keypair_);
DeleteKeypair(&prev_keypair_);
}
void WgPeer::ClearHandshake_Locked() {
assert(dev_->IsMainThread() && IsPeerLocked());
uint32 v = local_key_id_during_hs_;
if (v != 0) {
local_key_id_during_hs_ = 0;
WG_SCOPED_RWLOCK_EXCLUSIVE(dev_->key_id_lookup_lock_);
dev_->key_id_lookup_.erase(v);
}
}
void WgPeer::ClearPacketQueue_Locked() {
assert(dev_->IsMainThread() && IsPeerLocked());
Packet *packet;
while ((packet = first_queued_packet_) != NULL) {
first_queued_packet_ = packet->next;
@ -422,6 +400,8 @@ void WgPeer::Initialize(const uint8 spub[WG_PUBLIC_KEY_LEN], const uint8 preshar
// run on the client
void WgPeer::CreateMessageHandshakeInitiation(Packet *packet) {
assert(dev_->IsMainThread());
uint8 k[WG_SYMMETRIC_KEY_LEN];
MessageHandshakeInitiation *dst = (MessageHandshakeInitiation *)packet->data;
@ -463,7 +443,6 @@ void WgPeer::CreateMessageHandshakeInitiation(Packet *packet) {
packet->size = (unsigned)(sizeof(MessageHandshakeInitiation) + extfield_size);
// Insert a pointer to this object,
dst->sender_key_id = dev_->InsertInKeyIdLookup(this, NULL);
dst->type = MESSAGE_HANDSHAKE_INITIATION;
memzero_crypto(k, sizeof(k));
@ -472,6 +451,7 @@ void WgPeer::CreateMessageHandshakeInitiation(Packet *packet) {
// Parsed by server
WgPeer *WgPeer::ParseMessageHandshakeInitiation(WgDevice *dev, Packet *packet) { // const MessageHandshakeInitiation *src, MessageHandshakeResponse *dst) {
assert(dev->IsMainThread());
// Copy values into handshake once we've validated it all.
uint8 ci[WG_HASH_LEN];
uint8 hi[WG_HASH_LEN];
@ -562,9 +542,14 @@ WgPeer *WgPeer::ParseMessageHandshakeInitiation(WgDevice *dev, Packet *packet) {
BlakeMix(hi, t, sizeof(t));
dst->receiver_key_id = remote_key_id;
keypair = peer->CreateNewKeypair(false, ci, remote_key_id, extbuf + WG_TIMESTAMP_LEN, extfield_size);
keypair = WgPeer::CreateNewKeypair(false, ci, remote_key_id, extbuf + WG_TIMESTAMP_LEN, extfield_size);
if (keypair) {
peer->InsertKeypairInPeer(keypair);
WG_ACQUIRE_LOCK(peer->mutex_);
peer->InsertKeypairInPeer_Locked(keypair);
peer->OnHandshakeAuthComplete();
WG_RELEASE_LOCK(peer->mutex_);
dst->sender_key_id = dev->InsertInKeyIdLookup(peer, keypair);
size_t extfield_out_size = 0;
@ -593,15 +578,15 @@ getout:
}
WgPeer *WgPeer::ParseMessageHandshakeResponse(WgDevice *dev, const Packet *packet) {
assert(dev->IsMainThread());
MessageHandshakeResponse *src = (MessageHandshakeResponse *)packet->data;
uint8 t[WG_HASH_LEN];
uint8 k[WG_SYMMETRIC_KEY_LEN];
WgKeypair *keypair;
auto it = dev->key_id_lookup().find(src->receiver_key_id);
if (it == dev->key_id_lookup().end() || it->second.second != NULL)
auto peer_and_keypair = dev->LookupPeerInKeyIdLookup(src->receiver_key_id);
if (peer_and_keypair == NULL)
return NULL;
WgPeer *peer = it->second.first;
WgPeer *peer = peer_and_keypair->first;
assert(src->receiver_key_id == peer->local_key_id_during_hs_);
HandshakeState hs = peer->hs_;
@ -626,16 +611,18 @@ WgPeer *WgPeer::ParseMessageHandshakeResponse(WgDevice *dev, const Packet *packe
if (!chacha20poly1305_decrypt(src->empty_enc, src->empty_enc, extfield_size + sizeof(src->empty_enc), hs.hi, sizeof(hs.hi), 0, k))
goto getout;
keypair = peer->CreateNewKeypair(true, hs.ci, src->sender_key_id, src->empty_enc, extfield_size);
keypair = WgPeer::CreateNewKeypair(true, hs.ci, src->sender_key_id, src->empty_enc, extfield_size);
if (!keypair)
goto getout;
peer->InsertKeypairInPeer(keypair);
// Re-map the entry in the id table so it points at this keypair instead.
keypair->local_key_id = peer->local_key_id_during_hs_;
peer->local_key_id_during_hs_ = 0;
it->second.second = keypair;
peer_and_keypair->second = keypair;
WG_ACQUIRE_LOCK(peer->mutex_);
peer->InsertKeypairInPeer_Locked(keypair);
WG_RELEASE_LOCK(peer->mutex_);
if (0) {
getout:
@ -650,11 +637,12 @@ getout:
// This is parsed by the initiator, when it needs to re-send the handshake message with a better mac.
void WgPeer::ParseMessageHandshakeCookie(WgDevice *dev, const MessageHandshakeCookie *src) {
assert(dev->IsMainThread());
uint8 cookie[WG_COOKIE_LEN];
auto it = dev->key_id_lookup().find(src->receiver_key_id);
if (it == dev->key_id_lookup().end() || it->second.second != NULL)
auto peer_and_keypair = dev->LookupPeerInKeyIdLookup(src->receiver_key_id);
if (!peer_and_keypair)
return;
WgPeer *peer = it->second.first;
WgPeer *peer = peer_and_keypair->first;
if (!peer->expect_cookie_reply_)
return;
if (!xchacha20poly1305_decrypt(cookie, src->cookie_enc, sizeof(src->cookie_enc),
@ -756,6 +744,7 @@ void WgKeypairSetupCompressionExtension(WgKeypair *keypair, const WgPacketCompre
state->server_addr_v4_subnet_bytes = (remotec->flags & 3);
WriteLE32(&state->server_addr_v4_netmask, 0xffffffff >> ((remotec->flags & 3) * 8));
}
bool WgKeypairParseExtendedHandshake(WgKeypair *keypair, const uint8 *data, size_t data_size) {
bool did_setup_compression = false;
@ -804,33 +793,29 @@ bool WgKeypairParseExtendedHandshake(WgKeypair *keypair, const uint8 *data, size
#endif // WITH_HANDSHAKE_EXT
void WgPeer::ClearKeys() {
DeleteKeypair(&curr_keypair_);
DeleteKeypair(&next_keypair_);
DeleteKeypair(&prev_keypair_);
}
void WgPeer::ClearHandshake() {
uint32 v = local_key_id_during_hs_;
if (v != 0) {
local_key_id_during_hs_ = 0;
dev_->key_id_lookup_.erase(v);
}
static void ActualFreeKeypair(void *x) {
WgKeypair *t = (WgKeypair*)x;
if (t->aes_gcm128_context_)
free(t->aes_gcm128_context_);
delete t;
}
void WgPeer::DeleteKeypair(WgKeypair **kp) {
WgKeypair *t = *kp;
*kp = NULL;
if (t) {
if (t->addr_entry)
dev_->EraseKeypairAddrEntry(t);
if (t->local_key_id)
assert(t->peer->IsPeerLocked());
if (t->addr_entry) {
WG_SCOPED_RWLOCK_EXCLUSIVE(dev_->addr_entry_lookup_lock_);
dev_->EraseKeypairAddrEntry_Locked(t);
}
if (t->local_key_id) {
WG_SCOPED_RWLOCK_EXCLUSIVE(dev_->key_id_lookup_lock_);
dev_->key_id_lookup_.erase(t->local_key_id);
if (t->aes_gcm128_context_)
free(t->aes_gcm128_context_);
delete t;
t->local_key_id = 0;
}
t->recv_key_state = WgKeypair::KEY_INVALID;
dev_->delayed_delete_.Add(&ActualFreeKeypair, t);
}
}
@ -840,21 +825,24 @@ WgKeypair *WgPeer::CreateNewKeypair(bool is_initiator, const uint8 chaining_key[
if (!kp)
return NULL;
memset(kp, 0, offsetof(WgKeypair, replay_detector));
kp->peer = this;
kp->is_initiator = is_initiator;
kp->remote_key_id = remote_key_id;
kp->auth_tag_length = CHACHA20POLY1305_AUTHTAGLEN;
#if WITH_HANDSHAKE_EXT
if (!WgKeypairParseExtendedHandshake(kp, extfield, extfield_size))
goto fail;
if (!WgKeypairParseExtendedHandshake(kp, extfield, extfield_size)) {
fail:
delete kp;
return NULL;
}
#endif // WITH_HANDSHAKE_EXT
first_key = kp->send_key, second_key = kp->recv_key;
if (!is_initiator)
std::swap(first_key, second_key);
blake2s_hkdf(first_key, sizeof(kp->send_key), second_key, sizeof(kp->recv_key),
kp->auth_tag_length != CHACHA20POLY1305_AUTHTAGLEN ? (uint8*)kp->compress_mac_keys : NULL, 32, NULL, 0, chaining_key, WG_HASH_LEN);
kp->auth_tag_length != CHACHA20POLY1305_AUTHTAGLEN ? (uint8*)kp->compress_mac_keys : NULL, 32,
NULL, 0, chaining_key, WG_HASH_LEN);
if (!is_initiator) {
std::swap(kp->compress_mac_keys[0][0], kp->compress_mac_keys[1][0]);
@ -870,25 +858,22 @@ WgKeypair *WgPeer::CreateNewKeypair(bool is_initiator, const uint8 chaining_key[
int key_size = (kp->cipher_suite == EXT_CIPHER_SUITE_AES128_GCM) ? 128 : 256;
CRYPTO_gcm128_init(&kp->aes_gcm128_context_[0], kp->send_key, key_size);
CRYPTO_gcm128_init(&kp->aes_gcm128_context_[1], kp->recv_key, key_size);
#else
#else // WITH_AESGCM
goto fail;
#endif
#endif // WITH_AESGCM
}
#endif // WITH_HANDSHAKE_EXT
kp->send_key_state = kp->recv_key_state = WgKeypair::KEY_VALID;
time_of_next_key_event_ = 0;
kp->key_timestamp = OsGetMilliseconds();
return kp;
fail:
delete kp;
return NULL;
}
void WgPeer::InsertKeypairInPeer(WgKeypair *kp) {
assert(kp->peer == this);
void WgPeer::InsertKeypairInPeer_Locked(WgKeypair *kp) {
assert(dev_->IsMainThread() && IsPeerLocked());
assert(kp->peer == NULL);
kp->peer = this;
time_of_next_key_event_ = 0;
DeleteKeypair(&prev_keypair_);
if (kp->is_initiator) {
// When we're the initator then we got the handshake and we can
@ -908,7 +893,8 @@ void WgPeer::InsertKeypairInPeer(WgKeypair *kp) {
}
}
bool WgPeer::CheckSwitchToNextKey(WgKeypair *keypair) {
bool WgPeer::CheckSwitchToNextKey_Locked(WgKeypair *keypair) {
assert(IsPeerLocked());
if (keypair != next_keypair_)
return false;
DeleteKeypair(&prev_keypair_);
@ -920,6 +906,7 @@ bool WgPeer::CheckSwitchToNextKey(WgKeypair *keypair) {
}
bool WgPeer::CheckHandshakeRateLimit() {
assert(dev_->IsMainThread());
uint64 now = OsGetMilliseconds();
if (now - last_handshake_init_timestamp_ < REKEY_TIMEOUT_MS)
return false;
@ -928,6 +915,7 @@ bool WgPeer::CheckHandshakeRateLimit() {
}
void WgPeer::WriteMacToPacket(const uint8 *data, MessageMacs *dst) {
assert(dev_->IsMainThread());
expect_cookie_reply_ = true;
blake2s(dst->mac1, sizeof(dst->mac1), data, (uint8*)dst->mac1 - data, precomputed_mac1_key_, sizeof(precomputed_mac1_key_));
memcpy(sent_mac1_, dst->mac1, sizeof(sent_mac1_));
@ -964,6 +952,7 @@ enum {
#define WgSetTimer(x) (timers_ |= (32 << (x)))
void WgPeer::OnDataSent() {
assert(IsPeerLocked());
WgClearTimer(TIMER_SEND_KEEPALIVE);
if (!WgIsTimerActive(TIMER_NEW_HANDSHAKE))
WgSetTimer(TIMER_NEW_HANDSHAKE);
@ -971,10 +960,12 @@ void WgPeer::OnDataSent() {
}
// Timer bookkeeping for a sent keepalive (per the function name; the call
// sites are outside this view). The peer lock must be held.
void WgPeer::OnKeepaliveSent() {
assert(IsPeerLocked());
// Flag the persistent-keepalive timer; WgSetTimer ORs (32 << id) into timers_.
WgSetTimer(TIMER_PERSISTENT_KEEPALIVE);
}
void WgPeer::OnDataReceived() {
assert(IsPeerLocked());
WgClearTimer(TIMER_NEW_HANDSHAKE);
if (!WgIsTimerActive(TIMER_SEND_KEEPALIVE))
WgSetTimer(TIMER_SEND_KEEPALIVE);
@ -984,16 +975,19 @@ void WgPeer::OnDataReceived() {
}
// Timer bookkeeping for a received keepalive (per the function name; the
// call sites are outside this view). The peer lock must be held.
void WgPeer::OnKeepaliveReceived() {
assert(IsPeerLocked());
// NOTE(review): WgClearTimer is defined outside this view; presumably it
// deactivates the given timer -- confirm.
WgClearTimer(TIMER_NEW_HANDSHAKE);
// Flag the persistent-keepalive timer; WgSetTimer ORs (32 << id) into timers_.
WgSetTimer(TIMER_PERSISTENT_KEEPALIVE);
}
// Timer bookkeeping for a sent handshake initiation (per the function name;
// the call sites are outside this view). The peer lock must be held.
void WgPeer::OnHandshakeInitSent() {
assert(IsPeerLocked());
// NOTE(review): WgClearTimer is defined outside this view; presumably it
// deactivates the given timer -- confirm.
WgClearTimer(TIMER_SEND_KEEPALIVE);
// Flag the handshake-retransmit timer; WgSetTimer ORs (32 << id) into timers_.
WgSetTimer(TIMER_RETRANSMIT_HANDSHAKE);
}
void WgPeer::OnHandshakeAuthComplete() {
assert(IsPeerLocked());
WgClearTimer(TIMER_NEW_HANDSHAKE);
WgSetTimer(TIMER_ZERO_KEYS);
WgSetTimer(TIMER_PERSISTENT_KEEPALIVE);
@ -1007,8 +1001,11 @@ static const char * const kCipherSuites[] = {
};
void WgPeer::OnHandshakeFullyComplete() {
assert(IsPeerLocked());
WgClearTimer(TIMER_RETRANSMIT_HANDSHAKE);
handshake_attempts_ = 0;
total_handshake_attempts_ = handshake_attempts_ = 0;
uint64 now = OsGetMilliseconds();
if (last_complete_handskake_timestamp_ == 0) {
bool any_feature = false;
@ -1022,17 +1019,15 @@ void WgPeer::OnHandshakeFullyComplete() {
curr_keypair_->enabled_features[4] ? "skip_keyid_in" : "",
curr_keypair_->enabled_features[5] ? "skip_keyid_out" : "");
}
}
last_complete_handskake_timestamp_ = OsGetMilliseconds();
dev_->last_complete_handskake_timestamp_ = last_complete_handskake_timestamp_;
last_complete_handskake_timestamp_ = now;
// RINFO("Connection established.");
}
// Check if any of the timeouts have expired
uint32 WgPeer::CheckTimeouts(uint64 now) {
assert(IsPeerLocked());
uint32 t, rv = 0;
if (now >= time_of_next_key_event_)
@ -1056,11 +1051,9 @@ uint32 WgPeer::CheckTimeouts(uint64 now) {
if ((t & (1 << TIMER_RETRANSMIT_HANDSHAKE)) && (now32 - timer_value_[TIMER_RETRANSMIT_HANDSHAKE]) >= REKEY_TIMEOUT_MS) {
t ^= (1 << TIMER_RETRANSMIT_HANDSHAKE);
if (handshake_attempts_ > MAX_HANDSHAKE_ATTEMPTS) {
RINFO("Too many handshake attempts. Stopping.");
t &= ~(1 << TIMER_SEND_KEEPALIVE);
ClearPacketQueue();
ClearPacketQueue_Locked();
} else {
RINFO("Retrying handshake, attempt %d...", handshake_attempts_ + 2);
handshake_attempts_++;
rv |= ACTION_SEND_HANDSHAKE;
}
@ -1085,13 +1078,12 @@ uint32 WgPeer::CheckTimeouts(uint64 now) {
t &= ~(1 << TIMER_NEW_HANDSHAKE);
handshake_attempts_ = 0;
rv |= ACTION_SEND_HANDSHAKE;
RINFO("Retrying handshake with peer");
}
if ((t & (1 << TIMER_ZERO_KEYS)) && (now32 - timer_value_[TIMER_ZERO_KEYS]) >= REJECT_AFTER_TIME_MS * 3) {
RINFO("Expiring all keys for peer");
t &= ~(1 << TIMER_ZERO_KEYS);
ClearKeys();
ClearHandshake();
ClearKeys_Locked();
ClearHandshake_Locked();
}
}
timers_ = t;
@ -1100,6 +1092,7 @@ uint32 WgPeer::CheckTimeouts(uint64 now) {
// Check all key stuff here to avoid calling possibly expensive timestamp routines in the packet handler
void WgPeer::CheckAndUpdateTimeOfNextKeyEvent(uint64 now) {
assert(IsPeerLocked());
uint64 next_time = UINT64_MAX;
uint32 rv = 0;
@ -1110,8 +1103,7 @@ void WgPeer::CheckAndUpdateTimeOfNextKeyEvent(uint64 now) {
// if a peer is the initiator of a current secure session, WireGuard will send a handshake initiation
// message to begin a new secure session if, after transmitting a transport data message, the current secure session
// is REKEY_AFTER_TIME_MS old, or if after receiving a transport data message, the current secure session is
// (REJECT_AFTER_TIME_MS - KEEPALIVE_TIMEOUT_MS - REKEY_TIMEOUT_MS) milliseconds old and it has not yet acted upon
// this event.
// (REJECT_AFTER_TIME_MS - KEEPALIVE_TIMEOUT_MS - REKEY_TIMEOUT_MS) milliseconds old and it has not yet acted upon it.
if (now >= curr_keypair_->key_timestamp + (REJECT_AFTER_TIME_MS - KEEPALIVE_TIMEOUT_MS - REKEY_TIMEOUT_MS)) {
next_time = curr_keypair_->key_timestamp + REJECT_AFTER_TIME_MS;
if (curr_keypair_->recv_key_state == WgKeypair::KEY_VALID)
@ -1153,16 +1145,22 @@ void WgPeer::SetPersistentKeepalive(int persistent_keepalive_secs) {
}
bool WgPeer::AddIp(const WgCidrAddr &cidr_addr) {
assert(dev_->IsMainThread());
if (cidr_addr.size == 32) {
if (cidr_addr.cidr > 32)
return false;
WG_ACQUIRE_RWLOCK_EXCLUSIVE(dev_->ip_to_peer_map_lock_);
dev_->ip_to_peer_map_.InsertV4(cidr_addr.addr, cidr_addr.cidr, this);
WG_RELEASE_RWLOCK_EXCLUSIVE(dev_->ip_to_peer_map_lock_);
allowed_ips_.push_back(cidr_addr);
return true;
} else if (cidr_addr.size == 128) {
if (cidr_addr.cidr > 128)
return false;
WG_ACQUIRE_RWLOCK_EXCLUSIVE(dev_->ip_to_peer_map_lock_);
dev_->ip_to_peer_map_.InsertV6(cidr_addr.addr, cidr_addr.cidr, this);
WG_RELEASE_RWLOCK_EXCLUSIVE(dev_->ip_to_peer_map_lock_);
allowed_ips_.push_back(cidr_addr);
return true;
} else {
@ -1183,14 +1181,13 @@ bool WgPeer::AddCipher(int cipher) {
return false;
if (cipher == EXT_CIPHER_SUITE_AES128_GCM || cipher == EXT_CIPHER_SUITE_AES256_GCM) {
#if !WITH_AESGCM
return true;
#endif // !WITH_AESGCM
#if defined(ARCH_CPU_X86_FAMILY) && WITH_AESGCM
if (!X86_PCAP_AES)
return true;
#else
return true;
#endif // defined(ARCH_CPU_X86_FAMILY) && WITH_AESGCM
}
ciphers_[num_ciphers_++] = cipher;
return true;
}
@ -1214,15 +1211,10 @@ void WgRateLimit::Periodic(uint32 s[5]) {
if (per_sec < 1)
per_sec = 1;
}
if ((unsigned)per_sec > packets_per_sec_)
per_sec = (per_sec + packets_per_sec_ + 1) >> 1;
// if (per_sec != packets_per_sec_) {
// RINFO("Setting pps: %d", per_sec);
packets_per_sec_ = per_sec;
// }
packets_per_sec_ = per_sec;
used_rate_limit_ = 0;
rand_xor_ = s[4];
key2_[0] = key1_[0];
@ -1278,7 +1270,8 @@ void WgKeypairEncryptPayload(uint8 *dst, const size_t src_len,
bool WgKeypairDecryptPayload(uint8 *dst, size_t src_len,
const uint8 *ad, size_t ad_len,
const uint64 nonce, WgKeypair *keypair) {
uint8 mac[16];
__aligned(16) uint8 mac[16];
if (src_len < keypair->auth_tag_length)
return false;

View file

@ -4,9 +4,40 @@
#include "tunsafe_types.h"
#include "netapi.h"
#include "ipzip2/ipzip2.h"
#include "tunsafe_config.h"
#include "tunsafe_threading.h"
#include "ip_to_peer_map.h"
#include <vector>
#include <unordered_map>
#include <atomic>
// Threading macros that enable locks only in MT builds.
//
// When WITH_WG_THREADING is non-zero each macro forwards to the lock object
// passed as |name|. Otherwise every macro expands to nothing, so call sites
// compile unchanged in single-threaded builds.
//
// Notes for callers:
//  - The WG_SCOPED_* macros declare a local variable named |scoped_lock|, so
//    at most one of them can be used per scope.
//  - The WG_DECLARE_* expansions already end with ';'.
//  - |name| is parenthesized wherever it is used inside an expression, so an
//    argument such as *lock_ptr expands as intended.
#if WITH_WG_THREADING
// Plain mutex helpers.
#define WG_SCOPED_LOCK(name) AutoLock scoped_lock(&(name))
#define WG_ACQUIRE_LOCK(name) (name).Acquire()
#define WG_RELEASE_LOCK(name) (name).Release()
// Member/variable declarations for the two lock types.
#define WG_DECLARE_LOCK(name) Mutex name;
#define WG_DECLARE_RWLOCK(name) ReaderWriterLock name;
// Reader/writer lock helpers.
#define WG_ACQUIRE_RWLOCK_SHARED(name) (name).AcquireShared()
#define WG_RELEASE_RWLOCK_SHARED(name) (name).ReleaseShared()
#define WG_ACQUIRE_RWLOCK_EXCLUSIVE(name) (name).AcquireExclusive()
#define WG_RELEASE_RWLOCK_EXCLUSIVE(name) (name).ReleaseExclusive()
#define WG_SCOPED_RWLOCK_SHARED(name) ScopedLockShared scoped_lock(&(name))
#define WG_SCOPED_RWLOCK_EXCLUSIVE(name) ScopedLockExclusive scoped_lock(&(name))
#else // WITH_WG_THREADING
// Single-threaded build: all locking compiles away.
#define WG_SCOPED_LOCK(name)
#define WG_ACQUIRE_LOCK(name)
#define WG_RELEASE_LOCK(name)
#define WG_DECLARE_LOCK(name)
#define WG_DECLARE_RWLOCK(name)
#define WG_ACQUIRE_RWLOCK_SHARED(name)
#define WG_RELEASE_RWLOCK_SHARED(name)
#define WG_ACQUIRE_RWLOCK_EXCLUSIVE(name)
#define WG_RELEASE_RWLOCK_EXCLUSIVE(name)
#define WG_SCOPED_RWLOCK_SHARED(name)
#define WG_SCOPED_RWLOCK_EXCLUSIVE(name)
#endif // WITH_WG_THREADING
enum ProtocolTimeouts {
COOKIE_SECRET_MAX_AGE_MS = 120000,
@ -17,6 +48,8 @@ enum ProtocolTimeouts {
REJECT_AFTER_TIME_MS = 180000,
PERSISTENT_KEEPALIVE_MS = 25000,
MIN_HANDSHAKE_INTERVAL_MS = 20,
MAX_SIZE_OF_HANDSHAKE_EXTENSION = 1024,
};
enum ProtocolLimits {
@ -26,7 +59,6 @@ enum ProtocolLimits {
MAX_HANDSHAKE_ATTEMPTS = 20,
MAX_QUEUED_PACKETS_PER_PEER = 128,
MESSAGE_MINIMUM_SIZE = 16,
MAX_SIZE_OF_HANDSHAKE_EXTENSION = 1024,
};
enum MessageType {
@ -61,7 +93,7 @@ enum {
WG_ACK_HEADER_COUNTER_NONE = 0x00,
WG_ACK_HEADER_COUNTER_2 = 0x04,
WG_ACK_HEADER_COUNTER_4 = 0x08,
WG_ACK_HEADER_COUNTER_8 = 0x0C,
WG_ACK_HEADER_COUNTER_6 = 0x0C,
WG_ACK_HEADER_KEY_MASK = 3,
};
@ -166,39 +198,6 @@ STATIC_ASSERT(sizeof(WgPacketCompressionVer01) == 24, WgPacketCompressionVer01_w
struct WgKeypair;
class WgPeer;
// Maps CIDR addresses to a peer, always returning the longest match
// Peers are passed in and handed back as untyped void pointers.
class IpToPeerMap {
public:
IpToPeerMap();
~IpToPeerMap();
// Inserts an IP address of a given CIDR length into the lookup table, pointing to peer.
// |addr| points at the raw address bytes and |cidr| is the prefix length.
// NOTE(review): the meaning of the bool result is not visible in this
// declaration; presumably false means the entry was rejected - confirm.
bool InsertV4(const void *addr, int cidr, void *peer);
bool InsertV6(const void *addr, int cidr, void *peer);
// Lookup the peer matching the IP Address
// NOTE(review): byte order expected for the uint32 |ip| is not visible here - confirm.
void *LookupV4(uint32 ip);
void *LookupV6(const void *addr);
// NOTE(review): presumably these return the peer registered for the
// catch-all (/0) route of each address family - confirm against the implementation.
void *LookupV4DefaultPeer();
void *LookupV6DefaultPeer();
// Remove a peer from the table
void RemovePeer(void *peer);
private:
// One IPv4 table row: an address, a mask and the peer it points to.
struct Entry4 {
uint32 ip;
uint32 mask;
void *peer;
};
// One IPv6 table row: a 16-byte address, a prefix length and the peer it points to.
struct Entry6 {
uint8 ip[16];
uint8 cidr_len;
void *peer;
};
// Storage for the rows of each address family.
std::vector<Entry4> ipv4_;
std::vector<Entry6> ipv6_;
};
class WgRateLimit {
public:
@ -262,7 +261,6 @@ struct ScramblerSiphashKeys {
uint64 keys[4];
};
// Implementation of most business logic of Wireguard
class WgDevice {
friend class WgPeer;
friend class WireguardProcessor;
@ -272,7 +270,8 @@ public:
// Initialize with the private key, precompute all internal keys etc.
void Initialize(const uint8 private_key[WG_PUBLIC_KEY_LEN]);
// Create a new peer
WgPeer *AddPeer();
// Setup header obfuscation
@ -281,35 +280,26 @@ public:
// Check whether Mac1 appears to be valid
bool CheckCookieMac1(Packet *packet);
// Check whether Mac2 appears to be valid, this also uses
// the remote ip address
// Check whether Mac2 appears to be valid, this also uses the remote ip address
bool CheckCookieMac2(Packet *packet);
void CreateCookieMessage(MessageHandshakeCookie *dst, Packet *packet, uint32 remote_key_id);
void UpdateKeypairAddrEntry(uint64 addr_id, WgKeypair *keypair);
void UpdateKeypairAddrEntry_Locked(uint64 addr_id, WgKeypair *keypair);
void SecondLoop(uint64 now);
IpToPeerMap &ip_to_peer_map() { return ip_to_peer_map_; }
std::unordered_map<uint32, std::pair<WgPeer*, WgKeypair*> > &key_id_lookup() { return key_id_lookup_; }
WgPeer *first_peer() { return peers_; }
uint64 last_complete_handskake_timestamp() const {
return last_complete_handskake_timestamp_;
}
const uint8 *public_key() const { return s_pub_; }
void SecondLoop(uint64 now);
WgRateLimit *rate_limiter() { return &rate_limiter_; }
std::unordered_map<uint64, WgAddrEntry*> &addr_entry_map() { return addr_entry_lookup_; }
WgPacketCompressionVer01 *compression_header() { return &compression_header_; }
// Returns the result of comparing the calling thread against
// |main_thread_id_| via CurrentThreadIdEquals().
bool IsMainThread() {
  return CurrentThreadIdEquals(main_thread_id_);
}
// Stores the id returned by GetCurrentThreadId() in |main_thread_id_|,
// which is the value IsMainThread() later compares against.
void SetCurrentThreadAsMainThread() {
  main_thread_id_ = GetCurrentThreadId();
}
private:
std::pair<WgPeer*, WgKeypair*> *LookupPeerInKeyIdLookup(uint32 key_id);
WgKeypair *LookupKeypairByKeyId(uint32 key_id);
WgKeypair *LookupKeypairInAddrEntryMap(uint64 addr, uint32 slot);
// Return the peer matching the |public_key| or NULL
WgPeer *GetPeerFromPublicKey(uint8 public_key[WG_PUBLIC_KEY_LEN]);
// Create a cookie by inspecting the source address of the |packet|
@ -319,12 +309,19 @@ private:
// Get a random number
uint32 GetRandomNumber();
void EraseKeypairAddrEntry(WgKeypair *kp);
void EraseKeypairAddrEntry_Locked(WgKeypair *kp);
// Maps IP addresses to peers
IpToPeerMap ip_to_peer_map_;
// This lock protects |ip_to_peer_map_|.
WG_DECLARE_RWLOCK(ip_to_peer_map_lock_);
// For enumerating all peers
WgPeer *peers_;
// Lock that protects key_id_lookup_
WG_DECLARE_RWLOCK(key_id_lookup_lock_);
// Mapping from key-id to either an active keypair (if keypair is non-NULL),
// or to a handshake.
std::unordered_map<uint32, std::pair<WgPeer*, WgKeypair*> > key_id_lookup_;
@ -332,6 +329,7 @@ private:
// Mapping from IPV4 IP/PORT to WgPeer*, so we can find the peer when a key id is
// not explicitly included.
std::unordered_map<uint64, WgAddrEntry*> addr_entry_lookup_;
WG_DECLARE_RWLOCK(addr_entry_lookup_lock_);
// Counter for generating new indices in |keypair_lookup_|
uint8 next_rng_slot_;
@ -339,7 +337,7 @@ private:
// Whether packet obfuscation is enabled
bool header_obfuscation_;
uint64 last_complete_handskake_timestamp_;
ThreadId main_thread_id_;
uint64 low_resolution_timestamp_;
@ -360,9 +358,12 @@ private:
WgRateLimit rate_limiter_;
WgPacketCompressionVer01 compression_header_;
// For deferring deletes until all worker threads are guaranteed not to use an object.
MultithreadedDelayedDelete delayed_delete_;
};
// State for Noise handshake
// State for peer
class WgPeer {
friend class WgDevice;
friend class WireguardProcessor;
@ -387,10 +388,10 @@ public:
static WgPeer *ParseMessageHandshakeResponse(WgDevice *dev, const Packet *packet);
static void ParseMessageHandshakeCookie(WgDevice *dev, const MessageHandshakeCookie *src);
void CreateMessageHandshakeInitiation(Packet *packet);
bool CheckSwitchToNextKey(WgKeypair *keypair);
void ClearKeys();
void ClearHandshake();
void ClearPacketQueue();
bool CheckSwitchToNextKey_Locked(WgKeypair *keypair);
void ClearKeys_Locked();
void ClearHandshake_Locked();
void ClearPacketQueue_Locked();
bool CheckHandshakeRateLimit();
// Timer notifications
@ -408,23 +409,32 @@ public:
};
uint32 CheckTimeouts(uint64 now);
void AddPacketToPeerQueue(Packet *packet);
// Returns mutex_.IsLocked() when WITH_WG_THREADING is enabled. Without
// threading support it always returns true.
bool IsPeerLocked() {
#if WITH_WG_THREADING
  return mutex_.IsLocked();
#else // WITH_WG_THREADING
  return true;
#endif // WITH_WG_THREADING
}
private:
WgKeypair *CreateNewKeypair(bool is_initiator, const uint8 key[WG_HASH_LEN], uint32 send_key_id, const uint8 *extfield, size_t extfield_size);
static WgKeypair *CreateNewKeypair(bool is_initiator, const uint8 key[WG_HASH_LEN], uint32 send_key_id, const uint8 *extfield, size_t extfield_size);
void WriteMacToPacket(const uint8 *data, MessageMacs *mac);
void DeleteKeypair(WgKeypair **kp);
void CheckAndUpdateTimeOfNextKeyEvent(uint64 now);
static void CopyEndpointToPeer(WgKeypair *keypair, const IpAddr *addr);
static void CopyEndpointToPeer_Locked(WgKeypair *keypair, const IpAddr *addr);
size_t WriteHandshakeExtension(uint8 *dst, WgKeypair *keypair);
void InsertKeypairInPeer(WgKeypair *keypair);
void InsertKeypairInPeer_Locked(WgKeypair *keypair);
WgDevice *dev_;
WgPeer *next_peer_;
// Keypairs, |curr_keypair_| is the used one, the other ones are
// the old ones and the next one.
WgKeypair *curr_keypair_;
WgKeypair *prev_keypair_;
WgKeypair *next_keypair_;
WgKeypair *curr_keypair_, *prev_keypair_, *next_keypair_;
// Protects shared variables of the WgPeer
WG_DECLARE_LOCK(mutex_);
// Timestamp when the next key related event is going to occur.
uint64 time_of_next_key_event_;
@ -433,23 +443,38 @@ private:
uint32 timers_;
uint32 timer_value_[5];
// Holds the entry into the key id table during handshake
// Holds the entry into the key id table during handshake - mt only.
uint32 local_key_id_during_hs_;
// Address of peer
IpAddr endpoint_;
enum {
kMainThreadScheduled_ScheduleHandshake = 1,
};
std::atomic<uint32> main_thread_scheduled_;
WgPeer *main_thread_scheduled_next_;
// The broadcast address of the IPv4 network, used to block broadcast traffic
// from being sent out over the VPN link.
uint32 ipv4_broadcast_addr_;
// Whether the tunsafe specific handshake extensions are supported
bool supports_handshake_extensions_;
// Whether any data was sent since the keepalive timer was set
bool pending_keepalive_;
// Whether to change the endpoint on incoming packets.
bool allow_endpoint_change_;
// Whether we've sent a mac to the peer so we may expect a cookie reply back.
bool expect_cookie_reply_;
// Whether we want to route incoming multicast/broadcast traffic to this peer.
bool allow_multicast_through_peer_;
// Whether
// Whether |mac2_cookie_| is valid.
bool has_mac2_cookie_;
// Number of handshakes made so far, when this gets too high we stop connecting.
@ -462,11 +487,18 @@ private:
uint8 num_queued_packets_;
Packet *first_queued_packet_, **last_queued_packet_ptr_;
// For statistics
uint64 last_handshake_init_timestamp_;
uint64 last_complete_handskake_timestamp_;
uint64 last_handshake_init_recv_timestamp_;
enum { MAX_CIPHERS = 16 };
// Timestamp to detect flooding of handshakes
uint64 last_handshake_init_recv_timestamp_; // main thread only
// Number of handshake attempts since last successful handshake
uint32 total_handshake_attempts_;
// For dynamic ciphers, holds the list of supported ciphers.
enum { MAX_CIPHERS = 4 };
uint8 cipher_prio_;
uint8 num_ciphers_;
uint8 ciphers_[MAX_CIPHERS];
@ -482,19 +514,19 @@ private:
uint8 e_priv[WG_PUBLIC_KEY_LEN];
};
HandshakeState hs_;
// Remote's static public key - Written only by Init
// Remote's static public key - init only.
uint8 s_remote_[WG_PUBLIC_KEY_LEN];
// Remote's preshared key - Written only by Init
// Remote's preshared key - init only.
uint8 preshared_key_[WG_SYMMETRIC_KEY_LEN];
// Precomputed DH(spriv_local, spub_remote).
// Precomputed DH(spriv_local, spub_remote) - init only.
uint8 s_priv_pub_[WG_PUBLIC_KEY_LEN];
// The most recent seen timestamp, only accept higher timestamps.
uint8 last_timestamp_[WG_TIMESTAMP_LEN];
// Precomputed key for decrypting cookies from the peer.
// The most recent seen timestamp, only accept higher timestamps - mt only.
uint8 last_timestamp_[WG_TIMESTAMP_LEN];
// Precomputed key for decrypting cookies from the peer - init only.
uint8 precomputed_cookie_key_[WG_SYMMETRIC_KEY_LEN];
// Precomputed key for sending MACs to the peer.
// Precomputed key for sending MACs to the peer - init only.
uint8 precomputed_mac1_key_[WG_SYMMETRIC_KEY_LEN];
// The last mac value sent, required to make cookies
// The last mac value sent, required to make cookies - mt only.
uint8 sent_mac1_[WG_COOKIE_LEN];
// The mac2 cookie that gets appended to outgoing packets
uint8 mac2_cookie_[WG_COOKIE_LEN];
@ -520,10 +552,10 @@ public:
BITMAP_MASK = BITMAP_SIZE - 1,
};
uint64 expected_seq_nr() const { return expected_seq_nr_; }
const uint64 expected_seq_nr() const { return expected_seq_nr_; }
private:
uint64 expected_seq_nr_;
std::atomic<uint64> expected_seq_nr_;
uint32 bitmap_[BITMAP_SIZE];
};
@ -574,7 +606,7 @@ struct WgKeypair {
// Used so we know when to send out ack packets.
uint32 incoming_packet_count;
// Id of the key in my map
// Id of the key in my map. (MainThread)
uint32 local_key_id;
// Id of the key in their map
uint32 remote_key_id;
@ -602,7 +634,6 @@ struct WgKeypair {
// State for packet compressor
IpzipState ipzip_state_;
#endif // WITH_HANDSHAKE_EXT
};
void WgKeypairEncryptPayload(uint8 *dst, const size_t src_len,