Browse Source

local-tester: support failpoints

Anthony Romano 9 years ago
parent
commit
244266708b
2 changed files with 48 additions and 5 deletions
  1. 3 3
      tools/local-tester/Procfile
  2. 45 2
      tools/local-tester/faults.sh

+ 3 - 3
tools/local-tester/Procfile

@@ -14,8 +14,8 @@ faults: tools/local-tester/faults.sh
 
 stress-put: tools/benchmark/benchmark --endpoints=127.0.0.1:2379,127.0.0.1:22379,127.0.0.1:32379 --clients=27 --conns=3 put --sequential-keys --key-space-size=100000 --total=100000
 
-etcd1: bin/etcd --name infra1 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:11119 --advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:12380 --initial-advertise-peer-urls http://127.0.0.1:11111 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
-etcd2: bin/etcd --name infra2 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:22229 --advertise-client-urls http://127.0.0.1:22379 --listen-peer-urls http://127.0.0.1:22380 --initial-advertise-peer-urls http://127.0.0.1:22222 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
-etcd3: bin/etcd --name infra3 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:33339 --advertise-client-urls http://127.0.0.1:32379 --listen-peer-urls http://127.0.0.1:32380 --initial-advertise-peer-urls http://127.0.0.1:33333 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
+etcd1: GOFAIL_HTTP="127.0.0.1:11180" bin/etcd --name infra1 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:11119 --advertise-client-urls http://127.0.0.1:2379 --listen-peer-urls http://127.0.0.1:12380 --initial-advertise-peer-urls http://127.0.0.1:11111 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
+etcd2: GOFAIL_HTTP="127.0.0.1:22280" bin/etcd --name infra2 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:22229 --advertise-client-urls http://127.0.0.1:22379 --listen-peer-urls http://127.0.0.1:22380 --initial-advertise-peer-urls http://127.0.0.1:22222 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
+etcd3: GOFAIL_HTTP="127.0.0.1:33380" bin/etcd --name infra3 --snapshot-count=1000 --listen-client-urls http://127.0.0.1:33339 --advertise-client-urls http://127.0.0.1:32379 --listen-peer-urls http://127.0.0.1:32380 --initial-advertise-peer-urls http://127.0.0.1:33333 --initial-cluster-token etcd-cluster-1 --initial-cluster 'infra1=http://127.0.0.1:11111,infra2=http://127.0.0.1:22222,infra3=http://127.0.0.1:33333' --initial-cluster-state new --enable-pprof
 # in future, use proxy to listen on 2379
 #proxy: bin/etcd --name infra-proxy1 --proxy=on --listen-client-urls http://127.0.0.1:2378 --initial-cluster 'infra1=http://127.0.0.1:12380,infra2=http://127.0.0.1:22380,infra3=http://127.0.0.1:32380' --enable-pprof

+ 45 - 2
tools/local-tester/faults.sh

@@ -1,6 +1,7 @@
 #!/bin/bash
 
 PROCFILE="tools/local-tester/Procfile"  # handed to goreman through -f in the main loop
+HTTPFAIL=(127.0.0.1:11180 127.0.0.1:22280 127.0.0.1:33380)  # failpoint HTTP endpoints; same addresses as GOFAIL_HTTP for etcd1-3 in the Procfile
 
 function wait_time {
 	expr $RANDOM % 10 + 1
@@ -52,14 +53,56 @@ function kill_all {
 	done
 }
 
+# rand_fp prints one entry picked at random from the newline-separated
+# $FAILPOINTS list; $NUMFPS is used as the number of lines to choose from.
+function rand_fp {
+	local lineno
+	lineno=$(expr $RANDOM % $NUMFPS + 1)
+	# "<n>q;d" drops every line before <n>, then prints line <n> and quits
+	echo "$FAILPOINTS" | sed "${lineno}q;d"
+}
+
+# fp_activate <http> <fppath> <value>
+# Issues an HTTP PUT with <value> as the request body to <http>/<fppath>.
+# Everything curl prints is discarded; the function's exit status is curl's.
+function fp_activate {
+	local endpoint="$1" failpoint="$2" term="$3"
+	curl -X PUT --data "$term" "${endpoint}/${failpoint}" >/dev/null 2>&1
+}
+
+# fp_rand_single picks one random failpoint and one random endpoint from
+# HTTPFAIL, sets that failpoint to a panic term on that endpoint, and then
+# sleeps for the number of seconds printed by wait_time.
+function fp_rand_single {
+	# locals keep the helper from overwriting same-named globals
+	local fp target
+	fp=$(rand_fp)
+	target=${HTTPFAIL[$((RANDOM % ${#HTTPFAIL[@]}))]}
+	# quoted so an empty failpoint name cannot shift fp_activate's arguments
+	fp_activate "$target" "$fp" "panic(\"$fp\")"
+	sleep "$(wait_time)s"
+}
+
+# fp_rand_all picks one random failpoint, sets it to a panic term on every
+# endpoint in HTTPFAIL, and then sleeps for the number of seconds printed by
+# wait_time.
+function fp_rand_all {
+	local fp url
+	fp=$(rand_fp)
+	# Iterate over the array elements themselves. Counting with `seq` yields
+	# 1..N, but bash arrays are indexed from 0, so that form skips the first
+	# endpoint and reads one slot past the end.
+	for url in "${HTTPFAIL[@]}"; do
+		fp_activate "$url" "$fp" "panic(\"$fp\")"
+	done
+	sleep "$(wait_time)s"
+}
+
+function fp_all_rand_fire {
+	for fp in $FAILPOINTS; do
+		for url in "${HTTPFAIL[@]}"; do
+			fp_activate "$url" "$fp" '0.5%panic("0.5%'$fp'")'
+		done
+	done
+}
+
 function choose {  # run one randomly selected fault; report it if it fails
-	faults=(cycle_members kill_maj kill_all cycle_pbridge cycle_cbridge cycle_stresser)
-	fault=${faults[`expr $RANDOM % ${#faults[@]}`]}
+	fault=${FAULTS[`expr $RANDOM % ${#FAULTS[@]}`]}  # random index into the global FAULTS array
 	echo $fault  # print the chosen fault's name before running it
 	$fault || echo "failed: $fault"  # call the fault by name; print a message on non-zero status
 }
 
 sleep 2s
+
+FAULTS=(cycle_members kill_maj kill_all cycle_pbridge cycle_cbridge cycle_stresser)

+# add failpoint faults if available
+# The first endpoint in HTTPFAIL is queried; the text before '=' on each
+# non-empty line of the reply is kept as a failpoint name.
+FAILPOINTS=$(curl http://"${HTTPFAIL[0]}" 2>/dev/null | cut -f1 -d'=' | grep -v "^$")
+# An empty reply must count as zero failpoints. Piping an empty string through
+# `wc -l` still reports one line, so emptiness is tested directly instead of
+# comparing the line count against 0.
+NUMFPS=0
+if [ -n "$FAILPOINTS" ]; then
+	# the arithmetic expansion strips any padding wc puts around the number
+	NUMFPS=$(( $(echo "$FAILPOINTS" | wc -l) ))
+	FAULTS+=(fp_rand_single fp_rand_all)
+fi
+
 while [ 1 ]; do  # loops forever: `[ 1 ]` tests a non-empty string, which is always true
 	choose
+	# start any nodes that have been killed by failpoints
+	for a in etcd1 etcd2 etcd3; do goreman -f $PROCFILE run start $a; done
+	fp_all_rand_fire  # set every failpoint on every endpoint again before the next round
 done