# faults.sh — random fault injector for the goreman-managed local test cluster
  1. #!/bin/bash
  2. PROCFILE="tools/local-tester/Procfile"
  3. HTTPFAIL=(127.0.0.1:11180 127.0.0.1:22280 127.0.0.1:33380)
  4. function wait_time {
  5. expr $RANDOM % 10 + 1
  6. }
  7. function cycle {
  8. for a; do
  9. echo "cycling $a"
  10. goreman -f $PROCFILE run stop $a || echo "could not stop $a"
  11. sleep `wait_time`s
  12. goreman -f $PROCFILE run restart $a || echo "could not restart $a"
  13. done
  14. }
  15. function cycle_members {
  16. cycle etcd1 etcd2 etcd3
  17. }
  18. function cycle_pbridge {
  19. cycle pbridge1 pbridge2 pbridge3
  20. }
  21. function cycle_cbridge {
  22. cycle cbridge1 cbridge2 cbridge3
  23. }
  24. function cycle_stresser {
  25. cycle stress-put
  26. }
  27. function kill_maj {
  28. idx="etcd"`expr $RANDOM % 3 + 1`
  29. idx2="$idx"
  30. while [ "$idx" == "$idx2" ]; do
  31. idx2="etcd"`expr $RANDOM % 3 + 1`
  32. done
  33. echo "kill majority $idx $idx2"
  34. goreman -f $PROCFILE run stop $idx || echo "could not stop $idx"
  35. goreman -f $PROCFILE run stop $idx2 || echo "could not stop $idx2"
  36. sleep `wait_time`s
  37. goreman -f $PROCFILE run restart $idx || echo "could not restart $idx"
  38. goreman -f $PROCFILE run restart $idx2 || echo "could not restart $idx2"
  39. }
  40. function kill_all {
  41. for a in etcd1 etcd2 etcd3; do
  42. goreman -f $PROCFILE run stop $a || echo "could not stop $a"
  43. done
  44. sleep `wait_time`s
  45. for a in etcd1 etcd2 etcd3; do
  46. goreman -f $PROCFILE run restart $a || echo "could not restart $a"
  47. done
  48. }
  49. function rand_fp {
  50. echo "$FAILPOINTS" | sed `expr $RANDOM % $NUMFPS + 1`"q;d"
  51. }
  52. # fp_activate <http> <fppath> <value>
  53. function fp_activate {
  54. curl "$1"/"$2" -XPUT -d "$3" >/dev/null 2>&1
  55. }
  56. function fp_rand_single {
  57. fp=`rand_fp`
  58. fp_activate ${HTTPFAIL[`expr $RANDOM % ${#HTTPFAIL[@]}`]} $fp 'panic("'$fp'")'
  59. sleep `wait_time`s
  60. }
  61. function fp_rand_all {
  62. fp=`rand_fp`
  63. for a in `seq ${#HTTPFAIL[@]}`; do fp_activate ${HTTPFAIL[$a]} "$fp" 'panic("'$fp'")'; done
  64. sleep `wait_time`s
  65. }
  66. function fp_all_rand_fire {
  67. for fp in $FAILPOINTS; do
  68. for url in "${HTTPFAIL[@]}"; do
  69. fp_activate "$url" "$fp" '0.5%panic("0.5%'$fp'")'
  70. done
  71. done
  72. }
  73. function choose {
  74. fault=${FAULTS[`expr $RANDOM % ${#FAULTS[@]}`]}
  75. echo $fault
  76. $fault || echo "failed: $fault"
  77. }
  78. sleep 2s
  79. FAULTS=(cycle_members kill_maj kill_all cycle_pbridge cycle_cbridge cycle_stresser)
  80. # add failpoint faults if available
  81. FAILPOINTS=`curl http://"${HTTPFAIL[0]}" 2>/dev/null | cut -f1 -d'=' | grep -v "^$"`
  82. NUMFPS=`echo $(echo "$FAILPOINTS" | wc -l)`
  83. if [ "$NUMFPS" != "0" ]; then
  84. FAULTS+=(fp_rand_single)
  85. FAULTS+=(fp_rand_all)
  86. fi
  87. while [ 1 ]; do
  88. choose
  89. # start any nodes that have been killed by failpoints
  90. for a in etcd1 etcd2 etcd3; do goreman -f $PROCFILE run start $a; done
  91. fp_all_rand_fire
  92. done