cluster_util.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. // Copyright 2015 The etcd Authors
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package etcdserver
  15. import (
  16. "context"
  17. "encoding/json"
  18. "fmt"
  19. "io/ioutil"
  20. "net/http"
  21. "sort"
  22. "strings"
  23. "time"
  24. "go.etcd.io/etcd/etcdserver/api/membership"
  25. "go.etcd.io/etcd/pkg/types"
  26. "go.etcd.io/etcd/version"
  27. "github.com/coreos/go-semver/semver"
  28. "go.uber.org/zap"
  29. )
  30. // isMemberBootstrapped tries to check if the given member has been bootstrapped
  31. // in the given cluster.
  32. func isMemberBootstrapped(lg *zap.Logger, cl *membership.RaftCluster, member string, rt http.RoundTripper, timeout time.Duration) bool {
  33. rcl, err := getClusterFromRemotePeers(lg, getRemotePeerURLs(cl, member), timeout, false, rt)
  34. if err != nil {
  35. return false
  36. }
  37. id := cl.MemberByName(member).ID
  38. m := rcl.Member(id)
  39. if m == nil {
  40. return false
  41. }
  42. if len(m.ClientURLs) > 0 {
  43. return true
  44. }
  45. return false
  46. }
  47. // GetClusterFromRemotePeers takes a set of URLs representing etcd peers, and
  48. // attempts to construct a Cluster by accessing the members endpoint on one of
  49. // these URLs. The first URL to provide a response is used. If no URLs provide
  50. // a response, or a Cluster cannot be successfully created from a received
  51. // response, an error is returned.
  52. // Each request has a 10-second timeout. Because the upper limit of TTL is 5s,
  53. // 10 second is enough for building connection and finishing request.
  54. func GetClusterFromRemotePeers(lg *zap.Logger, urls []string, rt http.RoundTripper) (*membership.RaftCluster, error) {
  55. return getClusterFromRemotePeers(lg, urls, 10*time.Second, true, rt)
  56. }
  57. // If logerr is true, it prints out more error messages.
  58. func getClusterFromRemotePeers(lg *zap.Logger, urls []string, timeout time.Duration, logerr bool, rt http.RoundTripper) (*membership.RaftCluster, error) {
  59. cc := &http.Client{
  60. Transport: rt,
  61. Timeout: timeout,
  62. }
  63. for _, u := range urls {
  64. addr := u + "/members"
  65. resp, err := cc.Get(addr)
  66. if err != nil {
  67. if logerr {
  68. if lg != nil {
  69. lg.Warn("failed to get cluster response", zap.String("address", addr), zap.Error(err))
  70. } else {
  71. plog.Warningf("could not get cluster response from %s: %v", u, err)
  72. }
  73. }
  74. continue
  75. }
  76. b, err := ioutil.ReadAll(resp.Body)
  77. resp.Body.Close()
  78. if err != nil {
  79. if logerr {
  80. if lg != nil {
  81. lg.Warn("failed to read body of cluster response", zap.String("address", addr), zap.Error(err))
  82. } else {
  83. plog.Warningf("could not read the body of cluster response: %v", err)
  84. }
  85. }
  86. continue
  87. }
  88. var membs []*membership.Member
  89. if err = json.Unmarshal(b, &membs); err != nil {
  90. if logerr {
  91. if lg != nil {
  92. lg.Warn("failed to unmarshal cluster response", zap.String("address", addr), zap.Error(err))
  93. } else {
  94. plog.Warningf("could not unmarshal cluster response: %v", err)
  95. }
  96. }
  97. continue
  98. }
  99. id, err := types.IDFromString(resp.Header.Get("X-Etcd-Cluster-ID"))
  100. if err != nil {
  101. if logerr {
  102. if lg != nil {
  103. lg.Warn(
  104. "failed to parse cluster ID",
  105. zap.String("address", addr),
  106. zap.String("header", resp.Header.Get("X-Etcd-Cluster-ID")),
  107. zap.Error(err),
  108. )
  109. } else {
  110. plog.Warningf("could not parse the cluster ID from cluster res: %v", err)
  111. }
  112. }
  113. continue
  114. }
  115. // check the length of membership members
  116. // if the membership members are present then prepare and return raft cluster
  117. // if membership members are not present then the raft cluster formed will be
  118. // an invalid empty cluster hence return failed to get raft cluster member(s) from the given urls error
  119. if len(membs) > 0 {
  120. return membership.NewClusterFromMembers(lg, "", id, membs), nil
  121. }
  122. return nil, fmt.Errorf("failed to get raft cluster member(s) from the given URLs")
  123. }
  124. return nil, fmt.Errorf("could not retrieve cluster information from the given URLs")
  125. }
  126. // getRemotePeerURLs returns peer urls of remote members in the cluster. The
  127. // returned list is sorted in ascending lexicographical order.
  128. func getRemotePeerURLs(cl *membership.RaftCluster, local string) []string {
  129. us := make([]string, 0)
  130. for _, m := range cl.Members() {
  131. if m.Name == local {
  132. continue
  133. }
  134. us = append(us, m.PeerURLs...)
  135. }
  136. sort.Strings(us)
  137. return us
  138. }
  139. // getVersions returns the versions of the members in the given cluster.
  140. // The key of the returned map is the member's ID. The value of the returned map
  141. // is the semver versions string, including server and cluster.
  142. // If it fails to get the version of a member, the key will be nil.
  143. func getVersions(lg *zap.Logger, cl *membership.RaftCluster, local types.ID, rt http.RoundTripper) map[string]*version.Versions {
  144. members := cl.Members()
  145. vers := make(map[string]*version.Versions)
  146. for _, m := range members {
  147. if m.ID == local {
  148. cv := "not_decided"
  149. if cl.Version() != nil {
  150. cv = cl.Version().String()
  151. }
  152. vers[m.ID.String()] = &version.Versions{Server: version.Version, Cluster: cv}
  153. continue
  154. }
  155. ver, err := getVersion(lg, m, rt)
  156. if err != nil {
  157. if lg != nil {
  158. lg.Warn("failed to get version", zap.String("remote-member-id", m.ID.String()), zap.Error(err))
  159. } else {
  160. plog.Warningf("cannot get the version of member %s (%v)", m.ID, err)
  161. }
  162. vers[m.ID.String()] = nil
  163. } else {
  164. vers[m.ID.String()] = ver
  165. }
  166. }
  167. return vers
  168. }
  169. // decideClusterVersion decides the cluster version based on the versions map.
  170. // The returned version is the min server version in the map, or nil if the min
  171. // version in unknown.
  172. func decideClusterVersion(lg *zap.Logger, vers map[string]*version.Versions) *semver.Version {
  173. var cv *semver.Version
  174. lv := semver.Must(semver.NewVersion(version.Version))
  175. for mid, ver := range vers {
  176. if ver == nil {
  177. return nil
  178. }
  179. v, err := semver.NewVersion(ver.Server)
  180. if err != nil {
  181. if lg != nil {
  182. lg.Warn(
  183. "failed to parse server version of remote member",
  184. zap.String("remote-member-id", mid),
  185. zap.String("remote-member-version", ver.Server),
  186. zap.Error(err),
  187. )
  188. } else {
  189. plog.Errorf("cannot understand the version of member %s (%v)", mid, err)
  190. }
  191. return nil
  192. }
  193. if lv.LessThan(*v) {
  194. if lg != nil {
  195. lg.Warn(
  196. "leader found higher-versioned member",
  197. zap.String("local-member-version", lv.String()),
  198. zap.String("remote-member-id", mid),
  199. zap.String("remote-member-version", ver.Server),
  200. )
  201. } else {
  202. plog.Warningf("the local etcd version %s is not up-to-date", lv.String())
  203. plog.Warningf("member %s has a higher version %s", mid, ver.Server)
  204. }
  205. }
  206. if cv == nil {
  207. cv = v
  208. } else if v.LessThan(*cv) {
  209. cv = v
  210. }
  211. }
  212. return cv
  213. }
  214. // isCompatibleWithCluster return true if the local member has a compatible version with
  215. // the current running cluster.
  216. // The version is considered as compatible when at least one of the other members in the cluster has a
  217. // cluster version in the range of [MinClusterVersion, Version] and no known members has a cluster version
  218. // out of the range.
  219. // We set this rule since when the local member joins, another member might be offline.
  220. func isCompatibleWithCluster(lg *zap.Logger, cl *membership.RaftCluster, local types.ID, rt http.RoundTripper) bool {
  221. vers := getVersions(lg, cl, local, rt)
  222. minV := semver.Must(semver.NewVersion(version.MinClusterVersion))
  223. maxV := semver.Must(semver.NewVersion(version.Version))
  224. maxV = &semver.Version{
  225. Major: maxV.Major,
  226. Minor: maxV.Minor,
  227. }
  228. return isCompatibleWithVers(lg, vers, local, minV, maxV)
  229. }
  230. func isCompatibleWithVers(lg *zap.Logger, vers map[string]*version.Versions, local types.ID, minV, maxV *semver.Version) bool {
  231. var ok bool
  232. for id, v := range vers {
  233. // ignore comparison with local version
  234. if id == local.String() {
  235. continue
  236. }
  237. if v == nil {
  238. continue
  239. }
  240. clusterv, err := semver.NewVersion(v.Cluster)
  241. if err != nil {
  242. if lg != nil {
  243. lg.Warn(
  244. "failed to parse cluster version of remote member",
  245. zap.String("remote-member-id", id),
  246. zap.String("remote-member-cluster-version", v.Cluster),
  247. zap.Error(err),
  248. )
  249. } else {
  250. plog.Errorf("cannot understand the cluster version of member %s (%v)", id, err)
  251. }
  252. continue
  253. }
  254. if clusterv.LessThan(*minV) {
  255. if lg != nil {
  256. lg.Warn(
  257. "cluster version of remote member is not compatible; too low",
  258. zap.String("remote-member-id", id),
  259. zap.String("remote-member-cluster-version", clusterv.String()),
  260. zap.String("minimum-cluster-version-supported", minV.String()),
  261. )
  262. } else {
  263. plog.Warningf("the running cluster version(%v) is lower than the minimal cluster version(%v) supported", clusterv.String(), minV.String())
  264. }
  265. return false
  266. }
  267. if maxV.LessThan(*clusterv) {
  268. if lg != nil {
  269. lg.Warn(
  270. "cluster version of remote member is not compatible; too high",
  271. zap.String("remote-member-id", id),
  272. zap.String("remote-member-cluster-version", clusterv.String()),
  273. zap.String("minimum-cluster-version-supported", minV.String()),
  274. )
  275. } else {
  276. plog.Warningf("the running cluster version(%v) is higher than the maximum cluster version(%v) supported", clusterv.String(), maxV.String())
  277. }
  278. return false
  279. }
  280. ok = true
  281. }
  282. return ok
  283. }
  284. // getVersion returns the Versions of the given member via its
  285. // peerURLs. Returns the last error if it fails to get the version.
  286. func getVersion(lg *zap.Logger, m *membership.Member, rt http.RoundTripper) (*version.Versions, error) {
  287. cc := &http.Client{
  288. Transport: rt,
  289. }
  290. var (
  291. err error
  292. resp *http.Response
  293. )
  294. for _, u := range m.PeerURLs {
  295. addr := u + "/version"
  296. resp, err = cc.Get(addr)
  297. if err != nil {
  298. if lg != nil {
  299. lg.Warn(
  300. "failed to reach the peer URL",
  301. zap.String("address", addr),
  302. zap.String("remote-member-id", m.ID.String()),
  303. zap.Error(err),
  304. )
  305. } else {
  306. plog.Warningf("failed to reach the peerURL(%s) of member %s (%v)", u, m.ID, err)
  307. }
  308. continue
  309. }
  310. var b []byte
  311. b, err = ioutil.ReadAll(resp.Body)
  312. resp.Body.Close()
  313. if err != nil {
  314. if lg != nil {
  315. lg.Warn(
  316. "failed to read body of response",
  317. zap.String("address", addr),
  318. zap.String("remote-member-id", m.ID.String()),
  319. zap.Error(err),
  320. )
  321. } else {
  322. plog.Warningf("failed to read out the response body from the peerURL(%s) of member %s (%v)", u, m.ID, err)
  323. }
  324. continue
  325. }
  326. var vers version.Versions
  327. if err = json.Unmarshal(b, &vers); err != nil {
  328. if lg != nil {
  329. lg.Warn(
  330. "failed to unmarshal response",
  331. zap.String("address", addr),
  332. zap.String("remote-member-id", m.ID.String()),
  333. zap.Error(err),
  334. )
  335. } else {
  336. plog.Warningf("failed to unmarshal the response body got from the peerURL(%s) of member %s (%v)", u, m.ID, err)
  337. }
  338. continue
  339. }
  340. return &vers, nil
  341. }
  342. return nil, err
  343. }
  344. func promoteMemberHTTP(ctx context.Context, url string, id uint64, peerRt http.RoundTripper) ([]*membership.Member, error) {
  345. cc := &http.Client{Transport: peerRt}
  346. // TODO: refactor member http handler code
  347. // cannot import etcdhttp, so manually construct url
  348. requestUrl := url + "/members/promote/" + fmt.Sprintf("%d", id)
  349. req, err := http.NewRequest("POST", requestUrl, nil)
  350. if err != nil {
  351. return nil, err
  352. }
  353. req = req.WithContext(ctx)
  354. resp, err := cc.Do(req)
  355. if err != nil {
  356. return nil, err
  357. }
  358. defer resp.Body.Close()
  359. b, err := ioutil.ReadAll(resp.Body)
  360. if err != nil {
  361. return nil, err
  362. }
  363. if resp.StatusCode == http.StatusRequestTimeout {
  364. return nil, ErrTimeout
  365. }
  366. if resp.StatusCode == http.StatusPreconditionFailed {
  367. // both ErrMemberNotLearner and ErrLearnerNotReady have same http status code
  368. if strings.Contains(string(b), ErrLearnerNotReady.Error()) {
  369. return nil, ErrLearnerNotReady
  370. }
  371. if strings.Contains(string(b), membership.ErrMemberNotLearner.Error()) {
  372. return nil, membership.ErrMemberNotLearner
  373. }
  374. return nil, fmt.Errorf("member promote: unknown error(%s)", string(b))
  375. }
  376. if resp.StatusCode == http.StatusNotFound {
  377. return nil, membership.ErrIDNotFound
  378. }
  379. if resp.StatusCode != http.StatusOK { // all other types of errors
  380. return nil, fmt.Errorf("member promote: unknown error(%s)", string(b))
  381. }
  382. var membs []*membership.Member
  383. if err := json.Unmarshal(b, &membs); err != nil {
  384. return nil, err
  385. }
  386. return membs, nil
  387. }