exec_plan9.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Fork, exec, wait, etc.
  5. package plan9
  6. import (
  7. "runtime"
  8. "sync"
  9. "unsafe"
  10. )
  11. // Lock synchronizing creation of new file descriptors with fork.
  12. //
  13. // We want the child in a fork/exec sequence to inherit only the
  14. // file descriptors we intend. To do that, we mark all file
  15. // descriptors close-on-exec and then, in the child, explicitly
  16. // unmark the ones we want the exec'ed program to keep.
  17. // Unix doesn't make this easy: there is, in general, no way to
  18. // allocate a new file descriptor close-on-exec. Instead you
  19. // have to allocate the descriptor and then mark it close-on-exec.
  20. // If a fork happens between those two events, the child's exec
  21. // will inherit an unwanted file descriptor.
  22. //
  23. // This lock solves that race: the create new fd/mark close-on-exec
  24. // operation is done holding ForkLock for reading, and the fork itself
  25. // is done holding ForkLock for writing. At least, that's the idea.
  26. // There are some complications.
  27. //
  28. // Some system calls that create new file descriptors can block
  29. // for arbitrarily long times: open on a hung NFS server or named
  30. // pipe, accept on a socket, and so on. We can't reasonably grab
  31. // the lock across those operations.
  32. //
  33. // It is worse to inherit some file descriptors than others.
  34. // If a non-malicious child accidentally inherits an open ordinary file,
  35. // that's not a big deal. On the other hand, if a long-lived child
  36. // accidentally inherits the write end of a pipe, then the reader
  37. // of that pipe will not see EOF until that child exits, potentially
  38. // causing the parent program to hang. This is a common problem
  39. // in threaded C programs that use popen.
  40. //
  41. // Luckily, the file descriptors that are most important not to
  42. // inherit are not the ones that can take an arbitrarily long time
  43. // to create: pipe returns instantly, and the net package uses
  44. // non-blocking I/O to accept on a listening socket.
  45. // The rules for which file descriptor-creating operations use the
  46. // ForkLock are as follows:
  47. //
  48. // 1) Pipe. Does not block. Use the ForkLock.
  49. // 2) Socket. Does not block. Use the ForkLock.
  50. // 3) Accept. If using non-blocking mode, use the ForkLock.
  51. // Otherwise, live with the race.
  52. // 4) Open. Can block. Use O_CLOEXEC if available (Linux).
  53. // Otherwise, live with the race.
  54. // 5) Dup. Does not block. Use the ForkLock.
  55. // On Linux, could use fcntl F_DUPFD_CLOEXEC
  56. // instead of the ForkLock, but only for dup(fd, -1).
//
// In this file, forkExec takes ForkLock for writing while it lists the
// open descriptors through the dup device, creates the child status
// pipe, and forks.
var ForkLock sync.RWMutex
  58. // SlicePtrFromStrings converts a slice of strings to a slice of
  59. // pointers to NUL-terminated byte slices. If any string contains
  60. // a NUL byte, it returns (nil, EINVAL).
  61. func SlicePtrFromStrings(ss []string) ([]*byte, error) {
  62. var err error
  63. bb := make([]*byte, len(ss)+1)
  64. for i := 0; i < len(ss); i++ {
  65. bb[i], err = BytePtrFromString(ss[i])
  66. if err != nil {
  67. return nil, err
  68. }
  69. }
  70. bb[len(ss)] = nil
  71. return bb, nil
  72. }
  73. // readdirnames returns the names of files inside the directory represented by dirfd.
  74. func readdirnames(dirfd int) (names []string, err error) {
  75. names = make([]string, 0, 100)
  76. var buf [STATMAX]byte
  77. for {
  78. n, e := Read(dirfd, buf[:])
  79. if e != nil {
  80. return nil, e
  81. }
  82. if n == 0 {
  83. break
  84. }
  85. for i := 0; i < n; {
  86. m, _ := gbit16(buf[i:])
  87. m += 2
  88. if m < STATFIXLEN {
  89. return nil, ErrBadStat
  90. }
  91. s, _, ok := gstring(buf[i+41:])
  92. if !ok {
  93. return nil, ErrBadStat
  94. }
  95. names = append(names, s)
  96. i += int(m)
  97. }
  98. }
  99. return
  100. }
  101. // readdupdevice returns a list of currently opened fds (excluding stdin, stdout, stderr) from the dup device #d.
  102. // ForkLock should be write locked before calling, so that no new fds would be created while the fd list is being read.
  103. func readdupdevice() (fds []int, err error) {
  104. dupdevfd, err := Open("#d", O_RDONLY)
  105. if err != nil {
  106. return
  107. }
  108. defer Close(dupdevfd)
  109. names, err := readdirnames(dupdevfd)
  110. if err != nil {
  111. return
  112. }
  113. fds = make([]int, 0, len(names)/2)
  114. for _, name := range names {
  115. if n := len(name); n > 3 && name[n-3:n] == "ctl" {
  116. continue
  117. }
  118. fd := int(atoi([]byte(name)))
  119. switch fd {
  120. case 0, 1, 2, dupdevfd:
  121. continue
  122. }
  123. fds = append(fds, fd)
  124. }
  125. return
  126. }
  127. var startupFds []int
  128. // Plan 9 does not allow clearing the OCEXEC flag
  129. // from the underlying channel backing an open file descriptor,
  130. // therefore we store a list of already opened file descriptors
  131. // inside startupFds and skip them when manually closing descriptors
  132. // not meant to be passed to a child exec.
  133. func init() {
  134. startupFds, _ = readdupdevice()
  135. }
// forkAndExecInChild forks the process, calling dup onto 0..len(fd)
// and finally invoking exec(argv0, argvv, envv) in the child.
// If a dup or exec fails, it writes the error string to pipe.
// (The pipe write end is close-on-exec so if exec succeeds, it will be closed.)
//
// Parameters:
//   - argv0, argv: program path and nil-terminated argument vector.
//   - envv: when non-nil, RFCENVG is added to the rfork flags and each
//     item is written to the file named by its name field.
//   - dir: when non-nil, the child changes to this directory.
//   - attr: attr.Files[i] is installed as descriptor i in the child;
//     an entry of -1 closes descriptor i instead.
//   - fdsToClose: descriptors the child closes before anything else.
//   - pipe: write end of the status pipe used to report failures.
//   - rflag: extra flags OR'ed into the rfork call.
//
// In the parent it returns the child's pid, or an error built from
// errstr() if rfork fails. In the child it never returns: either exec
// replaces the process image or the error path exits.
//
// In the child, this function must not acquire any locks, because
// they might have been locked at the time of the fork. This means
// no rescheduling, no malloc calls, and no new stack segments.
// The calls to RawSyscall are okay because they are assembly
// functions that do not grow the stack.
func forkAndExecInChild(argv0 *byte, argv []*byte, envv []envItem, dir *byte, attr *ProcAttr, fdsToClose []int, pipe int, rflag int) (pid int, err error) {
	// Declare all variables at top in case any
	// declarations require heap allocation (e.g., errbuf).
	var (
		r1       uintptr
		nextfd   int
		i        int
		clearenv int
		envfd    int
		errbuf   [ERRMAX]byte
	)

	// Guard against side effects of shuffling fds below.
	// Make sure that nextfd is beyond every descriptor named in
	// attr.Files so that we can't run the risk of overwriting any of them.
	// fd is a private int copy of attr.Files, made before the fork
	// because allocation is not allowed afterwards.
	fd := make([]int, len(attr.Files))
	nextfd = len(attr.Files)
	// Note: this i shadows the function-level i for the loop only.
	for i, ufd := range attr.Files {
		if nextfd < int(ufd) {
			nextfd = int(ufd)
		}
		fd[i] = int(ufd)
	}
	nextfd++

	// A caller-supplied environment asks rfork for RFCENVG; the
	// variables themselves are written in the child below.
	if envv != nil {
		clearenv = RFCENVG
	}

	// About to call fork.
	// No more allocation or calls of non-assembly functions.
	r1, _, _ = RawSyscall(SYS_RFORK, uintptr(RFPROC|RFFDG|RFREND|clearenv|rflag), 0, 0)

	if r1 != 0 {
		if int32(r1) == -1 {
			return 0, NewError(errstr())
		}
		// parent; return PID
		return int(r1), nil
	}

	// Fork succeeded, now in child.

	// Close fds we don't need.
	for i = 0; i < len(fdsToClose); i++ {
		r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(fdsToClose[i]), 0, 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	if envv != nil {
		// Write new environment variables.
		// Each variable is a create / pwrite / close sequence on the
		// file named by envv[i].name.
		for i = 0; i < len(envv); i++ {
			r1, _, _ = RawSyscall(SYS_CREATE, uintptr(unsafe.Pointer(envv[i].name)), uintptr(O_WRONLY), uintptr(0666))

			if int32(r1) == -1 {
				goto childerror
			}

			envfd = int(r1)

			// The offset argument is all ones (-1); presumably this
			// means "write at the current offset" — TODO confirm.
			r1, _, _ = RawSyscall6(SYS_PWRITE, uintptr(envfd), uintptr(unsafe.Pointer(envv[i].value)), uintptr(envv[i].nvalue),
				^uintptr(0), ^uintptr(0), 0)

			// A short write counts as a failure too.
			if int32(r1) == -1 || int(r1) != envv[i].nvalue {
				goto childerror
			}

			r1, _, _ = RawSyscall(SYS_CLOSE, uintptr(envfd), 0, 0)

			if int32(r1) == -1 {
				goto childerror
			}
		}
	}

	// Chdir
	if dir != nil {
		r1, _, _ = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	// Pass 1: look for fd[i] < i and move those up above len(fd)
	// so that pass 2 won't stomp on an fd it needs later.
	// The status pipe is moved out of the way first if it sits in the
	// range that the shuffling below may overwrite.
	if pipe < nextfd {
		r1, _, _ = RawSyscall(SYS_DUP, uintptr(pipe), uintptr(nextfd), 0)
		if int32(r1) == -1 {
			goto childerror
		}
		pipe = nextfd
		nextfd++
	}
	for i = 0; i < len(fd); i++ {
		if fd[i] >= 0 && fd[i] < int(i) {
			r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(nextfd), 0)
			if int32(r1) == -1 {
				goto childerror
			}

			fd[i] = nextfd
			nextfd++
			if nextfd == pipe { // don't stomp on pipe
				nextfd++
			}
		}
	}

	// Pass 2: dup fd[i] down onto i.
	for i = 0; i < len(fd); i++ {
		if fd[i] == -1 {
			// -1 means the caller wants descriptor i closed in the
			// child; the result of the close is not checked.
			RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
			continue
		}
		if fd[i] == int(i) {
			// Already in place.
			continue
		}
		r1, _, _ = RawSyscall(SYS_DUP, uintptr(fd[i]), uintptr(i), 0)
		if int32(r1) == -1 {
			goto childerror
		}
	}

	// Pass 3: close fd[i] if it was moved in the previous pass.
	for i = 0; i < len(fd); i++ {
		if fd[i] >= 0 && fd[i] != int(i) {
			RawSyscall(SYS_CLOSE, uintptr(fd[i]), 0, 0)
		}
	}

	// Time to exec.
	// If exec returns at all, control falls through into childerror.
	r1, _, _ = RawSyscall(SYS_EXEC,
		uintptr(unsafe.Pointer(argv0)),
		uintptr(unsafe.Pointer(&argv[0])), 0)

childerror:
	// send error string on pipe
	// Fetch the error text into errbuf, force NUL termination, and
	// write only the bytes before the first NUL.
	RawSyscall(SYS_ERRSTR, uintptr(unsafe.Pointer(&errbuf[0])), uintptr(len(errbuf)), 0)
	errbuf[len(errbuf)-1] = 0
	i = 0
	for i < len(errbuf) && errbuf[i] != 0 {
		i++
	}

	RawSyscall6(SYS_PWRITE, uintptr(pipe), uintptr(unsafe.Pointer(&errbuf[0])), uintptr(i),
		^uintptr(0), ^uintptr(0), 0)

	// Keep calling exits so control can never return to the caller in
	// the child.
	for {
		RawSyscall(SYS_EXITS, 0, 0, 0)
	}
}
  277. func cexecPipe(p []int) error {
  278. e := Pipe(p)
  279. if e != nil {
  280. return e
  281. }
  282. fd, e := Open("#d/"+itoa(p[1]), O_CLOEXEC)
  283. if e != nil {
  284. Close(p[0])
  285. Close(p[1])
  286. return e
  287. }
  288. Close(fd)
  289. return nil
  290. }
// envItem is one environment variable prepared before the fork so that
// the child can install it with raw system calls only.
type envItem struct {
	name   *byte // NUL-terminated path of the variable's file ("/env/" + variable name)
	value  *byte // first byte of the buffer holding the value
	nvalue int   // number of bytes to write from value
}
// ProcAttr holds the attributes applied to a process started with
// ForkExec or StartProcess.
type ProcAttr struct {
	Dir   string       // Current working directory; if empty, the child does not chdir.
	Env   []string     // Environment as "name=value" entries; nil means no environment is written.
	Files []uintptr    // File descriptors: Files[i] becomes descriptor i in the child.
	Sys   *SysProcAttr // Plan 9 specific attributes; nil means the zero SysProcAttr.
}
// SysProcAttr holds the Plan 9 specific process attributes.
type SysProcAttr struct {
	Rfork int // additional flags to pass to rfork
}
// zeroProcAttr and zeroSysProcAttr are the defaults forkExec substitutes
// when the caller passes a nil attr or leaves attr.Sys nil.
var zeroProcAttr ProcAttr
var zeroSysProcAttr SysProcAttr
  307. func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
  308. var (
  309. p [2]int
  310. n int
  311. errbuf [ERRMAX]byte
  312. wmsg Waitmsg
  313. )
  314. if attr == nil {
  315. attr = &zeroProcAttr
  316. }
  317. sys := attr.Sys
  318. if sys == nil {
  319. sys = &zeroSysProcAttr
  320. }
  321. p[0] = -1
  322. p[1] = -1
  323. // Convert args to C form.
  324. argv0p, err := BytePtrFromString(argv0)
  325. if err != nil {
  326. return 0, err
  327. }
  328. argvp, err := SlicePtrFromStrings(argv)
  329. if err != nil {
  330. return 0, err
  331. }
  332. var dir *byte
  333. if attr.Dir != "" {
  334. dir, err = BytePtrFromString(attr.Dir)
  335. if err != nil {
  336. return 0, err
  337. }
  338. }
  339. var envvParsed []envItem
  340. if attr.Env != nil {
  341. envvParsed = make([]envItem, 0, len(attr.Env))
  342. for _, v := range attr.Env {
  343. i := 0
  344. for i < len(v) && v[i] != '=' {
  345. i++
  346. }
  347. envname, err := BytePtrFromString("/env/" + v[:i])
  348. if err != nil {
  349. return 0, err
  350. }
  351. envvalue := make([]byte, len(v)-i)
  352. copy(envvalue, v[i+1:])
  353. envvParsed = append(envvParsed, envItem{envname, &envvalue[0], len(v) - i})
  354. }
  355. }
  356. // Acquire the fork lock to prevent other threads from creating new fds before we fork.
  357. ForkLock.Lock()
  358. // get a list of open fds, excluding stdin,stdout and stderr that need to be closed in the child.
  359. // no new fds can be created while we hold the ForkLock for writing.
  360. openFds, e := readdupdevice()
  361. if e != nil {
  362. ForkLock.Unlock()
  363. return 0, e
  364. }
  365. fdsToClose := make([]int, 0, len(openFds))
  366. for _, fd := range openFds {
  367. doClose := true
  368. // exclude files opened at startup.
  369. for _, sfd := range startupFds {
  370. if fd == sfd {
  371. doClose = false
  372. break
  373. }
  374. }
  375. // exclude files explicitly requested by the caller.
  376. for _, rfd := range attr.Files {
  377. if fd == int(rfd) {
  378. doClose = false
  379. break
  380. }
  381. }
  382. if doClose {
  383. fdsToClose = append(fdsToClose, fd)
  384. }
  385. }
  386. // Allocate child status pipe close on exec.
  387. e = cexecPipe(p[:])
  388. if e != nil {
  389. return 0, e
  390. }
  391. fdsToClose = append(fdsToClose, p[0])
  392. // Kick off child.
  393. pid, err = forkAndExecInChild(argv0p, argvp, envvParsed, dir, attr, fdsToClose, p[1], sys.Rfork)
  394. if err != nil {
  395. if p[0] >= 0 {
  396. Close(p[0])
  397. Close(p[1])
  398. }
  399. ForkLock.Unlock()
  400. return 0, err
  401. }
  402. ForkLock.Unlock()
  403. // Read child error status from pipe.
  404. Close(p[1])
  405. n, err = Read(p[0], errbuf[:])
  406. Close(p[0])
  407. if err != nil || n != 0 {
  408. if n != 0 {
  409. err = NewError(string(errbuf[:n]))
  410. }
  411. // Child failed; wait for it to exit, to make sure
  412. // the zombies don't accumulate.
  413. for wmsg.Pid != pid {
  414. Await(&wmsg)
  415. }
  416. return 0, err
  417. }
  418. // Read got EOF, so pipe closed on exec, so exec succeeded.
  419. return pid, nil
  420. }
// waitErr pairs the wait message of a child with the error, if any,
// that Await returned while waiting for it.
type waitErr struct {
	Waitmsg
	err error
}
// procs tracks the processes started by startProcess. The embedded
// mutex guards waits, which maps a child's pid to the channel on which
// its wait result is delivered. The map is created on first use.
var procs struct {
	sync.Mutex
	waits map[int]chan *waitErr
}
  429. // startProcess starts a new goroutine, tied to the OS
  430. // thread, which runs the process and subsequently waits
  431. // for it to finish, communicating the process stats back
  432. // to any goroutines that may have been waiting on it.
  433. //
  434. // Such a dedicated goroutine is needed because on
  435. // Plan 9, only the parent thread can wait for a child,
  436. // whereas goroutines tend to jump OS threads (e.g.,
  437. // between starting a process and running Wait(), the
  438. // goroutine may have been rescheduled).
  439. func startProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
  440. type forkRet struct {
  441. pid int
  442. err error
  443. }
  444. forkc := make(chan forkRet, 1)
  445. go func() {
  446. runtime.LockOSThread()
  447. var ret forkRet
  448. ret.pid, ret.err = forkExec(argv0, argv, attr)
  449. // If fork fails there is nothing to wait for.
  450. if ret.err != nil || ret.pid == 0 {
  451. forkc <- ret
  452. return
  453. }
  454. waitc := make(chan *waitErr, 1)
  455. // Mark that the process is running.
  456. procs.Lock()
  457. if procs.waits == nil {
  458. procs.waits = make(map[int]chan *waitErr)
  459. }
  460. procs.waits[ret.pid] = waitc
  461. procs.Unlock()
  462. forkc <- ret
  463. var w waitErr
  464. for w.err == nil && w.Pid != ret.pid {
  465. w.err = Await(&w.Waitmsg)
  466. }
  467. waitc <- &w
  468. close(waitc)
  469. }()
  470. ret := <-forkc
  471. return ret.pid, ret.err
  472. }
  473. // Combination of fork and exec, careful to be thread safe.
  474. func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
  475. return startProcess(argv0, argv, attr)
  476. }
  477. // StartProcess wraps ForkExec for package os.
  478. func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) {
  479. pid, err = startProcess(argv0, argv, attr)
  480. return pid, 0, err
  481. }
  482. // Ordinary exec.
  483. func Exec(argv0 string, argv []string, envv []string) (err error) {
  484. if envv != nil {
  485. r1, _, _ := RawSyscall(SYS_RFORK, RFCENVG, 0, 0)
  486. if int32(r1) == -1 {
  487. return NewError(errstr())
  488. }
  489. for _, v := range envv {
  490. i := 0
  491. for i < len(v) && v[i] != '=' {
  492. i++
  493. }
  494. fd, e := Create("/env/"+v[:i], O_WRONLY, 0666)
  495. if e != nil {
  496. return e
  497. }
  498. _, e = Write(fd, []byte(v[i+1:]))
  499. if e != nil {
  500. Close(fd)
  501. return e
  502. }
  503. Close(fd)
  504. }
  505. }
  506. argv0p, err := BytePtrFromString(argv0)
  507. if err != nil {
  508. return err
  509. }
  510. argvp, err := SlicePtrFromStrings(argv)
  511. if err != nil {
  512. return err
  513. }
  514. _, _, e1 := Syscall(SYS_EXEC,
  515. uintptr(unsafe.Pointer(argv0p)),
  516. uintptr(unsafe.Pointer(&argvp[0])),
  517. 0)
  518. return e1
  519. }
  520. // WaitProcess waits until the pid of a
  521. // running process is found in the queue of
  522. // wait messages. It is used in conjunction
  523. // with ForkExec/StartProcess to wait for a
  524. // running process to exit.
  525. func WaitProcess(pid int, w *Waitmsg) (err error) {
  526. procs.Lock()
  527. ch := procs.waits[pid]
  528. procs.Unlock()
  529. var wmsg *waitErr
  530. if ch != nil {
  531. wmsg = <-ch
  532. procs.Lock()
  533. if procs.waits[pid] == ch {
  534. delete(procs.waits, pid)
  535. }
  536. procs.Unlock()
  537. }
  538. if wmsg == nil {
  539. // ch was missing or ch is closed
  540. return NewError("process not found")
  541. }
  542. if wmsg.err != nil {
  543. return wmsg.err
  544. }
  545. if w != nil {
  546. *w = wmsg.Waitmsg
  547. }
  548. return nil
  549. }