|
- // Copyright 2016 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package pipeline
- import (
- "bytes"
- "errors"
- "fmt"
- "go/ast"
- "go/constant"
- "go/format"
- "go/token"
- "go/types"
- "path/filepath"
- "sort"
- "strings"
- "unicode"
- "unicode/utf8"
- fmtparser "golang.org/x/text/internal/format"
- "golang.org/x/tools/go/callgraph"
- "golang.org/x/tools/go/callgraph/cha"
- "golang.org/x/tools/go/loader"
- "golang.org/x/tools/go/ssa"
- "golang.org/x/tools/go/ssa/ssautil"
- )
// debug gates the diagnostic tracing done by (*extracter).debug and the
// duplicate-format-argument warning in visitFormats. It is a compile-time
// switch; flip it to true locally when investigating extraction problems.
const debug = false
- // TODO:
- // - merge information into existing files
- // - handle different file formats (PO, XLIFF)
- // - handle features (gender, plural)
- // - message rewriting
- // - `msg:"etc"` tags
- // Extract extracts all strings form the package defined in Config.
- func Extract(c *Config) (*State, error) {
- x, err := newExtracter(c)
- if err != nil {
- return nil, wrap(err, "")
- }
- if err := x.seedEndpoints(); err != nil {
- return nil, err
- }
- x.extractMessages()
- return &State{
- Config: *c,
- program: x.iprog,
- Extracted: Messages{
- Language: c.SourceLanguage,
- Messages: x.messages,
- },
- }, nil
- }
- type extracter struct {
- conf loader.Config
- iprog *loader.Program
- prog *ssa.Program
- callGraph *callgraph.Graph
- // Calls and other expressions to collect.
- globals map[token.Pos]*constData
- funcs map[token.Pos]*callData
- messages []Message
- }
- func newExtracter(c *Config) (x *extracter, err error) {
- x = &extracter{
- conf: loader.Config{},
- globals: map[token.Pos]*constData{},
- funcs: map[token.Pos]*callData{},
- }
- x.iprog, err = loadPackages(&x.conf, c.Packages)
- if err != nil {
- return nil, wrap(err, "")
- }
- x.prog = ssautil.CreateProgram(x.iprog, ssa.GlobalDebug|ssa.BareInits)
- x.prog.Build()
- x.callGraph = cha.CallGraph(x.prog)
- return x, nil
- }
- func (x *extracter) globalData(pos token.Pos) *constData {
- cd := x.globals[pos]
- if cd == nil {
- cd = &constData{}
- x.globals[pos] = cd
- }
- return cd
- }
- func (x *extracter) seedEndpoints() error {
- pkgInfo := x.iprog.Package("golang.org/x/text/message")
- if pkgInfo == nil {
- return errors.New("pipeline: golang.org/x/text/message is not imported")
- }
- pkg := x.prog.Package(pkgInfo.Pkg)
- typ := types.NewPointer(pkg.Type("Printer").Type())
- x.processGlobalVars()
- x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Printf"), &callData{
- formatPos: 1,
- argPos: 2,
- isMethod: true,
- })
- x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Sprintf"), &callData{
- formatPos: 1,
- argPos: 2,
- isMethod: true,
- })
- x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Fprintf"), &callData{
- formatPos: 2,
- argPos: 3,
- isMethod: true,
- })
- return nil
- }
- // processGlobalVars finds string constants that are assigned to global
- // variables.
- func (x *extracter) processGlobalVars() {
- for _, p := range x.prog.AllPackages() {
- m, ok := p.Members["init"]
- if !ok {
- continue
- }
- for _, b := range m.(*ssa.Function).Blocks {
- for _, i := range b.Instrs {
- s, ok := i.(*ssa.Store)
- if !ok {
- continue
- }
- a, ok := s.Addr.(*ssa.Global)
- if !ok {
- continue
- }
- t := a.Type()
- for {
- p, ok := t.(*types.Pointer)
- if !ok {
- break
- }
- t = p.Elem()
- }
- if b, ok := t.(*types.Basic); !ok || b.Kind() != types.String {
- continue
- }
- x.visitInit(a, s.Val)
- }
- }
- }
- }
- type constData struct {
- call *callData // to provide a signature for the constants
- values []constVal
- others []token.Pos // Assigned to other global data.
- }
- func (d *constData) visit(x *extracter, f func(c constant.Value)) {
- for _, v := range d.values {
- f(v.value)
- }
- for _, p := range d.others {
- if od, ok := x.globals[p]; ok {
- od.visit(x, f)
- }
- }
- }
- type constVal struct {
- value constant.Value
- pos token.Pos
- }
- type callData struct {
- call ssa.CallInstruction
- expr *ast.CallExpr
- formats []constant.Value
- callee *callData
- isMethod bool
- formatPos int
- argPos int // varargs at this position in the call
- argTypes []int // arguments extractable from this position
- }
- func (c *callData) callFormatPos() int {
- c = c.callee
- if c.isMethod {
- return c.formatPos - 1
- }
- return c.formatPos
- }
- func (c *callData) callArgsStart() int {
- c = c.callee
- if c.isMethod {
- return c.argPos - 1
- }
- return c.argPos
- }
- func (c *callData) Pos() token.Pos { return c.call.Pos() }
- func (c *callData) Pkg() *types.Package { return c.call.Parent().Pkg.Pkg }
- func (x *extracter) handleFunc(f *ssa.Function, fd *callData) {
- for _, e := range x.callGraph.Nodes[f].In {
- if e.Pos() == 0 {
- continue
- }
- call := e.Site
- caller := x.funcs[call.Pos()]
- if caller != nil {
- // TODO: theoretically a format string could be passed to multiple
- // arguments of a function. Support this eventually.
- continue
- }
- x.debug(call, "CALL", f.String())
- caller = &callData{
- call: call,
- callee: fd,
- formatPos: -1,
- argPos: -1,
- }
- // Offset by one if we are invoking an interface method.
- offset := 0
- if call.Common().IsInvoke() {
- offset = -1
- }
- x.funcs[call.Pos()] = caller
- if fd.argPos >= 0 {
- x.visitArgs(caller, call.Common().Args[fd.argPos+offset])
- }
- x.visitFormats(caller, call.Common().Args[fd.formatPos+offset])
- }
- }
- type posser interface {
- Pos() token.Pos
- Parent() *ssa.Function
- }
- func (x *extracter) debug(v posser, header string, args ...interface{}) {
- if debug {
- pos := ""
- if p := v.Parent(); p != nil {
- pos = posString(&x.conf, p.Package().Pkg, v.Pos())
- }
- if header != "CALL" && header != "INSERT" {
- header = " " + header
- }
- fmt.Printf("%-32s%-10s%-15T ", pos+fmt.Sprintf("@%d", v.Pos()), header, v)
- for _, a := range args {
- fmt.Printf(" %v", a)
- }
- fmt.Println()
- }
- }
- // visitInit evaluates and collects values assigned to global variables in an
- // init function.
- func (x *extracter) visitInit(global *ssa.Global, v ssa.Value) {
- if v == nil {
- return
- }
- x.debug(v, "GLOBAL", v)
- switch v := v.(type) {
- case *ssa.Phi:
- for _, e := range v.Edges {
- x.visitInit(global, e)
- }
- case *ssa.Const:
- // Only record strings with letters.
- if str := constant.StringVal(v.Value); isMsg(str) {
- cd := x.globalData(global.Pos())
- cd.values = append(cd.values, constVal{v.Value, v.Pos()})
- }
- // TODO: handle %m-directive.
- case *ssa.Global:
- cd := x.globalData(global.Pos())
- cd.others = append(cd.others, v.Pos())
- case *ssa.FieldAddr, *ssa.Field:
- // TODO: mark field index v.Field of v.X.Type() for extraction. extract
- // an example args as to give parameters for the translator.
- case *ssa.Slice:
- if v.Low == nil && v.High == nil && v.Max == nil {
- x.visitInit(global, v.X)
- }
- case *ssa.Alloc:
- if ref := v.Referrers(); ref == nil {
- for _, r := range *ref {
- values := []ssa.Value{}
- for _, o := range r.Operands(nil) {
- if o == nil || *o == v {
- continue
- }
- values = append(values, *o)
- }
- // TODO: return something different if we care about multiple
- // values as well.
- if len(values) == 1 {
- x.visitInit(global, values[0])
- }
- }
- }
- case ssa.Instruction:
- rands := v.Operands(nil)
- if len(rands) == 1 && rands[0] != nil {
- x.visitInit(global, *rands[0])
- }
- }
- return
- }
- // visitFormats finds the original source of the value. The returned index is
- // position of the argument if originated from a function argument or -1
- // otherwise.
- func (x *extracter) visitFormats(call *callData, v ssa.Value) {
- if v == nil {
- return
- }
- x.debug(v, "VALUE", v)
- switch v := v.(type) {
- case *ssa.Phi:
- for _, e := range v.Edges {
- x.visitFormats(call, e)
- }
- case *ssa.Const:
- // Only record strings with letters.
- if isMsg(constant.StringVal(v.Value)) {
- x.debug(call.call, "FORMAT", v.Value.ExactString())
- call.formats = append(call.formats, v.Value)
- }
- // TODO: handle %m-directive.
- case *ssa.Global:
- x.globalData(v.Pos()).call = call
- case *ssa.FieldAddr, *ssa.Field:
- // TODO: mark field index v.Field of v.X.Type() for extraction. extract
- // an example args as to give parameters for the translator.
- case *ssa.Slice:
- if v.Low == nil && v.High == nil && v.Max == nil {
- x.visitFormats(call, v.X)
- }
- case *ssa.Parameter:
- // TODO: handle the function for the index parameter.
- f := v.Parent()
- for i, p := range f.Params {
- if p == v {
- if call.formatPos < 0 {
- call.formatPos = i
- // TODO: is there a better way to detect this is calling
- // a method rather than a function?
- call.isMethod = len(f.Params) > f.Signature.Params().Len()
- x.handleFunc(v.Parent(), call)
- } else if debug && i != call.formatPos {
- // TODO: support this.
- fmt.Printf("WARNING:%s: format string passed to arg %d and %d\n",
- posString(&x.conf, call.Pkg(), call.Pos()),
- call.formatPos, i)
- }
- }
- }
- case *ssa.Alloc:
- if ref := v.Referrers(); ref == nil {
- for _, r := range *ref {
- values := []ssa.Value{}
- for _, o := range r.Operands(nil) {
- if o == nil || *o == v {
- continue
- }
- values = append(values, *o)
- }
- // TODO: return something different if we care about multiple
- // values as well.
- if len(values) == 1 {
- x.visitFormats(call, values[0])
- }
- }
- }
- // TODO:
- // case *ssa.Index:
- // // Get all values in the array if applicable
- // case *ssa.IndexAddr:
- // // Get all values in the slice or *array if applicable.
- // case *ssa.Lookup:
- // // Get all values in the map if applicable.
- case *ssa.FreeVar:
- // TODO: find the link between free variables and parameters:
- //
- // func freeVar(p *message.Printer, str string) {
- // fn := func(p *message.Printer) {
- // p.Printf(str)
- // }
- // fn(p)
- // }
- case *ssa.Call:
- case ssa.Instruction:
- rands := v.Operands(nil)
- if len(rands) == 1 && rands[0] != nil {
- x.visitFormats(call, *rands[0])
- }
- }
- }
- // Note: a function may have an argument marked as both format and passthrough.
- // visitArgs collects information on arguments. For wrapped functions it will
- // just determine the position of the variable args slice.
- func (x *extracter) visitArgs(fd *callData, v ssa.Value) {
- if v == nil {
- return
- }
- x.debug(v, "ARGV", v)
- switch v := v.(type) {
- case *ssa.Slice:
- if v.Low == nil && v.High == nil && v.Max == nil {
- x.visitArgs(fd, v.X)
- }
- case *ssa.Parameter:
- // TODO: handle the function for the index parameter.
- f := v.Parent()
- for i, p := range f.Params {
- if p == v {
- fd.argPos = i
- }
- }
- case *ssa.Alloc:
- if ref := v.Referrers(); ref == nil {
- for _, r := range *ref {
- values := []ssa.Value{}
- for _, o := range r.Operands(nil) {
- if o == nil || *o == v {
- continue
- }
- values = append(values, *o)
- }
- // TODO: return something different if we care about
- // multiple values as well.
- if len(values) == 1 {
- x.visitArgs(fd, values[0])
- }
- }
- }
- case ssa.Instruction:
- rands := v.Operands(nil)
- if len(rands) == 1 && rands[0] != nil {
- x.visitArgs(fd, *rands[0])
- }
- }
- }
- // print returns Go syntax for the specified node.
- func (x *extracter) print(n ast.Node) string {
- var buf bytes.Buffer
- format.Node(&buf, x.conf.Fset, n)
- return buf.String()
- }
- type packageExtracter struct {
- f *ast.File
- x *extracter
- info *loader.PackageInfo
- cmap ast.CommentMap
- }
- func (px packageExtracter) getComment(n ast.Node) string {
- cs := px.cmap.Filter(n).Comments()
- if len(cs) > 0 {
- return strings.TrimSpace(cs[0].Text())
- }
- return ""
- }
- func (x *extracter) extractMessages() {
- prog := x.iprog
- keys := make([]*types.Package, 0, len(x.iprog.AllPackages))
- for k := range x.iprog.AllPackages {
- keys = append(keys, k)
- }
- sort.Slice(keys, func(i, j int) bool { return keys[i].Path() < keys[j].Path() })
- files := []packageExtracter{}
- for _, k := range keys {
- info := x.iprog.AllPackages[k]
- for _, f := range info.Files {
- // Associate comments with nodes.
- px := packageExtracter{
- f, x, info,
- ast.NewCommentMap(prog.Fset, f, f.Comments),
- }
- files = append(files, px)
- }
- }
- for _, px := range files {
- ast.Inspect(px.f, func(n ast.Node) bool {
- switch v := n.(type) {
- case *ast.CallExpr:
- if d := x.funcs[v.Lparen]; d != nil {
- d.expr = v
- }
- }
- return true
- })
- }
- for _, px := range files {
- ast.Inspect(px.f, func(n ast.Node) bool {
- switch v := n.(type) {
- case *ast.CallExpr:
- return px.handleCall(v)
- case *ast.ValueSpec:
- return px.handleGlobal(v)
- }
- return true
- })
- }
- }
- func (px packageExtracter) handleGlobal(spec *ast.ValueSpec) bool {
- comment := px.getComment(spec)
- for _, ident := range spec.Names {
- data, ok := px.x.globals[ident.Pos()]
- if !ok {
- continue
- }
- name := ident.Name
- var arguments []argument
- if data.call != nil {
- arguments = px.getArguments(data.call)
- } else if !strings.HasPrefix(name, "msg") && !strings.HasPrefix(name, "Msg") {
- continue
- }
- data.visit(px.x, func(c constant.Value) {
- px.addMessage(spec.Pos(), []string{name}, c, comment, arguments)
- })
- }
- return true
- }
- func (px packageExtracter) handleCall(call *ast.CallExpr) bool {
- x := px.x
- data := x.funcs[call.Lparen]
- if data == nil || len(data.formats) == 0 {
- return true
- }
- if data.expr != call {
- panic("invariant `data.call != call` failed")
- }
- x.debug(data.call, "INSERT", data.formats)
- argn := data.callFormatPos()
- if argn >= len(call.Args) {
- return true
- }
- format := call.Args[argn]
- arguments := px.getArguments(data)
- comment := ""
- key := []string{}
- if ident, ok := format.(*ast.Ident); ok {
- key = append(key, ident.Name)
- if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil {
- // TODO: get comment above ValueSpec as well
- comment = v.Comment.Text()
- }
- }
- if c := px.getComment(call.Args[0]); c != "" {
- comment = c
- }
- formats := data.formats
- for _, c := range formats {
- px.addMessage(call.Lparen, key, c, comment, arguments)
- }
- return true
- }
- func (px packageExtracter) getArguments(data *callData) []argument {
- arguments := []argument{}
- x := px.x
- info := px.info
- if data.callArgsStart() >= 0 {
- args := data.expr.Args[data.callArgsStart():]
- for i, arg := range args {
- expr := x.print(arg)
- val := ""
- if v := info.Types[arg].Value; v != nil {
- val = v.ExactString()
- switch arg.(type) {
- case *ast.BinaryExpr, *ast.UnaryExpr:
- expr = val
- }
- }
- arguments = append(arguments, argument{
- ArgNum: i + 1,
- Type: info.Types[arg].Type.String(),
- UnderlyingType: info.Types[arg].Type.Underlying().String(),
- Expr: expr,
- Value: val,
- Comment: px.getComment(arg),
- Position: posString(&x.conf, info.Pkg, arg.Pos()),
- // TODO report whether it implements
- // interfaces plural.Interface,
- // gender.Interface.
- })
- }
- }
- return arguments
- }
- func (px packageExtracter) addMessage(
- pos token.Pos,
- key []string,
- c constant.Value,
- comment string,
- arguments []argument) {
- x := px.x
- fmtMsg := constant.StringVal(c)
- ph := placeholders{index: map[string]string{}}
- trimmed, _, _ := trimWS(fmtMsg)
- p := fmtparser.Parser{}
- simArgs := make([]interface{}, len(arguments))
- for i, v := range arguments {
- simArgs[i] = v
- }
- msg := ""
- p.Reset(simArgs)
- for p.SetFormat(trimmed); p.Scan(); {
- name := ""
- var arg *argument
- switch p.Status {
- case fmtparser.StatusText:
- msg += p.Text()
- continue
- case fmtparser.StatusSubstitution,
- fmtparser.StatusBadWidthSubstitution,
- fmtparser.StatusBadPrecSubstitution:
- arguments[p.ArgNum-1].used = true
- arg = &arguments[p.ArgNum-1]
- name = getID(arg)
- case fmtparser.StatusBadArgNum, fmtparser.StatusMissingArg:
- arg = &argument{
- ArgNum: p.ArgNum,
- Position: posString(&x.conf, px.info.Pkg, pos),
- }
- name, arg.UnderlyingType = verbToPlaceholder(p.Text(), p.ArgNum)
- }
- sub := p.Text()
- if !p.HasIndex {
- r, sz := utf8.DecodeLastRuneInString(sub)
- sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r)
- }
- msg += fmt.Sprintf("{%s}", ph.addArg(arg, name, sub))
- }
- key = append(key, msg)
- // Add additional Placeholders that can be used in translations
- // that are not present in the string.
- for _, arg := range arguments {
- if arg.used {
- continue
- }
- ph.addArg(&arg, getID(&arg), fmt.Sprintf("%%[%d]v", arg.ArgNum))
- }
- x.messages = append(x.messages, Message{
- ID: key,
- Key: fmtMsg,
- Message: Text{Msg: msg},
- // TODO(fix): this doesn't get the before comment.
- Comment: comment,
- Placeholders: ph.slice,
- Position: posString(&x.conf, px.info.Pkg, pos),
- })
- }
- func posString(conf *loader.Config, pkg *types.Package, pos token.Pos) string {
- p := conf.Fset.Position(pos)
- file := fmt.Sprintf("%s:%d:%d", filepath.Base(p.Filename), p.Line, p.Column)
- return filepath.Join(pkg.Path(), file)
- }
- func getID(arg *argument) string {
- s := getLastComponent(arg.Expr)
- s = strip(s)
- s = strings.Replace(s, " ", "", -1)
- // For small variable names, use user-defined types for more info.
- if len(s) <= 2 && arg.UnderlyingType != arg.Type {
- s = getLastComponent(arg.Type)
- }
- return strings.Title(s)
- }
// strip is a dirty hack to convert function calls to placeholder IDs.
//
// White space and '-' are replaced by '_', every other rune that is not a
// letter, mark or number is dropped, and a leading "Get"/"get" is removed
// when it is used as a getter prefix, i.e. when the rune following it is not
// a lower-case letter or mark ("GetName" and "getName" become "Name", while
// "Getaway" and "getter" are left alone).
func strip(s string) string {
	s = strings.Map(func(r rune) rune {
		if unicode.IsSpace(r) || r == '-' {
			return '_'
		}
		if !unicode.In(r, unicode.Letter, unicode.Mark, unicode.Number) {
			return -1
		}
		return r
	}, s)

	// Strip "Get" from getter functions.
	if strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get") {
		if rest := s[len("get"):]; rest != "" {
			// Look at the rune that follows the prefix, not at the prefix
			// itself, to tell a getter apart from a word that merely starts
			// with "get".
			r, _ := utf8.DecodeRuneInString(rest)
			if !unicode.In(r, unicode.Ll, unicode.M) { // not lower or mark
				s = rest
			}
		}
	}
	return s
}
// verbToPlaceholder picks a placeholder name and an underlying type from the
// verb that ends the substitution sub. It is the fallback for substitutions
// that have no argument, and hence no type information, to go by. Verbs
// without a dedicated name are called "Arg_<pos>".
func verbToPlaceholder(sub string, pos int) (name, underlying string) {
	verb, _ := utf8.DecodeLastRuneInString(sub)
	switch verb {
	case 'd':
		return "Integer", "int"
	case 'e', 'f', 'g':
		return "Number", "float64"
	case 'm':
		return "Message", "string"
	}

	generic := fmt.Sprintf("Arg_%d", pos)
	if verb == 's' || verb == 'q' {
		return generic, "string"
	}
	return generic, "interface{}"
}
- type placeholders struct {
- index map[string]string
- slice []Placeholder
- }
- func (p *placeholders) addArg(arg *argument, name, sub string) (id string) {
- id = name
- alt, ok := p.index[id]
- for i := 1; ok && alt != sub; i++ {
- id = fmt.Sprintf("%s_%d", name, i)
- alt, ok = p.index[id]
- }
- p.index[id] = sub
- p.slice = append(p.slice, Placeholder{
- ID: id,
- String: sub,
- Type: arg.Type,
- UnderlyingType: arg.UnderlyingType,
- ArgNum: arg.ArgNum,
- Expr: arg.Expr,
- Comment: arg.Comment,
- })
- return id
- }
// getLastComponent returns the part of s that follows its last '.', or all of
// s when it contains no '.'.
func getLastComponent(s string) string {
	dot := strings.LastIndexByte(s, '.')
	if dot < 0 {
		return s
	}
	return s[dot+1:]
}
// isMsg reports whether s should be translated, which is taken to be the case
// as soon as it contains at least one letter.
func isMsg(s string) bool {
	// TODO: parse as format string and omit strings that contain letters
	// coming from format verbs.
	runes := []rune(s)
	for i := 0; i < len(runes); i++ {
		if unicode.IsLetter(runes[i]) {
			return true
		}
	}
	return false
}
|