machine.go.rl 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. package urn
  2. import (
  3. "fmt"
  4. )
  5. var (
  6. errPrefix = "expecting the prefix to be the \"urn\" string (whatever case) [col %d]"
  7. errIdentifier = "expecting the identifier to be string (1..31 alnum chars, also containing dashes but not at its start) [col %d]"
  8. errSpecificString = "expecting the specific string to be a string containing alnum, hex, or others ([()+,-.:=@;$_!*']) chars [col %d]"
  9. errNoUrnWithinID = "expecting the identifier to not contain the \"urn\" reserved string [col %d]"
  10. errHex = "expecting the specific string hex chars to be well-formed (%%alnum{2}) [col %d]"
  11. errParse = "parsing error [col %d]"
  12. )
  13. %%{
  14. machine urn;
  15. # unsigned alphabet
  16. alphtype uint8;
  17. action mark {
  18. m.pb = m.p
  19. }
  20. action tolower {
  21. m.tolower = append(m.tolower, m.p - m.pb)
  22. }
  23. action set_pre {
  24. output.prefix = string(m.text())
  25. }
  26. action set_nid {
  27. output.ID = string(m.text())
  28. }
  29. action set_nss {
  30. raw := m.text()
  31. output.SS = string(raw)
  32. // Iterate upper letters lowering them
  33. for _, i := range m.tolower {
  34. raw[i] = raw[i] + 32
  35. }
  36. output.norm = string(raw)
  37. }
  38. action err_pre {
  39. m.err = fmt.Errorf(errPrefix, m.p)
  40. fhold;
  41. fgoto fail;
  42. }
  43. action err_nid {
  44. m.err = fmt.Errorf(errIdentifier, m.p)
  45. fhold;
  46. fgoto fail;
  47. }
  48. action err_nss {
  49. m.err = fmt.Errorf(errSpecificString, m.p)
  50. fhold;
  51. fgoto fail;
  52. }
  53. action err_urn {
  54. m.err = fmt.Errorf(errNoUrnWithinID, m.p)
  55. fhold;
  56. fgoto fail;
  57. }
  58. action err_hex {
  59. m.err = fmt.Errorf(errHex, m.p)
  60. fhold;
  61. fgoto fail;
  62. }
  63. action err_parse {
  64. m.err = fmt.Errorf(errParse, m.p)
  65. fhold;
  66. fgoto fail;
  67. }
  68. pre = ([uU][rR][nN] @err(err_pre)) >mark %set_pre;
  69. nid = (alnum >mark (alnum | '-'){0,31}) %set_nid;
  70. hex = '%' (digit | lower | upper >tolower){2} $err(err_hex);
  71. sss = (alnum | [()+,\-.:=@;$_!*']);
  72. nss = (sss | hex)+ $err(err_nss);
  73. fail := (any - [\n\r])* @err{ fgoto main; };
  74. main := (pre ':' (nid - pre %err(err_urn)) $err(err_nid) ':' nss >mark %set_nss) $err(err_parse);
  75. }%%
  76. %% write data noerror noprefix;
  77. // Machine is the interface representing the FSM
  78. type Machine interface {
  79. Error() error
  80. Parse(input []byte) (*URN, error)
  81. }
  82. type machine struct {
  83. data []byte
  84. cs int
  85. p, pe, eof, pb int
  86. err error
  87. tolower []int
  88. }
  89. // NewMachine creates a new FSM able to parse RFC 2141 strings.
  90. func NewMachine() Machine {
  91. m := &machine{}
  92. %% access m.;
  93. %% variable p m.p;
  94. %% variable pe m.pe;
  95. %% variable eof m.eof;
  96. %% variable data m.data;
  97. return m
  98. }
  99. // Err returns the error that occurred on the last call to Parse.
  100. //
  101. // If the result is nil, then the line was parsed successfully.
  102. func (m *machine) Error() error {
  103. return m.err
  104. }
  105. func (m *machine) text() []byte {
  106. return m.data[m.pb:m.p]
  107. }
  108. // Parse parses the input byte array as a RFC 2141 string.
  109. func (m *machine) Parse(input []byte) (*URN, error) {
  110. m.data = input
  111. m.p = 0
  112. m.pb = 0
  113. m.pe = len(input)
  114. m.eof = len(input)
  115. m.err = nil
  116. m.tolower = []int{}
  117. output := &URN{}
  118. %% write init;
  119. %% write exec;
  120. if m.cs < first_final || m.cs == en_fail {
  121. return nil, m.err
  122. }
  123. return output, nil
  124. }