123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- package urn
- import (
- "fmt"
- )
- var (
- errPrefix = "expecting the prefix to be the \"urn\" string (whatever case) [col %d]"
- errIdentifier = "expecting the identifier to be string (1..31 alnum chars, also containing dashes but not at its start) [col %d]"
- errSpecificString = "expecting the specific string to be a string containing alnum, hex, or others ([()+,-.:=@;$_!*']) chars [col %d]"
- errNoUrnWithinID = "expecting the identifier to not contain the \"urn\" reserved string [col %d]"
- errHex = "expecting the specific string hex chars to be well-formed (%%alnum{2}) [col %d]"
- errParse = "parsing error [col %d]"
- )
- %%{
- machine urn;
- # unsigned alphabet
- alphtype uint8;
- action mark {
- m.pb = m.p
- }
- action tolower {
- m.tolower = append(m.tolower, m.p - m.pb)
- }
- action set_pre {
- output.prefix = string(m.text())
- }
- action set_nid {
- output.ID = string(m.text())
- }
- action set_nss {
- raw := m.text()
- output.SS = string(raw)
- // Iterate upper letters lowering them
- for _, i := range m.tolower {
- raw[i] = raw[i] + 32
- }
- output.norm = string(raw)
- }
- action err_pre {
- m.err = fmt.Errorf(errPrefix, m.p)
- fhold;
- fgoto fail;
- }
- action err_nid {
- m.err = fmt.Errorf(errIdentifier, m.p)
- fhold;
- fgoto fail;
- }
- action err_nss {
- m.err = fmt.Errorf(errSpecificString, m.p)
- fhold;
- fgoto fail;
- }
- action err_urn {
- m.err = fmt.Errorf(errNoUrnWithinID, m.p)
- fhold;
- fgoto fail;
- }
- action err_hex {
- m.err = fmt.Errorf(errHex, m.p)
- fhold;
- fgoto fail;
- }
- action err_parse {
- m.err = fmt.Errorf(errParse, m.p)
- fhold;
- fgoto fail;
- }
- pre = ([uU][rR][nN] @err(err_pre)) >mark %set_pre;
- nid = (alnum >mark (alnum | '-'){0,31}) %set_nid;
- hex = '%' (digit | lower | upper >tolower){2} $err(err_hex);
- sss = (alnum | [()+,\-.:=@;$_!*']);
- nss = (sss | hex)+ $err(err_nss);
- fail := (any - [\n\r])* @err{ fgoto main; };
- main := (pre ':' (nid - pre %err(err_urn)) $err(err_nid) ':' nss >mark %set_nss) $err(err_parse);
- }%%
- %% write data noerror noprefix;
- // Machine is the interface representing the FSM
- type Machine interface {
- Error() error
- Parse(input []byte) (*URN, error)
- }
- type machine struct {
- data []byte
- cs int
- p, pe, eof, pb int
- err error
- tolower []int
- }
- // NewMachine creates a new FSM able to parse RFC 2141 strings.
- func NewMachine() Machine {
- m := &machine{}
- %% access m.;
- %% variable p m.p;
- %% variable pe m.pe;
- %% variable eof m.eof;
- %% variable data m.data;
- return m
- }
- // Err returns the error that occurred on the last call to Parse.
- //
- // If the result is nil, then the line was parsed successfully.
- func (m *machine) Error() error {
- return m.err
- }
- func (m *machine) text() []byte {
- return m.data[m.pb:m.p]
- }
- // Parse parses the input byte array as a RFC 2141 string.
- func (m *machine) Parse(input []byte) (*URN, error) {
- m.data = input
- m.p = 0
- m.pb = 0
- m.pe = len(input)
- m.eof = len(input)
- m.err = nil
- m.tolower = []int{}
- output := &URN{}
- %% write init;
- %% write exec;
- if m.cs < first_final || m.cs == en_fail {
- return nil, m.err
- }
- return output, nil
- }
|