-module(neotoma_parse).
-export([parse/1,file/1]).
-define(p_anything,true).
-define(p_charclass,true).
-define(p_choose,true).
-define(p_label,true).
-define(p_not,true).
-define(p_one_or_more,true).
-define(p_optional,true).
-define(p_scan,true).
-define(p_seq,true).
-define(p_string,true).
-define(p_zero_or_more,true).

%% Insert escapes into a string: every character listed below is replaced by
%% a backslash followed by its escape letter; all other characters are kept.
-spec escape_string(string()) -> string().
escape_string(String) -> escape_string(String, []).

%% Worker for escape_string/1. Output is accumulated in reverse, which is why
%% each escape is pushed as [Letter, $\\ | Output]; it is reversed at the end.
-spec escape_string(string(), string()) -> string().
escape_string([], Output) ->
    lists:reverse(Output);
escape_string([H|T], Output) ->
    escape_string(T,
        case H of
            $/  -> [$/,$\\|Output];
            $\" -> [$\",$\\|Output]; % " comment inserted to help some editors with highlighting the generated parser
            $\' -> [$\',$\\|Output]; % ' comment inserted to help some editors with highlighting the generated parser
            $\b -> [$b,$\\|Output];
            $\d -> [$d,$\\|Output];
            $\e -> [$e,$\\|Output];
            $\f -> [$f,$\\|Output];
            $\n -> [$n,$\\|Output];
            $\r -> [$r,$\\|Output];
            $\s -> [$s,$\\|Output];
            $\t -> [$t,$\\|Output];
            $\v -> [$v,$\\|Output];
            _   -> [H|Output]
        end).

%% Record {Symbol, Index} under the `lhs` key of the memo table, prepending to
%% whatever list is already stored there.
-spec add_lhs(binary(), index()) -> true.
add_lhs(Symbol, Index) ->
    case ets:lookup(memo_table_name(), lhs) of
        [] ->
            ets:insert(memo_table_name(), {lhs, [{Symbol,Index}]});
        [{lhs, L}] when is_list(L) ->
            ets:insert(memo_table_name(), {lhs, [{Symbol,Index}|L]})
    end.

%% Record {Symbol, Index} under the `nts` key of the memo table. Returns `ok`
%% without touching the table when Symbol is already present.
-spec add_nt(binary(), index()) -> true | ok.
add_nt(Symbol, Index) ->
    case ets:lookup(memo_table_name(), nts) of
        [] ->
            ets:insert(memo_table_name(), {nts, [{Symbol,Index}]});
        [{nts, L}] when is_list(L) ->
            case proplists:is_defined(Symbol, L) of
                true ->
                    ok;
                _ ->
                    ets:insert(memo_table_name(), {nts, [{Symbol,Index}|L]})
            end
    end.

%% verify_rules/0 returns the root entry recorded by add_lhs/2 (a
%% {Symbol, Index} pair), or exits when a nonterminal has no reduction.
-spec verify_rules() -> {binary(), index()} | no_return().
verify_rules() ->
    [{lhs, LHS}] = ets:lookup(memo_table_name(), lhs),
    [{nts, NTs}] = ets:lookup(memo_table_name(), nts),
    %% add_lhs/2 prepends, so reversing puts the earliest recorded rule first.
    [Root|NonRoots] = lists:reverse(LHS),
    %% Warn about every non-root rule that is never referenced.
    lists:foreach(fun({Sym,Idx}) ->
                      case proplists:is_defined(Sym, NTs) of
                          true ->
                              ok;
                          _ ->
                              io:format("neotoma warning: rule '~s' is unused. ~p~n", [Sym,Idx])
                      end
                  end, NonRoots),
    %% Abort when a referenced nonterminal has no defining rule.
    lists:foreach(fun({S,I}) ->
                      case proplists:is_defined(S, LHS) of
                          true ->
                              ok;
                          _ ->
                              io:format("neotoma error: nonterminal '~s' has no reduction. (found at ~p) No parser will be generated!~n", [S,I]),
                              %% NOTE(review): list_to_atom/1 on grammar text creates a new
                              %% atom; kept because the exit reason is part of the contract.
                              exit({neotoma, {no_reduction, list_to_atom(binary_to_list(S))}})
                      end
                  end, NTs),
    Root.

%% Add combinator C to the ordered set stored under the `combinators` key of
%% the memo table, creating the set on first use.
-spec used_combinator(atom()) -> true.
used_combinator(C) ->
    case ets:lookup(memo_table_name(), combinators) of
        [] ->
            ets:insert(memo_table_name(), {combinators, ordsets:from_list([C])});
        [{combinators, Cs}] ->
            ets:insert(memo_table_name(), {combinators, ordsets:add_element(C, Cs)})
    end.

%% Tokenize a transform body and report which of the variables 'Node' and
%% 'Idx' it mentions, as a sorted, duplicate-free list. The argument may be a
%% binary or nested chardata (the default transform is built as an iolist).
-spec used_transform_variables(unicode:chardata()) -> [ 'Node' | 'Idx' ].
used_transform_variables(Transform) ->
    Code = unicode:characters_to_list(Transform),
    {ok, Tokens, _} = erl_scan:string(Code),
    used_transform_variables(Tokens, []).

used_transform_variables([{var, _, Name}|Tokens], Acc) ->
    used_transform_variables(Tokens, case Name of
                                         'Node' -> [Name | Acc];
                                         'Idx'  -> [Name | Acc];
                                         _      -> Acc
                                     end);
used_transform_variables([_|Tokens], Acc) ->
    used_transform_variables(Tokens, Acc);
used_transform_variables([], Acc) ->
    lists:usort(Acc).

%% Read Filename and parse its contents; a failed read is returned unchanged.
-spec file(file:name()) -> any().
file(Filename) ->
    case file:read_file(Filename) of
        {ok,Bin} -> parse(Bin);
        Err -> Err
    end.

%% Parse a grammar given as a binary or character list. Returns the AST when
%% the whole input is consumed, otherwise whatever the 'rules' parser returned
%% (a failure tuple or a partial match).
-spec parse(binary() | list()) -> any().
parse(List) when is_list(List) ->
    parse(unicode:characters_to_binary(List));
parse(Input) when is_binary(Input) ->
    _ = setup_memo(),
    %% `after` guarantees the memo table is deleted even when parsing raises
    %% (verify_rules/0 exits on an undefined nonterminal); without it the
    %% table would leak whenever the caller catches the exception.
    try 'rules'(Input,{{line,1},{column,1}}) of
        {AST, <<>>, _Index} -> AST;
        Any -> Any
    after
        release_memo()
    end.

-spec 'rules'(input(), index()) -> parse_result().
%% Top-level rule: the declaration sequence, followed by an optional code
%% block, with optional whitespace around each part.
'rules'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_seq([p_optional(fun 'space'/2),
                         fun 'declaration_sequence'/2,
                         p_optional(fun 'space'/2),
                         p_optional(fun 'code_block'/2),
                         p_optional(fun 'space'/2)]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        %% The rule check runs before any output is assembled.
        RootRule = verify_rules(),
        Declarations = lists:nth(2, Node),
        Rules = unicode:characters_to_binary([[Rule, "\n\n"] || Rule <- Declarations]),
        Code = case lists:nth(4, Node) of
            {code, Block} -> Block;
            _ -> []
        end,
        [{rules, Rules},
         {code, Code},
         {root, RootRule},
         {transform, ets:lookup(memo_table_name(),gen_transform)},
         {combinators, ets:lookup_element(memo_table_name(), combinators, 2)}]
    end,
    p(Input, Index, 'rules', Parse, Transform).

%% One declaration followed by zero or more whitespace-separated declarations;
%% yields the flat list of their transformed results.
-spec 'declaration_sequence'(input(), index()) -> parse_result().
'declaration_sequence'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Following = p_zero_or_more(p_seq([fun 'space'/2, fun 'declaration'/2])),
        Grammar = p_seq([p_label('head', fun 'declaration'/2),
                         p_label('tail', Following)]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        First = proplists:get_value(head, Node),
        %% Each tail entry is [Space, Declaration]; keep the declaration only.
        Others = [Decl || [_, Decl] <- proplists:get_value(tail, Node, [])],
        [First | Others]
    end,
    p(Input, Index, 'declaration_sequence', Parse, Transform).

-spec 'declaration'(input(), index()) -> parse_result().
%% A single rule: `nonterminal <- parsing_expression [code_block] ;`.
%% The transform emits the Erlang source text of the generated rule function.
'declaration'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_seq([fun 'nonterminal'/2,
                         p_zero_or_more(fun 'space'/2),
                         p_string(<<"<-">>),
                         p_zero_or_more(fun 'space'/2),
                         fun 'parsing_expression'/2,
                         p_optional(fun 'space'/2),
                         p_optional(fun 'code_block'/2),
                         p_optional(fun 'space'/2),
                         p_string(<<";">>)]),
        Grammar(Inp, Pos)
    end,
    Emit = fun(Node, _Idx) ->
        [{nonterminal,Symbol}|Tail] = Node,
        add_lhs(Symbol, Index),
        %% Without an explicit code block the rule delegates to transform/3,
        %% and the memo table is flagged so that this is recorded.
        Transform = case lists:nth(6,Tail) of
            {code, CodeBlock} ->
                CodeBlock;
            _ ->
                ets:insert_new(memo_table_name(),{gen_transform, true}),
                ["transform('",Symbol,"', Node, Idx)"]
        end,
        %% Variables the transform does not mention get an underscore prefix.
        TransformArgs = case used_transform_variables(Transform) of
            []              -> "_Node, _Idx";
            ['Idx']         -> "_Node, Idx";
            ['Node']        -> "Node, _Idx";
            ['Idx', 'Node'] -> "Node, Idx"
        end,
        Expression = lists:nth(4, Tail),
        ["-spec '", Symbol, "'(input(), index()) -> parse_result().\n",
         "'",Symbol,"'","(Input, Index) ->\n ",
         "p(Input, Index, '",Symbol,"', fun(I,D) -> (",
         Expression,
         ")(I,D) end, fun(", TransformArgs, ") ->",Transform," end)."]
    end,
    p(Input, Index, 'declaration', Parse, Emit).

%% Either a choice, a sequence or a primary; the result is passed through.
-spec 'parsing_expression'(input(), index()) -> parse_result().
'parsing_expression'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_choose([fun 'choice'/2, fun 'sequence'/2, fun 'primary'/2]),
        Grammar(Inp, Pos)
    end,
    Identity = fun(Node, _Idx) -> Node end,
    p(Input, Index, 'parsing_expression', Parse, Identity).

%% Two or more alternatives separated by `/`; emits a p_choose([...]) call.
-spec 'choice'(input(), index()) -> parse_result().
'choice'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        More = p_one_or_more(p_seq([fun 'space'/2,
                                    p_string(<<"\/">>),
                                    fun 'space'/2,
                                    fun 'alternative'/2])),
        Grammar = p_seq([p_label('head', fun 'alternative'/2),
                         p_label('tail', More)]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        %% The alternative is the last element of each tail entry.
        Alternatives = [lists:last(Entry) || Entry <- proplists:get_value(tail, Node)],
        Head = proplists:get_value(head, Node),
        Statements = [[", ", Alt] || Alt <- Alternatives],
        used_combinator(p_choose),
        ["p_choose([", Head, Statements, "])"]
    end,
    p(Input, Index, 'choice', Parse, Transform).

%% One branch of a choice: a sequence or a labeled primary, passed through.
-spec 'alternative'(input(), index()) -> parse_result().
'alternative'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_choose([fun 'sequence'/2, fun 'labeled_primary'/2]),
        Grammar(Inp, Pos)
    end,
    Identity = fun(Node, _Idx) -> Node end,
    p(Input, Index, 'alternative', Parse, Identity).
%% An atomic expression with an optional prefix (& or !) or suffix (+, *, ?).
%% Emits the matching combinator call and records which combinators are used.
-spec 'primary'(input(), index()) -> parse_result().
'primary'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_choose([p_seq([fun 'prefix'/2, fun 'atomic'/2]),
                            p_seq([fun 'atomic'/2, fun 'suffix'/2]),
                            fun 'atomic'/2]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        case Node of
            [Expr, one_or_more] ->
                used_combinator(p_one_or_more),
                used_combinator(p_scan),
                ["p_one_or_more(", Expr, ")"];
            [Expr, zero_or_more] ->
                used_combinator(p_zero_or_more),
                used_combinator(p_scan),
                ["p_zero_or_more(", Expr, ")"];
            [Expr, optional] ->
                used_combinator(p_optional),
                ["p_optional(", Expr, ")"];
            [assert, Expr] ->
                used_combinator(p_assert),
                ["p_assert(", Expr, ")"];
            [not_, Expr] ->
                used_combinator(p_not),
                ["p_not(", Expr, ")"];
            _ ->
                Node
        end
    end,
    p(Input, Index, 'primary', Parse, Transform).

%% Two or more whitespace-separated labeled primaries; emits p_seq([...]).
-spec 'sequence'(input(), index()) -> parse_result().
'sequence'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        More = p_one_or_more(p_seq([fun 'space'/2, fun 'labeled_primary'/2])),
        Grammar = p_seq([p_label('head', fun 'labeled_primary'/2),
                         p_label('tail', More)]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        %% Each tail entry is [Space, Primary]; keep the second element.
        Elements = [lists:nth(2, Entry) || Entry <- proplists:get_value(tail, Node)],
        Head = proplists:get_value(head, Node),
        Statements = [[", ", Element] || Element <- Elements],
        used_combinator(p_seq),
        ["p_seq([", Head, Statements, "])"]
    end,
    p(Input, Index, 'sequence', Parse, Transform).

%% A primary with an optional `label:` in front; a labeled one is wrapped in
%% a p_label call, an unlabeled one is returned as is.
-spec 'labeled_primary'(input(), index()) -> parse_result().
'labeled_primary'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_seq([p_optional(fun 'label'/2), fun 'primary'/2]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        case hd(Node) of
            [] ->
                lists:nth(2, Node);
            Label ->
                used_combinator(p_label),
                ["p_label('", Label, "', ", lists:nth(2, Node), ")"]
        end
    end,
    p(Input, Index, 'labeled_primary', Parse, Transform).

%% An identifier followed by `:`; the transform drops the trailing colon.
-spec 'label'(input(), index()) -> parse_result().
'label'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_seq([fun 'alpha_char'/2,
                         p_zero_or_more(fun 'alphanumeric_char'/2),
                         p_string(<<":">>)]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        KeepCount = length(Node) - 1,
        lists:sublist(Node, KeepCount)
    end,
    p(Input, Index, 'label', Parse, Transform).

-spec 'suffix'(input(), index()) -> parse_result().
%% A repetition or optional suffix, mapped to an atom naming the operation.
'suffix'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_choose([fun 'repetition_suffix'/2, fun 'optional_suffix'/2]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        case Node of
            <<"*">> -> zero_or_more;
            <<"+">> -> one_or_more;
            <<"?">> -> optional
        end
    end,
    p(Input, Index, 'suffix', Parse, Transform).

%% The literal `?`, passed through.
-spec 'optional_suffix'(input(), index()) -> parse_result().
'optional_suffix'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_string(<<"?">>),
        Grammar(Inp, Pos)
    end,
    Identity = fun(Node, _Idx) -> Node end,
    p(Input, Index, 'optional_suffix', Parse, Identity).

%% The literal `+` or `*`, passed through.
-spec 'repetition_suffix'(input(), index()) -> parse_result().
'repetition_suffix'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_choose([p_string(<<"+">>), p_string(<<"*">>)]),
        Grammar(Inp, Pos)
    end,
    Identity = fun(Node, _Idx) -> Node end,
    p(Input, Index, 'repetition_suffix', Parse, Identity).

%% The literal `&` or `!`, mapped to the atoms `assert` and `not_`.
-spec 'prefix'(input(), index()) -> parse_result().
'prefix'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_choose([p_string(<<"&">>), p_string(<<"!">>)]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        case Node of
            <<"&">> -> assert;
            <<"!">> -> not_
        end
    end,
    p(Input, Index, 'prefix', Parse, Transform).

%% A terminal, a nonterminal or a parenthesized expression. A nonterminal is
%% emitted as a reference to its generated rule function.
-spec 'atomic'(input(), index()) -> parse_result().
'atomic'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_choose([fun 'terminal'/2,
                            fun 'nonterminal'/2,
                            fun 'parenthesized_expression'/2]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        case Node of
            {nonterminal, Symbol} ->
                [<<"fun '">>, Symbol, <<"'/2">>];
            _ ->
                Node
        end
    end,
    p(Input, Index, 'atomic', Parse, Transform).

%% `( parsing_expression )` with optional inner whitespace; yields the inner
%% expression, which is the third element of the match.
-spec 'parenthesized_expression'(input(), index()) -> parse_result().
'parenthesized_expression'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_seq([p_string(<<"(">>),
                         p_optional(fun 'space'/2),
                         fun 'parsing_expression'/2,
                         p_optional(fun 'space'/2),
                         p_string(<<")">>)]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        lists:nth(3, Node)
    end,
    p(Input, Index, 'parenthesized_expression', Parse, Transform).

%% An identifier. The symbol is recorded with add_nt/2 together with the
%% index where it was found, and returned as {nonterminal, Symbol}.
-spec 'nonterminal'(input(), index()) -> parse_result().
'nonterminal'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_seq([fun 'alpha_char'/2,
                         p_zero_or_more(fun 'alphanumeric_char'/2)]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, Idx) ->
        Symbol = unicode:characters_to_binary(Node),
        add_nt(Symbol, Idx),
        {nonterminal, Symbol}
    end,
    p(Input, Index, 'nonterminal', Parse, Transform).
%% A terminal: regexp string, quoted string, character class or `.`; the
%% chosen alternative's result is passed through unchanged.
-spec 'terminal'(input(), index()) -> parse_result().
'terminal'(Input, Index) ->
    p(Input, Index, 'terminal',
      fun(I,D) ->
          (p_choose([fun 'regexp_string'/2,
                     fun 'quoted_string'/2,
                     fun 'character_class'/2,
                     fun 'anything_symbol'/2]))(I,D)
      end,
      fun(Node, _Idx) ->Node end).

%% A regular expression delimited by `#`, where `\#` is accepted inside the
%% body. Emits a p_regexp(<<"...">>) call and records p_regexp as used.
-spec 'regexp_string'(input(), index()) -> parse_result().
'regexp_string'(Input, Index) ->
    p(Input, Index, 'regexp_string',
      fun(I,D) ->
          (p_seq([p_string(<<"#">>),
                  p_label('string', p_one_or_more(p_seq([p_not(p_string(<<"#">>)), p_choose([p_string(<<"\\#">>), p_anything()])]))),
                  p_string(<<"#">>)]))(I,D)
      end,
      fun(Node, _Idx) ->
          used_combinator(p_regexp),
          ["p_regexp(<<\"",
           % Escape \ and " as they are used in Erlang strings. Other symbols stay as is.
           %  \ -> \\
           %  " -> \"
           re:replace(proplists:get_value(string, Node), "\"|\\\\", "\\\\&", [{return, binary}, global]),
           "\">>)"]
      end).

%% A single- or double-quoted string. Emits a flat p_string(<<"...">>) call
%% with the string contents passed through escape_string/1.
-spec 'quoted_string'(input(), index()) -> parse_result().
'quoted_string'(Input, Index) ->
    p(Input, Index, 'quoted_string',
      fun(I,D) ->
          (p_choose([fun 'single_quoted_string'/2,
                     fun 'double_quoted_string'/2]))(I,D)
      end,
      fun(Node, _Idx) ->
          used_combinator(p_string),
          lists:flatten(["p_string(<<\"",
                         escape_string(unicode:characters_to_list(proplists:get_value(string, Node))),
                         "\">>)"])
      end).

%% A string in double quotes; `\\` and `\"` are accepted inside it. The
%% contents are labeled `string` for quoted_string to pick up.
-spec 'double_quoted_string'(input(), index()) -> parse_result().
'double_quoted_string'(Input, Index) ->
    p(Input, Index, 'double_quoted_string',
      fun(I,D) ->
          (p_seq([p_string(<<"\"">>),
                  p_label('string', p_zero_or_more(p_seq([p_not(p_string(<<"\"">>)), p_choose([p_string(<<"\\\\">>), p_string(<<"\\\"">>), p_anything()])]))),
                  p_string(<<"\"">>)]))(I,D)
      end,
      fun(Node, _Idx) ->Node end).

-spec 'single_quoted_string'(input(), index()) -> parse_result().
%% A string in single quotes; `\\` and `\'` are accepted inside it. The
%% contents are labeled `string`.
'single_quoted_string'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Body = p_zero_or_more(p_seq([p_not(p_string(<<"\'">>)),
                                     p_choose([p_string(<<"\\\\">>),
                                               p_string(<<"\\\'">>),
                                               p_anything()])])),
        Grammar = p_seq([p_string(<<"\'">>),
                         p_label('string', Body),
                         p_string(<<"\'">>)]),
        Grammar(Inp, Pos)
    end,
    Identity = fun(Node, _Idx) -> Node end,
    p(Input, Index, 'single_quoted_string', Parse, Identity).

%% A bracketed character class such as [a-z]; a backslash always consumes the
%% character after it. Emits a p_charclass(<<"[...]">>) call.
-spec 'character_class'(input(), index()) -> parse_result().
'character_class'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Escaped = p_seq([p_string(<<"\\\\">>), p_anything()]),
        Plain = p_seq([p_not(p_string(<<"\\\\">>)), p_anything()]),
        Body = p_one_or_more(p_seq([p_not(p_string(<<"]">>)),
                                    p_choose([Escaped, Plain])])),
        Grammar = p_seq([p_string(<<"[">>),
                         p_label('characters', Body),
                         p_string(<<"]">>)]),
        Grammar(Inp, Pos)
    end,
    Transform = fun(Node, _Idx) ->
        used_combinator(p_charclass),
        Characters = unicode:characters_to_list(proplists:get_value(characters, Node)),
        ["p_charclass(<<\"[", escape_string(Characters), "]\">>)"]
    end,
    p(Input, Index, 'character_class', Parse, Transform).

%% The `.` symbol; emits a p_anything() call.
-spec 'anything_symbol'(input(), index()) -> parse_result().
'anything_symbol'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_string(<<".">>),
        Grammar(Inp, Pos)
    end,
    Transform = fun(_Node, _Idx) ->
        used_combinator(p_anything),
        <<"p_anything()">>
    end,
    p(Input, Index, 'anything_symbol', Parse, Transform).

%% One ASCII letter or underscore.
-spec 'alpha_char'(input(), index()) -> parse_result().
'alpha_char'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_charclass(<<"[A-Za-z_]">>),
        Grammar(Inp, Pos)
    end,
    Identity = fun(Node, _Idx) -> Node end,
    p(Input, Index, 'alpha_char', Parse, Identity).

%% One alpha_char or ASCII digit.
-spec 'alphanumeric_char'(input(), index()) -> parse_result().
'alphanumeric_char'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_choose([fun 'alpha_char'/2, p_charclass(<<"[0-9]">>)]),
        Grammar(Inp, Pos)
    end,
    Identity = fun(Node, _Idx) -> Node end,
    p(Input, Index, 'alphanumeric_char', Parse, Identity).

%% One or more whitespace characters or to-end-of-line comments.
-spec 'space'(input(), index()) -> parse_result().
'space'(Input, Index) ->
    Parse = fun(Inp, Pos) ->
        Grammar = p_one_or_more(p_choose([fun 'white'/2, fun 'comment_to_eol'/2])),
        Grammar(Inp, Pos)
    end,
    Identity = fun(Node, _Idx) -> Node end,
    p(Input, Index, 'space', Parse, Identity).

-spec 'comment_to_eol'(input(), index()) -> parse_result().
%% A `%` comment running up to (not including) the next newline. A `%{`
%% opener is rejected here because it starts a code block instead.
'comment_to_eol'(Input, Index) ->
    p(Input, Index, 'comment_to_eol',
      fun(I,D) ->
          (p_seq([p_not(p_string(<<"%{">>)),
                  p_string(<<"%">>),
                  p_zero_or_more(p_seq([p_not(p_string(<<"\n">>)), p_anything()]))]))(I,D)
      end,
      fun(Node, _Idx) ->Node end).

%% One whitespace character: space, tab, newline or carriage return.
-spec 'white'(input(), index()) -> parse_result().
'white'(Input, Index) ->
    p(Input, Index, 'white',
      fun(I,D) -> (p_charclass(<<"[\s\t\n\r]">>))(I,D) end,
      fun(Node, _Idx) ->Node end).

%% A transform code block: `%{ ... %}`, a backtick-delimited block, or the
%% shorthand `~`, which stands for the code `Node`. Yields {code, Code}.
-spec 'code_block'(input(), index()) -> parse_result().
'code_block'(Input, Index) ->
    p(Input, Index, 'code_block',
      fun(I,D) ->
          (p_choose([p_seq([p_string(<<"%{">>),
                            p_label('code', p_one_or_more(p_choose([p_string(<<"\\%">>), p_string(<<"$%">>), p_seq([p_not(p_string(<<"%}">>)), p_anything()])]))),
                            p_string(<<"%}">>)]),
                     p_seq([p_string(<<"`">>),
                            p_label('code', p_one_or_more(p_choose([p_string(<<"\\`">>), p_string(<<"$`">>), p_seq([p_not(p_string(<<"`">>)), p_anything()])]))),
                            p_string(<<"`">>)]),
                     p_string(<<"~">>)]))(I,D)
      end,
      fun(Node, _Idx) ->
          case Node of
              <<"~">> -> {code, <<"Node">>};
              _   -> {code, proplists:get_value('code', Node)}
          end
      end).

-file("peg_includes.hrl", 1).
-type index() :: {{line, pos_integer()}, {column, pos_integer()}}.
-type input() :: binary().
-type parse_failure() :: {fail, term()}.
-type parse_success() :: {term(), input(), index()}.
-type parse_result() :: parse_failure() | parse_success().
-type parse_fun() :: fun((input(), index()) -> parse_result()).
%% A transform receives the matched node (any term) and the start index.
-type xform_fun() :: fun((term(), index()) -> term()).

%% Memoizing wrapper around one rule. Returns the stored result for
%% {StartIndex, Name} when there is one; otherwise runs ParseFun, applies
%% TransformFun to a successful match, stores the result and returns it.
-spec p(input(), index(), atom(), parse_fun(), xform_fun()) -> parse_result().
p(Inp, StartIndex, Name, ParseFun, TransformFun) ->
    case get_memo(StartIndex, Name) of      % See if the current reduction is memoized
        {ok, Memo} ->                       % If it is, return the stored result
            Memo;
        _ ->                                % If not, attempt to parse
            Result = case ParseFun(Inp, StartIndex) of
                {fail,_} = Failure ->       % If it fails, memoize the failure
                    Failure;
                {Match, InpRem, NewIndex} -> % If it passes, transform and memoize the result.
                    Transformed = TransformFun(Match, StartIndex),
                    {Transformed, InpRem, NewIndex}
            end,
            memoize(StartIndex, Name, Result),
            Result
    end.

%% Create a fresh ETS table, remember it in the process dictionary and return
%% its identifier (put/2 alone would return the previous dictionary value).
-spec setup_memo() -> ets:tid().
setup_memo() ->
    Tid = ets:new(?MODULE, [set]),
    _ = put({parse_memo_table, ?MODULE}, Tid),
    Tid.

%% Delete the memo table of the calling process.
-spec release_memo() -> true.
release_memo() ->
    ets:delete(memo_table_name()).

%% Store Result for rule Name at Index, in front of any results already
%% stored for that index.
-spec memoize(index(), atom(), parse_result()) -> true.
memoize(Index, Name, Result) ->
    Memo = case ets:lookup(memo_table_name(), Index) of
        [] -> [];
        [{Index, Plist}] -> Plist
    end,
    ets:insert(memo_table_name(), {Index, [{Name, Result}|Memo]}).

%% Look up the stored result of rule Name at Index.
-spec get_memo(index(), atom()) -> {ok, term()} | {error, not_found}.
get_memo(Index, Name) ->
    case ets:lookup(memo_table_name(), Index) of
        [] -> {error, not_found};
        [{Index, Plist}] ->
            case proplists:lookup(Name, Plist) of
                {Name, Result} -> {ok, Result};
                _ -> {error, not_found}
            end
    end.

%% The memo table identifier stored by setup_memo/0.
-spec memo_table_name() -> ets:tid().
memo_table_name() ->
    get({parse_memo_table, ?MODULE}).

-ifdef(p_eof).
%% Succeeds only on empty input.
-spec p_eof() -> parse_fun().
p_eof() ->
    fun(<<>>, Index) -> {eof, [], Index};
       (_, Index) -> {fail, {expected, eof, Index}} end.
-endif.

-ifdef(p_optional).
%% Turns a failure of P into an empty match that consumes nothing.
-spec p_optional(parse_fun()) -> parse_fun().
p_optional(P) ->
    fun(Input, Index) ->
        case P(Input, Index) of
            {fail,_} -> {[], Input, Index};
            {_, _, _} = Success -> Success
        end
    end.
-endif.

-ifdef(p_not).
%% Negative lookahead: succeeds, consuming nothing, only when P fails.
-spec p_not(parse_fun()) -> parse_fun().
p_not(P) ->
    fun(Input, Index)->
        case P(Input,Index) of
            {fail,_} ->
                {[], Input, Index};
            {Result, _, _} -> {fail, {expected, {no_match, Result},Index}}
        end
    end.
-endif.

-ifdef(p_assert).
%% Positive lookahead: succeeds, consuming nothing, only when P succeeds.
-spec p_assert(parse_fun()) -> parse_fun().
p_assert(P) ->
    fun(Input,Index) ->
        case P(Input,Index) of
            {fail,_} = Failure-> Failure;
            _ -> {[], Input, Index}
        end
    end.
-endif.

-ifdef(p_seq).
%% Runs the parsers in order; fails as soon as one of them fails.
-spec p_seq([parse_fun()]) -> parse_fun().
p_seq(P) ->
    fun(Input, Index) ->
        p_all(P, Input, Index, [])
    end.

-spec p_all([parse_fun()], input(), index(), [term()]) -> parse_result().
%% Worker for p_seq/1: threads the remaining input and index through each
%% parser, collecting the results in order.
p_all([], Rest, Pos, Collected) ->
    {lists:reverse(Collected), Rest, Pos};
p_all([Parser|Others], Rest, Pos, Collected) ->
    case Parser(Rest, Pos) of
        {fail, _} = Failure ->
            Failure;
        {Result, NextRest, NextPos} ->
            p_all(Others, NextRest, NextPos, [Result|Collected])
    end.
-endif.

-ifdef(p_choose).
%% Ordered choice: the first parser that succeeds wins.
-spec p_choose([parse_fun()]) -> parse_fun().
p_choose(Parsers) ->
    fun(Input, Index) ->
        p_attempt(Parsers, Input, Index, none)
    end.

%% Worker for p_choose/1. When every parser fails, the failure of the first
%% one tried is returned.
-spec p_attempt([parse_fun()], input(), index(), none | parse_failure()) -> parse_result().
p_attempt([], _Input, _Index, Failure) ->
    Failure;
p_attempt([Parser|Others], Input, Index, FirstFailure)->
    case Parser(Input, Index) of
        {fail, _} = Failure ->
            %% Only the earliest failure is remembered.
            Remembered = case FirstFailure of
                none -> Failure;
                _ -> FirstFailure
            end,
            p_attempt(Others, Input, Index, Remembered);
        Success ->
            Success
    end.
-endif.

-ifdef(p_zero_or_more).
%% Applies P repeatedly, collecting matches; never fails.
-spec p_zero_or_more(parse_fun()) -> parse_fun().
p_zero_or_more(P) ->
    fun(Input, Index) ->
        p_scan(P, Input, Index, [])
    end.
-endif.

-ifdef(p_one_or_more).
%% Like p_zero_or_more/1, but fails unless P matched at least once.
-spec p_one_or_more(parse_fun()) -> parse_fun().
p_one_or_more(P) ->
    fun(Input, Index)->
        case p_scan(P, Input, Index, []) of
            {[_|_], _, _} = Matched ->
                Matched;
            _ ->
                %% Run P once more to obtain its failure description.
                {fail, {expected, Failure, _}} = P(Input,Index),
                {fail, {expected, {at_least_one, Failure}, Index}}
        end
    end.
-endif.

-ifdef(p_label).
%% Wraps a successful result of P as {Tag, Result}.
-spec p_label(atom(), parse_fun()) -> parse_fun().
p_label(Tag, P) ->
    fun(Input, Index) ->
        case P(Input, Index) of
            {fail,_} = Failure ->
                Failure;
            {Result, Rest, NextIndex} ->
                {{Tag, Result}, Rest, NextIndex}
        end
    end.
-endif.

-ifdef(p_scan).
%% Repeats P until it fails or the input is exhausted, returning the matches
%% in order together with the remaining input and index.
-spec p_scan(parse_fun(), input(), index(), [term()]) -> {[term()], input(), index()}.
p_scan(_, <<>>, Pos, Collected) ->
    {lists:reverse(Collected), <<>>, Pos};
p_scan(P, Rest, Pos, Collected) ->
    case P(Rest, Pos) of
        {fail,_} ->
            {lists:reverse(Collected), Rest, Pos};
        {Result, NextRest, NextPos} ->
            p_scan(P, NextRest, NextPos, [Result | Collected])
    end.
-endif.

-ifdef(p_string).
-spec p_string(binary()) -> parse_fun().
%% Matches the literal binary S at the start of the input. On success the
%% result is S itself; otherwise {fail, {expected, {string, S}, Index}}.
p_string(S) ->
    Length = erlang:byte_size(S),
    fun(Input, Index) ->
        %% S and Length are already bound, so the pattern only matches input
        %% that begins with exactly the bytes of S.
        case Input of
            <<S:Length/binary, Rest/binary>> ->
                {S, Rest, p_advance_index(S, Index)};
            _ ->
                {fail, {expected, {string, S}, Index}}
        end
    end.
-endif.

-ifdef(p_anything).
%% Matches any single UTF-8 encoded character; fails on empty input.
-spec p_anything() -> parse_fun().
p_anything() ->
    fun(<<>>, Index) ->
           {fail, {expected, any_character, Index}};
       (Input, Index) when is_binary(Input) ->
           <<C/utf8, Rest/binary>> = Input,
           {<<C/utf8>>, Rest, p_advance_index(<<C/utf8>>, Index)}
    end.
-endif.

-ifdef(p_charclass).
%% Matches the regular expression Class, anchored at the start of the input.
%% The expression is compiled once, when the parser is built.
-spec p_charclass(string() | binary()) -> parse_fun().
p_charclass(Class) ->
    {ok, RE} = re:compile(Class, [unicode, dotall]),
    fun(Inp, Index) ->
        case re:run(Inp, RE, [anchored]) of
            {match, [{0, Length}|_]} ->
                {Head, Tail} = erlang:split_binary(Inp, Length),
                {Head, Tail, p_advance_index(Head, Index)};
            %% NOTE(review): binary_to_list/1 raises badarg if Class was given
            %% as a string, which the spec allows - confirm callers pass binaries.
            _ -> {fail, {expected, {character_class, binary_to_list(Class)}, Index}}
        end
    end.
-endif.

-ifdef(p_regexp).
%% Matches the regular expression Regexp, anchored at the start of the input.
-spec p_regexp(binary()) -> parse_fun().
p_regexp(Regexp) ->
    {ok, RE} = re:compile(Regexp, [unicode, dotall, anchored]),
    fun(Inp, Index) ->
        case re:run(Inp, RE) of
            {match, [{0, Length}|_]} ->
                {Head, Tail} = erlang:split_binary(Inp, Length),
                {Head, Tail, p_advance_index(Head, Index)};
            _ -> {fail, {expected, {regexp, binary_to_list(Regexp)}, Index}}
        end
    end.
-endif.

-ifdef(line).
%% Line number of an index, or `undefined` for any other term.
-spec line(index() | term()) -> pos_integer() | undefined.
line({{line,L},_}) -> L;
line(_) -> undefined.
-endif.

-ifdef(column).
%% Column number of an index, or `undefined` for any other term.
-spec column(index() | term()) -> pos_integer() | undefined.
column({_,{column,C}}) -> C;
column(_) -> undefined.
-endif.

-spec p_advance_index(input() | unicode:charlist() | pos_integer(), index()) -> index().
%% Advance a {{line, L}, {column, C}} index past the matched input.
%% A newline moves to column 1 of the next line; any other character moves
%% one column to the right. Strings and binaries are advanced one character
%% at a time.
p_advance_index(MatchedInput, Index) when is_list(MatchedInput) orelse is_binary(MatchedInput) -> % strings
    lists:foldl(fun p_advance_index/2, Index, unicode:characters_to_list(MatchedInput));
p_advance_index(MatchedInput, Index) when is_integer(MatchedInput) -> % single characters
    {{line, Line}, {column, Col}} = Index,
    case MatchedInput of
        $\n -> {{line, Line+1}, {column, 1}};
        _ -> {{line, Line}, {column, Col+1}}
    end.