From 5833971a52a4e6e078988fa88738e5a5180cce36 Mon Sep 17 00:00:00 2001 From: Ryan Kavanagh Date: Tue, 2 Jun 2020 14:27:47 -0400 Subject: Split common parsing functions into separate function --- gemini/mimeTextGemini.ml | 17 +++-------------- gemini/mimeType.ml | 13 ++----------- gemini/parseCommon.ml | 43 +++++++++++++++++++++++++++++++++++++++++++ gemini/parseCommon.mli | 18 ++++++++++++++++++ 4 files changed, 66 insertions(+), 25 deletions(-) create mode 100644 gemini/parseCommon.ml create mode 100644 gemini/parseCommon.mli (limited to 'gemini') diff --git a/gemini/mimeTextGemini.ml b/gemini/mimeTextGemini.ml index e73be65..2f4d5ca 100644 --- a/gemini/mimeTextGemini.ml +++ b/gemini/mimeTextGemini.ml @@ -45,6 +45,8 @@ end module MimeTextGemini : MIME_TEXT_GEMINI = struct + open ParseCommon.ParseCommon + exception ParseError of string type gemini_line = @@ -96,24 +98,11 @@ struct let (_, s) = List.fold_right f g (false, "") in s - let parse_string p s = match Angstrom.parse_string ~consume:All (p >>= return) s with | Ok parsed -> parsed | Error m -> raise (ParseError m) - let is_space = function - | ' ' | '\t' -> true - | _ -> false - - let is_cr = function - | '\r' -> true - | _ -> false - - let skip_spaces = skip_while is_space - - let take_till_cr = take_till is_cr <* string "\r\n" - let header_parser = lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr) <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr) @@ -123,7 +112,7 @@ struct let str_to_header_line = parse_string header_parser let link_parser = - let end_of_url b = is_space b || is_cr b in + let end_of_url b = is_whitespace b || is_cr b in let to_link s = function | "" -> Link (s, None) | n -> Link (s, Some n) diff --git a/gemini/mimeType.ml b/gemini/mimeType.ml index 1682416..0cfd063 100644 --- a/gemini/mimeType.ml +++ b/gemini/mimeType.ml @@ -33,19 +33,10 @@ struct (* FIXME: parameter does not handle quoted strings and does not try to ensure that everything is well-formed *) let open Angstrom in - let lift_or p q b = p b || q b in - let is_letter = function - | 'a' .. 'z' | 'A' .. 'Z' -> true - | _ -> false in - let is_digit = function - | '0' .. '9' -> true - | _ -> false in + let open ParseCommon.ParseCommon in let is_symbol = function | '!' | '#' | '$' | '&' | '-' | '^' | '_' | '.' | '+' -> true | _ -> false in - let is_space = function - | ' ' | '\t' -> true - | _ -> false in let take_all = take_while (fun _ -> true) in let maybe p = option None (lift (fun s -> Some s) p) in @@ -59,7 +50,7 @@ struct let parse = lift3 create_mimetype (restricted_name "type") ((char '/' *> restricted_name) "subtype") - (maybe (char ';' *> skip_while is_space *> parameter)) + (maybe (char ';' *> skip_while is_whitespace *> parameter)) "mimetype" in match Angstrom.parse_string ~consume:All parse s with | Ok x -> x diff --git a/gemini/parseCommon.ml b/gemini/parseCommon.ml new file mode 100644 index 0000000..999cd83 --- /dev/null +++ b/gemini/parseCommon.ml @@ -0,0 +1,43 @@ +module type PARSE_COMMON = +sig + val is_letter : char -> bool + + val is_digit : char -> bool + + val is_whitespace : char -> bool + + val is_cr : char -> bool + + val lift_or : ('a -> bool) -> ('a -> bool) -> 'a -> bool + + val skip_spaces : unit Angstrom.t + + val take_till_cr : string Angstrom.t +end + +module ParseCommon : PARSE_COMMON = +struct + open Angstrom + + let is_letter = function + | 'a' .. 'z' | 'A' .. 'Z' -> true + | _ -> false + + let is_digit = function + | '0' .. '9' -> true + | _ -> false + + let is_whitespace = function + | ' ' | '\t' -> true + | _ -> false + + let is_cr = function + | '\r' -> true + | _ -> false + + let lift_or p q b = p b || q b + + let skip_spaces = skip_while is_whitespace + + let take_till_cr = take_till is_cr <* string "\r\n" +end diff --git a/gemini/parseCommon.mli b/gemini/parseCommon.mli new file mode 100644 index 0000000..2994e6e --- /dev/null +++ b/gemini/parseCommon.mli @@ -0,0 +1,18 @@ +module type PARSE_COMMON = +sig + val is_letter : char -> bool + + val is_digit : char -> bool + + val is_whitespace : char -> bool + + val is_cr : char -> bool + + val lift_or : ('a -> bool) -> ('a -> bool) -> 'a -> bool + + val skip_spaces : unit Angstrom.t + + val take_till_cr : string Angstrom.t +end + +module ParseCommon : PARSE_COMMON -- cgit v1.2.3