From 08cf9b09f53f999cad569aad9547edf6d2bf819c Mon Sep 17 00:00:00 2001 From: Ryan Kavanagh Date: Mon, 8 Jun 2020 22:04:42 -0400 Subject: Fix parsing errors involving crlf vs eol --- gemini/geminiTransaction.ml | 10 +++++----- gemini/mimeTextGemini.ml | 20 ++++++++++---------- gemini/parseCommon.ml | 12 ++++++++++-- gemini/parseCommon.mli | 4 +++- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/gemini/geminiTransaction.ml b/gemini/geminiTransaction.ml index 29f9ec6..27208b7 100644 --- a/gemini/geminiTransaction.ml +++ b/gemini/geminiTransaction.ml @@ -198,19 +198,19 @@ struct (string "10 " *> return INPUT) <|> (string "11 " *> return SENSITIVE_INPUT) <|> lift2 (fun m -> fun b -> SUCCESS (M.from_string m, b)) - (string "20 " *> take_till_cr) + (string "20 " *> take_till_crlf) (take_while (fun _ -> true) <* end_of_input) <|> lift2 (fun m -> fun b -> SUCCESS_EOCSS (M.from_string m, b)) - (string "21 " *> take_till_cr) + (string "21 " *> take_till_crlf) (take_while (fun _ -> true) <* end_of_input) - <|> lift (fun u -> REDIR_TEMP u) (string "30 " *> take_till_cr) - <|> lift (fun u -> REDIR_PERM u) (string "31 " *> take_till_cr) + <|> lift (fun u -> REDIR_TEMP u) (string "30 " *> take_till_crlf) + <|> lift (fun u -> REDIR_PERM u) (string "31 " *> take_till_crlf) <|> (string "40 " *> return TEMP_FAIL) <|> (string "41 " *> return SERVER_UNAVAILABLE) <|> (string "42 " *> return CGI_ERROR) <|> (string "43 " *> return PROXY_ERROR) <|> lift (fun n -> SLOW_DOWN (int_of_string n)) - (string "44 " *> take_till_cr) + (string "44 " *> take_till_crlf) <|> (string "50 " *> return PERM_FAIL) <|> (string "51 " *> return NOT_FOUND) <|> (string "52 " *> return GONE) diff --git a/gemini/mimeTextGemini.ml b/gemini/mimeTextGemini.ml index 95f432b..385871f 100644 --- a/gemini/mimeTextGemini.ml +++ b/gemini/mimeTextGemini.ml @@ -110,9 +110,9 @@ struct | Error m -> raise (ParseError m) let header_parser = - lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr) - <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr) - <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr) + lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_eol) + <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_eol) + <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_eol) "gemini_header" let str_to_header_line = parse_string header_parser @@ -126,14 +126,14 @@ struct lift2 to_link (string "=>" *> skip_spaces *> take_till end_of_url) (* the optional name, will be "" if there is none *) - (skip_spaces *> take_till_cr) + (skip_spaces *> take_till_eol) "gemini_link" let str_to_link_line = parse_string link_parser (* Makes sure a line is actually a plain text line *) let text_parser = - let take_line = lift (fun s -> Text s) take_till_cr in + let take_line = lift (fun s -> Text s) take_till_eol in peek_char >>= function | Some '#' -> fail "text line cannot start with #" | Some '>' -> fail "text line cannot start with >" @@ -153,12 +153,12 @@ struct (* The gemini spec says that pre blocks are demarcated by lines that start with "```". It says nothing about what follows on these lines. *) - let pre_divider = string "```" *> take_till_cr *> return None + let pre_divider = string "```" *> take_till_eol *> return None let pre_line_parser = peek_string 3 >>= function | "```" -> fail "preformatted line cannot start with ```" - | _ -> lift (fun s -> Pre s) take_till_cr + | _ -> lift (fun s -> Pre s) take_till_eol let pre_block_parser = pre_divider *> many pre_line_parser <* pre_divider @@ -166,12 +166,12 @@ struct let str_to_pre_block = parse_string pre_block_parser let list_item_parser = - lift (fun x -> Ul x) (string "* " *> skip_spaces *> take_till_cr) + lift (fun x -> Ul x) (string "* " *> skip_spaces *> take_till_eol) let str_to_list_line = parse_string list_item_parser let quoted_parser = - lift (fun x -> Ul x) (string ">" *> skip_spaces *> take_till_cr) + lift (fun x -> Ul x) (string ">" *> skip_spaces *> take_till_eol) let str_to_quoted_line = parse_string quoted_parser @@ -180,7 +180,7 @@ struct plain text line whenever we get to the text parser. As a result, we don't need to recheck that it's actually a text line and not some special line. *) - let plain_text_parser = lift (fun s -> Text s) take_till_cr + let plain_text_parser = lift (fun s -> Text s) take_till_eol in fix (fun gemini -> lift2 List.append pre_block_parser gemini diff --git a/gemini/parseCommon.ml b/gemini/parseCommon.ml index 999cd83..3df2c55 100644 --- a/gemini/parseCommon.ml +++ b/gemini/parseCommon.ml @@ -12,7 +12,9 @@ sig val skip_spaces : unit Angstrom.t - val take_till_cr : string Angstrom.t + val take_till_crlf : string Angstrom.t + + val take_till_eol : string Angstrom.t end module ParseCommon : PARSE_COMMON = @@ -39,5 +41,11 @@ struct let skip_spaces = skip_while is_whitespace - let take_till_cr = take_till is_cr <* string "\r\n" + let take_till_crlf = take_till is_cr <* string "\r\n" + + let take_till_eol = + let is_eol = function + | '\n' | '\r' -> true + | _ -> false in + take_till is_eol <* end_of_line end diff --git a/gemini/parseCommon.mli b/gemini/parseCommon.mli index 2994e6e..f469c6f 100644 --- a/gemini/parseCommon.mli +++ b/gemini/parseCommon.mli @@ -12,7 +12,9 @@ sig val skip_spaces : unit Angstrom.t - val take_till_cr : string Angstrom.t + val take_till_crlf : string Angstrom.t + + val take_till_eol : string Angstrom.t end module ParseCommon : PARSE_COMMON -- cgit v1.2.3