(* Copyright (C) 2020 Ryan Kavanagh *) (* Implements a parser for the text/gemini mime-type specified on https://gemini.circumlunar.space/docs/spec-spec.txt . *) open Angstrom module type MIME_TEXT_GEMINI = sig exception ParseError of string type gemini_line = | H1 of string (* # header *) | H2 of string (* ## header *) | H3 of string (* ### header *) | Link of string * (string option) (* link with url and optional name *) | Pre of string (* preformatted text line *) | Text of string (* plain text line *) | Ul of string (* unordered list item *) | Quoted of string (* quoted text *) type gemini = gemini_line list val gemini_line_to_str : gemini_line -> string val gemini_to_str : gemini -> string val gemini_line_to_canon_str : gemini_line -> string val gemini_to_canon_str : gemini -> string val str_to_header_line : string -> gemini_line val str_to_link_line : string -> gemini_line val str_to_text_line : string -> gemini_line val str_to_pre_line : string -> gemini_line val str_to_pre_block : string -> gemini_line list val str_to_list_line : string -> gemini_line val str_to_quoted_line : string -> gemini_line val str_to_gemini : string -> gemini end module MimeTextGemini : MIME_TEXT_GEMINI = struct open ParseCommon.ParseCommon exception ParseError of string type gemini_line = | H1 of string | H2 of string | H3 of string | Link of string * (string option) | Pre of string | Text of string | Ul of string | Quoted of string type gemini = gemini_line list let gemini_line_to_str = function | H1 s -> "H1 " ^ s | H2 s -> "H2 " ^ s | H3 s -> "H3 " ^ s | Link (s, Some n) -> "Link (" ^ s ^ ", Some" ^ n ^ ")" | Link (s, None) -> "Link (" ^ s ^ ", None)" | Pre s -> "Pre " ^ s | Text s -> "Text " ^ s | Ul s -> "Ul " ^ s | Quoted s -> "Quoted " ^ s let gemini_line_to_canon_str = function | H1 s -> "# " ^ s | H2 s -> "## " ^ s | H3 s -> "### " ^ s | Link (s, Some n) -> "=> " ^ s ^ " " ^ n | Link (s, None) -> "=> " ^ s | Pre s -> s | Text s -> s | Ul s -> "* " ^ s | Quoted s -> ">" ^ s let gemini_to_str g = List.fold_right (fun l -> fun r -> (gemini_line_to_str l) ^ "\n" ^ r) g "" let gemini_to_canon_str g = let eol = "\r\n" in let f (l : gemini_line) ((in_pre, r) : bool * string) = match l, in_pre with | Pre s, false -> (true, s ^ eol ^ "```" ^ eol ^ r) | Pre s, true -> (true, s ^ eol ^ r) | _, true -> (false, (gemini_line_to_canon_str l) ^ eol ^ "```" ^ eol ^ r) | _, false -> (false, (gemini_line_to_canon_str l) ^ eol ^ r) in let (_, s) = List.fold_right f g (false, "") in s let parse_string p s = match Angstrom.parse_string ~consume:All (p >>= return) s with | Ok parsed -> parsed | Error m -> raise (ParseError m) let header_parser = lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_eol) <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_eol) <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_eol) "gemini_header" let str_to_header_line = parse_string header_parser let link_parser = let end_of_url b = is_whitespace b || is_cr b in let to_link s = function | "" -> Link (s, None) | n -> Link (s, Some n) in lift2 to_link (string "=>" *> skip_spaces *> take_till end_of_url) (* the optional name, will be "" if there is none *) (skip_spaces *> take_till_eol) "gemini_link" let str_to_link_line = parse_string link_parser (* Makes sure a line is actually a plain text line *) let text_parser = let take_line = lift (fun s -> Text s) take_till_eol in peek_char >>= function | Some '#' -> fail "text line cannot start with #" | Some '>' -> fail "text line cannot start with >" | Some '`' -> (peek_string 3 >>= function | "```" -> fail "text line cannot start with ```" | _ -> take_line) | Some '=' -> (peek_char >>= function | Some '>' -> fail "text line cannot start with =>" | _ -> take_line) | Some '*' -> (peek_char >>= function | Some ' ' -> fail "text line cannot start with `* '" | _ -> take_line) | _ -> take_line let str_to_text_line = parse_string text_parser (* The gemini spec says that pre blocks are demarcated by lines that start with "```". It says nothing about what follows on these lines. *) let pre_divider = string "```" *> take_till_eol *> return None let pre_line_parser = peek_string 3 >>= function | "```" -> fail "preformatted line cannot start with ```" | _ -> lift (fun s -> Pre s) take_till_eol let pre_block_parser = pre_divider *> many pre_line_parser <* pre_divider let str_to_pre_line = parse_string pre_line_parser let str_to_pre_block = parse_string pre_block_parser let list_item_parser = lift (fun x -> Ul x) (string "* " *> skip_spaces *> take_till_eol) let str_to_list_line = parse_string list_item_parser let quoted_parser = lift (fun x -> Ul x) (string ">" *> skip_spaces *> take_till_eol) let str_to_quoted_line = parse_string quoted_parser let gemini : gemini_line list Angstrom.t = (* Because many tries each in order, we know that we will have a plain text line whenever we get to the text parser. As a result, we don't need to recheck that it's actually a text line and not some special line. *) let plain_text_parser = lift (fun s -> Text s) take_till_eol in fix (fun gemini -> lift2 List.append pre_block_parser gemini <|> lift2 List.cons (header_parser <|> link_parser <|> list_item_parser <|> quoted_parser <|> plain_text_parser) gemini <|> return []) "gemini" let str_to_gemini = parse_string gemini end