|
|
(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
(* Implements a parser for the text/gemini mime-type specified on
https://gemini.circumlunar.space/docs/spec-spec.txt . *)
open Angstrom
module type MIME_TEXT_GEMINI =
sig
exception ParseError of string
type gemini_line =
| H1 of string (* # header *)
| H2 of string (* ## header *)
| H3 of string (* ### header *)
| Link of string * (string option) (* link with url and optional name *)
| Pre of string (* preformatted text line *)
| Text of string (* plain text line *)
| Ul of string (* unordered list item *)
| Quoted of string (* quoted text *)
type gemini = gemini_line list
val gemini_line_to_str : gemini_line -> string
val gemini_to_str : gemini -> string
val gemini_line_to_canon_str : gemini_line -> string
val gemini_to_canon_str : gemini -> string
val str_to_header_line : string -> gemini_line
val str_to_link_line : string -> gemini_line
val str_to_text_line : string -> gemini_line
val str_to_pre_line : string -> gemini_line
val str_to_pre_block : string -> gemini_line list
val str_to_list_line : string -> gemini_line
val str_to_quoted_line : string -> gemini_line
val str_to_gemini : string -> gemini
end
module MimeTextGemini : MIME_TEXT_GEMINI =
struct
open ParseCommon.ParseCommon
exception ParseError of string
type gemini_line =
| H1 of string
| H2 of string
| H3 of string
| Link of string * (string option)
| Pre of string
| Text of string
| Ul of string
| Quoted of string
type gemini = gemini_line list
let gemini_line_to_str = function
| H1 s -> "H1 " ^ s
| H2 s -> "H2 " ^ s
| H3 s -> "H3 " ^ s
| Link (s, Some n) -> "Link (" ^ s ^ ", Some" ^ n ^ ")"
| Link (s, None) -> "Link (" ^ s ^ ", None)"
| Pre s -> "Pre " ^ s
| Text s -> "Text " ^ s
| Ul s -> "Ul " ^ s
| Quoted s -> "Quoted " ^ s
let gemini_line_to_canon_str = function
| H1 s -> "# " ^ s
| H2 s -> "## " ^ s
| H3 s -> "### " ^ s
| Link (s, Some n) -> "=> " ^ s ^ " " ^ n
| Link (s, None) -> "=> " ^ s
| Pre s -> s
| Text s -> s
| Ul s -> "* " ^ s
| Quoted s -> ">" ^ s
let gemini_to_str g =
List.fold_right (fun l -> fun r -> (gemini_line_to_str l) ^ "\n" ^ r) g ""
let gemini_to_canon_str g =
let eol = "\r\n" in
let f (l : gemini_line) ((in_pre, r) : bool * string) =
match l, in_pre with
| Pre s, false ->
(true, s ^ eol ^ "```" ^ eol ^ r)
| Pre s, true ->
(true, s ^ eol ^ r)
| _, true ->
(false, (gemini_line_to_canon_str l) ^ eol ^ "```" ^ eol ^ r)
| _, false ->
(false, (gemini_line_to_canon_str l) ^ eol ^ r) in
let (_, s) = List.fold_right f g (false, "") in
s
let parse_string p s =
match Angstrom.parse_string ~consume:All (p >>= return) s with
| Ok parsed -> parsed
| Error m -> raise (ParseError m)
let header_parser =
lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_eol)
<|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_eol)
<|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_eol)
<?> "gemini_header"
let str_to_header_line = parse_string header_parser
let link_parser =
let end_of_url b = is_whitespace b || is_cr b in
let to_link s = function
| "" -> Link (s, None)
| n -> Link (s, Some n)
in
lift2 to_link
(string "=>" *> skip_spaces *> take_till end_of_url)
(* the optional name, will be "" if there is none *)
(skip_spaces *> take_till_eol)
<?> "gemini_link"
let str_to_link_line = parse_string link_parser
(* Makes sure a line is actually a plain text line *)
let text_parser =
let take_line = lift (fun s -> Text s) take_till_eol in
peek_char >>= function
| Some '#' -> fail "text line cannot start with #"
| Some '>' -> fail "text line cannot start with >"
| Some '`' -> (peek_string 3 >>= function
| "```" -> fail "text line cannot start with ```"
| _ -> take_line)
| Some '=' -> (peek_char >>= function
| Some '>' -> fail "text line cannot start with =>"
| _ -> take_line)
| Some '*' -> (peek_char >>= function
| Some ' ' -> fail "text line cannot start with `* '"
| _ -> take_line)
| _ -> take_line
let str_to_text_line = parse_string text_parser
(* The gemini spec says that pre blocks are demarcated by lines that
start with "```". It says nothing about what follows on these
lines. *)
let pre_divider = string "```" *> take_till_eol *> return None
let pre_line_parser = peek_string 3
>>= function
| "```" -> fail "preformatted line cannot start with ```"
| _ -> lift (fun s -> Pre s) take_till_eol
let pre_block_parser = pre_divider *> many pre_line_parser <* pre_divider
let str_to_pre_line = parse_string pre_line_parser
let str_to_pre_block = parse_string pre_block_parser
let list_item_parser =
lift (fun x -> Ul x) (string "* " *> skip_spaces *> take_till_eol)
let str_to_list_line = parse_string list_item_parser
let quoted_parser =
lift (fun x -> Ul x) (string ">" *> skip_spaces *> take_till_eol)
let str_to_quoted_line = parse_string quoted_parser
let gemini : gemini_line list Angstrom.t =
(* Because many tries each in order, we know that we will have a
plain text line whenever we get to the text parser. As a result,
we don't need to recheck that it's actually a text line and not
some special line. *)
let plain_text_parser = lift (fun s -> Text s) take_till_eol
in
fix (fun gemini ->
lift2 List.append pre_block_parser gemini
<|> lift2 List.cons (header_parser
<|> link_parser
<|> list_item_parser
<|> quoted_parser
<|> plain_text_parser) gemini
<|> return [])
<?> "gemini"
let str_to_gemini = parse_string gemini
end
|