summaryrefslogtreecommitdiff
path: root/gemini/mimeTextGemini.ml
diff options
context:
space:
mode:
Diffstat (limited to 'gemini/mimeTextGemini.ml')
-rw-r--r--gemini/mimeTextGemini.ml192
1 files changed, 192 insertions, 0 deletions
diff --git a/gemini/mimeTextGemini.ml b/gemini/mimeTextGemini.ml
new file mode 100644
index 0000000..ea370fd
--- /dev/null
+++ b/gemini/mimeTextGemini.ml
@@ -0,0 +1,192 @@
+(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
+(* Implements a parser for the text/gemini mime-type specified on
+ https://gemini.circumlunar.space/docs/spec-spec.txt . *)
+
+open Angstrom
+
+module type MIME_TEXT_GEMINI =
+sig
+ exception ParseError of string
+
+ type gemini_line =
+ | H1 of string (* # header *)
+ | H2 of string (* ## header *)
+ | H3 of string (* ### header *)
+ | Link of string * (string option) (* link with url and optional name *)
+ | Pre of string (* preformatted text line *)
+ | Text of string (* plain text line *)
+ | Ul of string (* unordered list item *)
+
+ type gemini = gemini_line list
+
+ val gemini_line_to_str : gemini_line -> string
+
+ val gemini_to_str : gemini -> string
+
+ val gemini_line_to_canon_str : gemini_line -> string
+
+ val gemini_to_canon_str : gemini -> string
+
+ val str_to_header_line : string -> gemini_line
+
+ val str_to_link_line : string -> gemini_line
+
+ val str_to_text_line : string -> gemini_line
+
+ val str_to_pre_line : string -> gemini_line
+
+ val str_to_pre_block : string -> gemini_line list
+
+ val str_to_list_line : string -> gemini_line
+
+ val str_to_gemini : string -> gemini
+end
+
+module MimeTextGemini : MIME_TEXT_GEMINI =
+struct
+
+ exception ParseError of string
+
+ type gemini_line =
+ | H1 of string
+ | H2 of string
+ | H3 of string
+ | Link of string * (string option)
+ | Pre of string
+ | Text of string
+ | Ul of string
+
+ type gemini = gemini_line list
+
+ let gemini_line_to_str = function
+ | H1 s -> "H1 " ^ s
+ | H2 s -> "H2 " ^ s
+ | H3 s -> "H3 " ^ s
+ | Link (s, Some n) -> "Link (" ^ s ^ ", Some" ^ n ^ ")"
+ | Link (s, None) -> "Link (" ^ s ^ ", None)"
+ | Pre s -> "Pre " ^ s
+ | Text s -> "Text " ^ s
+ | Ul s -> "Ul " ^ s
+
+ let gemini_line_to_canon_str = function
+ | H1 s -> "# " ^ s
+ | H2 s -> "## " ^ s
+ | H3 s -> "### " ^ s
+ | Link (s, Some n) -> "=> " ^ s ^ " " ^ n
+ | Link (s, None) -> "=> " ^ s
+ | Pre s -> s
+ | Text s -> s
+ | Ul s -> "* " ^ s
+
+ let gemini_to_str g =
+ List.fold_right (fun l -> fun r -> (gemini_line_to_str l) ^ "\n" ^ r) g ""
+
+ let gemini_to_canon_str g =
+ let in_pre = ref false in
+ let f l r =
+ match (l, !in_pre) with
+ | Pre s, true -> s ^ "\r\n" ^ r
+ | Pre s, false ->
+ let () = in_pre := true in
+ "```\r\n" ^ s ^ "\r\n" ^ r
+ | _, true ->
+ let () = in_pre := false in
+ "```\r\n" ^ (gemini_line_to_canon_str l) ^ "\r\n" ^ r
+ | _, false -> (gemini_line_to_canon_str l) ^ "\r\n" ^ r
+ in
+ List.fold_right f g "\r\n"
+
+ let parse_string p s =
+ match Angstrom.parse_string ~consume:All (p >>= return) s with
+ | Ok parsed -> parsed
+ | Error m -> raise (ParseError m)
+
+ let is_space = function
+ | ' ' | '\t' -> true
+ | _ -> false
+
+ let is_cr = function
+ | '\r' -> true
+ | _ -> false
+
+ let skip_spaces = skip_while is_space
+
+ let take_till_cr = take_till is_cr <* string "\r\n"
+
+ let header_parser =
+ lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr)
+ <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr)
+ <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr)
+ <?> "gemini_header"
+
+ let str_to_header_line = parse_string header_parser
+
+ let link_parser =
+ let end_of_url b = is_space b || is_cr b in
+ let to_link s = function
+ | "" -> Link (s, None)
+ | n -> Link (s, Some n)
+ in
+ lift2 to_link
+ (string "=>" *> skip_spaces *> take_till end_of_url)
+ (* the optional name, will be "" if there is none *)
+ (skip_spaces *> take_till_cr)
+ <?> "gemini_link"
+
+ let str_to_link_line = parse_string link_parser
+
+ (* Makes sure a line is actually a plain text line *)
+ let text_parser =
+ let take_line = lift (fun s -> Text s) take_till_cr in
+ peek_char >>= function
+ | Some '#' -> fail "text line cannot start with #"
+ | Some '*' -> fail "text line cannot start with *"
+ | Some '`' -> (peek_string 3 >>= function
+ | "```" -> fail "text line cannot start with ```"
+ | _ -> take_line)
+ | Some '=' -> (peek_char >>= function
+ | Some '>' -> fail "text line cannot start with =>"
+ | _ -> take_line)
+ | _ -> take_line
+
+ let str_to_text_line = parse_string text_parser
+
+ (* The gemini spec says that pre blocks are demarcated by lines that
+ start with "```". It says nothing about what follows on these
+ lines. *)
+ let pre_divider = string "```" *> take_till_cr *> return None
+
+ let pre_line_parser = peek_string 3
+ >>= function
+ | "```" -> fail "preformatted line cannot start with ```"
+ | _ -> lift (fun s -> Pre s) take_till_cr
+
+ let pre_block_parser = pre_divider *> many pre_line_parser <* pre_divider
+
+ let str_to_pre_line = parse_string pre_line_parser
+ let str_to_pre_block = parse_string pre_block_parser
+
+ let list_item_parser =
+ lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr)
+
+ let str_to_list_line = parse_string list_item_parser
+
+ let gemini : gemini_line list Angstrom.t =
+ (* Because many tries each in order, we know that we will have a
+ plain text line whenever we get to the text parser. As a result,
+ we don't need to recheck that it's actually a text line and not
+ some special line. *)
+ let plain_text_parser = lift (fun s -> Text s) take_till_cr
+ in
+ fix (fun gemini ->
+ lift2 List.append pre_block_parser gemini
+ <|> lift2 List.cons (header_parser
+ <|> link_parser
+ <|> list_item_parser
+ <|> plain_text_parser) gemini
+ <|> return [])
+ <?> "gemini"
+
+ let str_to_gemini = parse_string gemini
+
+end