diff options
author | Ryan Kavanagh <rak@rak.ac> | 2020-05-30 19:16:03 -0400 |
---|---|---|
committer | Ryan Kavanagh <rak@rak.ac> | 2020-05-31 00:04:48 -0400 |
commit | c760484c1abfed9cd85db8ec5eb4d659ad16ba2a (patch) | |
tree | 3997432e131028c5947687b0722304fb739558db /gemini | |
parent | Initial import of a text/gemini parser (diff) |
Added a build system and adopt modules
Diffstat (limited to 'gemini')
-rw-r--r-- | gemini/dune | 3 | ||||
-rw-r--r-- | gemini/mimeTextGemini.ml | 192 | ||||
-rw-r--r-- | gemini/mimeTextGemini.mli | 43 |
3 files changed, 238 insertions, 0 deletions
diff --git a/gemini/dune b/gemini/dune new file mode 100644 index 0000000..cf041d8 --- /dev/null +++ b/gemini/dune @@ -0,0 +1,3 @@ +(library + (name gemini) + (libraries base angstrom)) diff --git a/gemini/mimeTextGemini.ml b/gemini/mimeTextGemini.ml new file mode 100644 index 0000000..ea370fd --- /dev/null +++ b/gemini/mimeTextGemini.ml @@ -0,0 +1,192 @@ +(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *) +(* Implements a parser for the text/gemini mime-type specified on + https://gemini.circumlunar.space/docs/spec-spec.txt . *) + +open Angstrom + +module type MIME_TEXT_GEMINI = +sig + exception ParseError of string + + type gemini_line = + | H1 of string (* # header *) + | H2 of string (* ## header *) + | H3 of string (* ### header *) + | Link of string * (string option) (* link with url and optional name *) + | Pre of string (* preformatted text line *) + | Text of string (* plain text line *) + | Ul of string (* unordered list item *) + + type gemini = gemini_line list + + val gemini_line_to_str : gemini_line -> string + + val gemini_to_str : gemini -> string + + val gemini_line_to_canon_str : gemini_line -> string + + val gemini_to_canon_str : gemini -> string + + val str_to_header_line : string -> gemini_line + + val str_to_link_line : string -> gemini_line + + val str_to_text_line : string -> gemini_line + + val str_to_pre_line : string -> gemini_line + + val str_to_pre_block : string -> gemini_line list + + val str_to_list_line : string -> gemini_line + + val str_to_gemini : string -> gemini +end + +module MimeTextGemini : MIME_TEXT_GEMINI = +struct + + exception ParseError of string + + type gemini_line = + | H1 of string + | H2 of string + | H3 of string + | Link of string * (string option) + | Pre of string + | Text of string + | Ul of string + + type gemini = gemini_line list + + let gemini_line_to_str = function + | H1 s -> "H1 " ^ s + | H2 s -> "H2 " ^ s + | H3 s -> "H3 " ^ s + | Link (s, Some n) -> "Link (" ^ s ^ ", Some" ^ n ^ ")" + | Link (s, None) -> "Link (" ^ s ^ ", None)" + | Pre s -> "Pre " ^ s + | Text s -> "Text " ^ s + | Ul s -> "Ul " ^ s + + let gemini_line_to_canon_str = function + | H1 s -> "# " ^ s + | H2 s -> "## " ^ s + | H3 s -> "### " ^ s + | Link (s, Some n) -> "=> " ^ s ^ " " ^ n + | Link (s, None) -> "=> " ^ s + | Pre s -> s + | Text s -> s + | Ul s -> "* " ^ s + + let gemini_to_str g = + List.fold_right (fun l -> fun r -> (gemini_line_to_str l) ^ "\n" ^ r) g "" + + let gemini_to_canon_str g = + let in_pre = ref false in + let f l r = + match (l, !in_pre) with + | Pre s, true -> s ^ "\r\n" ^ r + | Pre s, false -> + let () = in_pre := true in + "```\r\n" ^ s ^ "\r\n" ^ r + | _, true -> + let () = in_pre := false in + "```\r\n" ^ (gemini_line_to_canon_str l) ^ "\r\n" ^ r + | _, false -> (gemini_line_to_canon_str l) ^ "\r\n" ^ r + in + List.fold_right f g "\r\n" + + let parse_string p s = + match Angstrom.parse_string ~consume:All (p >>= return) s with + | Ok parsed -> parsed + | Error m -> raise (ParseError m) + + let is_space = function + | ' ' | '\t' -> true + | _ -> false + + let is_cr = function + | '\r' -> true + | _ -> false + + let skip_spaces = skip_while is_space + + let take_till_cr = take_till is_cr <* string "\r\n" + + let header_parser = + lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr) + <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr) + <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr) + <?> "gemini_header" + + let str_to_header_line = parse_string header_parser + + let link_parser = + let end_of_url b = is_space b || is_cr b in + let to_link s = function + | "" -> Link (s, None) + | n -> Link (s, Some n) + in + lift2 to_link + (string "=>" *> skip_spaces *> take_till end_of_url) + (* the optional name, will be "" if there is none *) + (skip_spaces *> take_till_cr) + <?> "gemini_link" + + let str_to_link_line = parse_string link_parser + + (* Makes sure a line is actually a plain text line *) + let text_parser = + let take_line = lift (fun s -> Text s) take_till_cr in + peek_char >>= function + | Some '#' -> fail "text line cannot start with #" + | Some '*' -> fail "text line cannot start with *" + | Some '`' -> (peek_string 3 >>= function + | "```" -> fail "text line cannot start with ```" + | _ -> take_line) + | Some '=' -> (peek_char >>= function + | Some '>' -> fail "text line cannot start with =>" + | _ -> take_line) + | _ -> take_line + + let str_to_text_line = parse_string text_parser + + (* The gemini spec says that pre blocks are demarcated by lines that + start with "```". It says nothing about what follows on these + lines. *) + let pre_divider = string "```" *> take_till_cr *> return None + + let pre_line_parser = peek_string 3 + >>= function + | "```" -> fail "preformatted line cannot start with ```" + | _ -> lift (fun s -> Pre s) take_till_cr + + let pre_block_parser = pre_divider *> many pre_line_parser <* pre_divider + + let str_to_pre_line = parse_string pre_line_parser + let str_to_pre_block = parse_string pre_block_parser + + let list_item_parser = + lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr) + + let str_to_list_line = parse_string list_item_parser + + let gemini : gemini_line list Angstrom.t = + (* Because many tries each in order, we know that we will have a + plain text line whenever we get to the text parser. As a result, + we don't need to recheck that it's actually a text line and not + some special line. *) + let plain_text_parser = lift (fun s -> Text s) take_till_cr + in + fix (fun gemini -> + lift2 List.append pre_block_parser gemini + <|> lift2 List.cons (header_parser + <|> link_parser + <|> list_item_parser + <|> plain_text_parser) gemini + <|> return []) + <?> "gemini" + + let str_to_gemini = parse_string gemini + +end diff --git a/gemini/mimeTextGemini.mli b/gemini/mimeTextGemini.mli new file mode 100644 index 0000000..aa8d462 --- /dev/null +++ b/gemini/mimeTextGemini.mli @@ -0,0 +1,43 @@ +(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *) +(* Implements a parser for the text/gemini mime-type specified on + https://gemini.circumlunar.space/docs/spec-spec.txt . *) + +module type MIME_TEXT_GEMINI = +sig + exception ParseError of string + + type gemini_line = + | H1 of string (* # header *) + | H2 of string (* ## header *) + | H3 of string (* ### header *) + | Link of string * (string option) (* link with url and optional name *) + | Pre of string (* preformatted text line *) + | Text of string (* plain text line *) + | Ul of string (* unordered list item *) + + type gemini = gemini_line list + + val gemini_line_to_str : gemini_line -> string + + val gemini_to_str : gemini -> string + + val gemini_line_to_canon_str : gemini_line -> string + + val gemini_to_canon_str : gemini -> string + + val str_to_header_line : string -> gemini_line + + val str_to_link_line : string -> gemini_line + + val str_to_text_line : string -> gemini_line + + val str_to_pre_line : string -> gemini_line + + val str_to_pre_block : string -> gemini_line list + + val str_to_list_line : string -> gemini_line + + val str_to_gemini : string -> gemini +end + +module MimeTextGemini : MIME_TEXT_GEMINI |