(* Copyright (C) 2020 Ryan Kavanagh *) (* Implements a parser for the text/gemini mime-type specified on https://gemini.circumlunar.space/docs/spec-spec.txt . *) open Angstrom type gemini_line = | H1 of string (* # header *) | H2 of string (* ## header *) | H3 of string (* ### header *) | Link of string * (string option) (* link with url and optional name *) | Pre of string (* preformatted text line *) | Text of string (* plain text line *) | Ul of string (* unordered list item *) type gemini = gemini_line list let is_space = function | ' ' | '\t' -> true | _ -> false let is_cr = function | '\r' -> true | _ -> false let skip_spaces = skip_while is_space let take_till_cr = take_till is_cr <* string "\r\n" let header = lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr) <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr) <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr) "gemini_header" let link = let end_of_url b = is_space b || is_cr b in let to_link s = function | "" -> Link (s, None) | n -> Link (s, Some n) in lift2 to_link (string "=>" *> skip_spaces *> take_till end_of_url) (* the optional name, will be "" if there is none *) (skip_spaces *> take_till_cr) "gemini_link" (* Makes sure a line is actually a plain text line *) let text = let take_line = lift (fun s -> Text s) take_till_cr in peek_char >>= function | Some '#' -> fail "text line cannot start with #" | Some '*' -> fail "text line cannot start with *" | Some '`' -> (peek_string 3 >>= function | "```" -> fail "text line cannot start with ```" | _ -> take_line) | Some '=' -> (peek_char >>= function | Some '>' -> fail "text line cannot start with =>" | _ -> take_line) | _ -> take_line (* The gemini spec says that pre blocks are demarcated by lines that start with "```". It says nothing about what follows on these lines. *) let pre_divider = string "```" *> take_till_cr *> return None let pre_line = peek_string 3 >>= function | "```" -> fail "preformatted line cannot start with ```" | _ -> lift (fun s -> Pre s) take_till_cr let pre_block = pre_divider *> many pre_line <* pre_divider let list_item = lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr) let gemini : gemini Angstrom.t = (* Because many tries each in order, we know that we will have a plain text line whenever we get to the text parser. As a result, we don't need to recheck that it's actually a text line and not some special line. *) let plain_text = lift (fun s -> Text s) take_till_cr in fix (fun gemini -> lift2 List.append pre_block gemini <|> lift2 List.cons (header <|> link <|> list_item <|> plain_text) gemini <|> return []) "gemini" let gemini_line_to_str = function | H1 s -> "# " ^ s | H2 s -> "## " ^ s | H3 s -> "### " ^ s | Link (s, Some n) -> "=> " ^ s ^ " " ^ n | Link (s, None) -> "=> " ^ s | Pre s -> "P " ^ s | Text s -> "T " ^ s | Ul s -> "* " ^ s let gemini_to_str l = List.fold_right (fun s -> fun r -> (gemini_line_to_str s) ^ "\n" ^ r) l "" let test_parse p str = match parse_string ~consume:All (p >>= return) str with | Ok x -> gemini_to_str x | _ -> ""