summaryrefslogblamecommitdiff
path: root/gemini/mimeTextGemini.ml
blob: e73be655c6bd4589fde8addc5becef9ad7748a0e (plain) (tree)



















































































                                                                              


                                                           
                       


                                         
                  





                                                                     






























































































                                                                           
(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
(* Implements a parser for the text/gemini mime-type specified on
   https://gemini.circumlunar.space/docs/spec-spec.txt .  *)

open Angstrom

module type MIME_TEXT_GEMINI =
sig
  exception ParseError of string

  type gemini_line =
    | H1 of string                     (* # header   *)
    | H2 of string                     (* ## header  *)
    | H3 of string                     (* ### header *)
    | Link of string * (string option) (* link with url and optional name *)
    | Pre of string                    (* preformatted text line *)
    | Text of string                   (* plain text line *)
    | Ul of string                     (* unordered list item *)

  type gemini = gemini_line list

  val gemini_line_to_str : gemini_line -> string

  val gemini_to_str : gemini -> string

  val gemini_line_to_canon_str : gemini_line -> string

  val gemini_to_canon_str : gemini -> string

  val str_to_header_line : string -> gemini_line

  val str_to_link_line : string -> gemini_line

  val str_to_text_line : string -> gemini_line

  val str_to_pre_line : string -> gemini_line

  val str_to_pre_block : string -> gemini_line list

  val str_to_list_line : string -> gemini_line

  val str_to_gemini : string -> gemini
end

module MimeTextGemini : MIME_TEXT_GEMINI =
struct

  exception ParseError of string

  type gemini_line =
    | H1 of string
    | H2 of string
    | H3 of string
    | Link of string * (string option)
    | Pre of string
    | Text of string
    | Ul of string

  type gemini = gemini_line list

  let gemini_line_to_str = function
    | H1 s -> "H1 " ^ s
    | H2 s -> "H2 " ^ s
    | H3 s -> "H3 " ^ s
    | Link (s, Some n) -> "Link (" ^ s ^ ", Some" ^ n ^ ")"
    | Link (s, None) -> "Link (" ^ s ^ ", None)"
    | Pre s -> "Pre " ^ s
    | Text s -> "Text " ^ s
    | Ul s -> "Ul " ^ s

  let gemini_line_to_canon_str = function
    | H1 s -> "# " ^ s
    | H2 s -> "## " ^ s
    | H3 s -> "### " ^ s
    | Link (s, Some n) -> "=> " ^ s ^ " " ^ n
    | Link (s, None) -> "=> " ^ s
    | Pre s -> s
    | Text s -> s
    | Ul s -> "* " ^ s

  let gemini_to_str g =
    List.fold_right (fun l -> fun r -> (gemini_line_to_str l) ^ "\n" ^ r) g ""

  let gemini_to_canon_str g =
    let eol = "\r\n" in
    let f (l : gemini_line) ((in_pre, r) : bool * string) =
      match l, in_pre with
      | Pre s, false ->
        (true, s ^ eol ^ "```" ^ eol ^ r)
      | Pre s, true ->
        (true, s ^ eol ^ r)
      | _, true ->
        (false, (gemini_line_to_canon_str l) ^ eol ^ "```" ^ eol ^ r)
      | _, false ->
        (false, (gemini_line_to_canon_str l) ^ eol ^ r) in
    let (_, s) = List.fold_right f g (false, "") in
    s


  let parse_string p s =
    match Angstrom.parse_string ~consume:All (p >>= return) s with
    | Ok parsed -> parsed
    | Error m -> raise (ParseError m)

  let is_space = function
    | ' ' | '\t' -> true
    | _ -> false

  let is_cr = function
    | '\r' -> true
    | _ -> false

  let skip_spaces = skip_while is_space

  let take_till_cr = take_till is_cr <* string "\r\n"

  let header_parser =
    lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr)
    <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr)
    <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr)
    <?> "gemini_header"

  let str_to_header_line = parse_string header_parser

  let link_parser =
    let end_of_url b = is_space b || is_cr b in
    let to_link s = function
      | "" -> Link (s, None)
      | n -> Link (s, Some n)
    in
    lift2 to_link
      (string "=>" *> skip_spaces *> take_till end_of_url)
      (* the optional name, will be "" if there is none *)
      (skip_spaces *> take_till_cr)
    <?> "gemini_link"

  let str_to_link_line = parse_string link_parser

  (* Makes sure a line is actually a plain text line *)
  let text_parser =
    let take_line = lift (fun s -> Text s) take_till_cr in
    peek_char >>= function
    | Some '#' -> fail "text line cannot start with #"
    | Some '*' -> fail "text line cannot start with *"
    | Some '`' -> (peek_string 3 >>= function
      | "```" -> fail "text line cannot start with ```"
      | _ -> take_line)
    | Some '=' -> (peek_char >>= function
      | Some '>' -> fail "text line cannot start with =>"
      | _ -> take_line)
    | _ -> take_line

  let str_to_text_line = parse_string text_parser

  (* The gemini spec says that pre blocks are demarcated by lines that
     start with "```". It says nothing about what follows on these
     lines. *)
  let pre_divider = string "```" *> take_till_cr *> return None

  let pre_line_parser = peek_string 3
    >>= function
    | "```" -> fail "preformatted line cannot start with ```"
    | _ -> lift (fun s -> Pre s) take_till_cr

  let pre_block_parser = pre_divider *> many pre_line_parser <* pre_divider

  let str_to_pre_line = parse_string pre_line_parser
  let str_to_pre_block = parse_string pre_block_parser

  let list_item_parser =
    lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr)

  let str_to_list_line = parse_string list_item_parser

  let gemini : gemini_line list Angstrom.t =
    (* Because many tries each in order, we know that we will have a
       plain text line whenever we get to the text parser. As a result,
       we don't need to recheck that it's actually a text line and not
       some special line. *)
    let plain_text_parser = lift (fun s -> Text s) take_till_cr
    in
    fix (fun gemini ->
        lift2 List.append pre_block_parser gemini
        <|> lift2 List.cons (header_parser
                             <|> link_parser
                             <|> list_item_parser
                             <|> plain_text_parser) gemini
        <|> return [])
    <?> "gemini"

  let str_to_gemini = parse_string gemini

end