summaryrefslogblamecommitdiff
path: root/parse_gemini.ml
blob: 2246a90dd7944e51e8c7a1ade2777a812337613f (plain) (tree)








































































































                                                                                
(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
(* Implements a parser for the text/gemini mime-type specified on
   https://gemini.circumlunar.space/docs/spec-spec.txt .  *)

open Angstrom

type gemini_line =
  | H1 of string                     (* # header   *)
  | H2 of string                     (* ## header  *)
  | H3 of string                     (* ### header *)
  | Link of string * (string option) (* link with url and optional name *)
  | Pre of string                    (* preformatted text line *)
  | Text of string                   (* plain text line *)
  | Ul of string                     (* unordered list item *)

type gemini = gemini_line list

let is_space = function
  | ' ' | '\t' -> true
  | _ -> false

let is_cr = function
  | '\r' -> true
  | _ -> false

let skip_spaces = skip_while is_space

let take_till_cr = take_till is_cr <* string "\r\n"

let header =
  lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr)
  <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr)
  <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr)
  <?> "gemini_header"

let link =
  let end_of_url b = is_space b || is_cr b in
  let to_link s = function
    | "" -> Link (s, None)
    | n -> Link (s, Some n)
  in
  lift2 to_link
    (string "=>" *> skip_spaces *> take_till end_of_url)
    (* the optional name, will be "" if there is none *)
    (skip_spaces *> take_till_cr)
    <?> "gemini_link"

(* Makes sure a line is actually a plain text line *)
let text =
  let take_line = lift (fun s -> Text s) take_till_cr in
  peek_char >>= function
  | Some '#' -> fail "text line cannot start with #"
  | Some '*' -> fail "text line cannot start with *"
  | Some '`' -> (peek_string 3 >>= function
    | "```" -> fail "text line cannot start with ```"
    | _ -> take_line)
  | Some '=' -> (peek_char >>= function
    | Some '>' -> fail "text line cannot start with =>"
    | _ -> take_line)
  | _ -> take_line

(* The gemini spec says that pre blocks are demarcated by lines that
   start with "```". It says nothing about what follows on these
   lines. *)
let pre_divider = string "```" *> take_till_cr *> return None

let pre_line = peek_string 3
  >>= function
  | "```" -> fail "preformatted line cannot start with ```"
  | _ -> lift (fun s -> Pre s) take_till_cr

let pre_block = pre_divider *> many pre_line <* pre_divider

let list_item = lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr)

let gemini : gemini Angstrom.t =
  (* Because many tries each in order, we know that we will have a
     plain text line whenever we get to the text parser. As a result,
     we don't need to recheck that it's actually a text line and not
     some special line. *)
  let plain_text = lift (fun s -> Text s) take_till_cr
  in
  fix (fun gemini ->
      lift2 List.append pre_block gemini
      <|> lift2 List.cons (header <|> link <|> list_item <|> plain_text) gemini
      <|> return [])
  <?> "gemini"

let gemini_line_to_str = function
  | H1 s -> "# " ^ s
  | H2 s -> "## " ^ s
  | H3 s -> "### " ^ s
  | Link (s, Some n) -> "=> " ^ s ^ " " ^ n
  | Link (s, None) -> "=> " ^ s
  | Pre s -> "P " ^ s
  | Text s -> "T " ^ s
  | Ul s -> "* " ^ s

let gemini_to_str l =
  List.fold_right (fun s -> fun r -> (gemini_line_to_str s) ^ "\n" ^ r) l ""

let test_parse p str =
  match parse_string ~consume:All (p >>= return) str with
  | Ok x -> gemini_to_str x
  | _ -> ""