summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyan Kavanagh <rak@rak.ac>2020-05-30 14:01:16 -0400
committerRyan Kavanagh <rak@rak.ac>2020-05-30 14:01:16 -0400
commit9f00f50e520bc8155db52b8c4977a429993a754c (patch)
treefe2e00c3232eb9abb012a2c6eed09a9a1defa31d
Initial import of a text/gemini parser
-rw-r--r--LICENSE23
-rw-r--r--parse_gemini.ml105
2 files changed, 128 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..8152510
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,23 @@
+Copyright (c) 2020 Ryan Kavanagh <rak@rak.ac>
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/parse_gemini.ml b/parse_gemini.ml
new file mode 100644
index 0000000..2246a90
--- /dev/null
+++ b/parse_gemini.ml
@@ -0,0 +1,105 @@
+(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
+(* Implements a parser for the text/gemini mime-type specified on
+ https://gemini.circumlunar.space/docs/spec-spec.txt . *)
+
+open Angstrom
+
+type gemini_line =
+ | H1 of string (* # header *)
+ | H2 of string (* ## header *)
+ | H3 of string (* ### header *)
+ | Link of string * (string option) (* link with url and optional name *)
+ | Pre of string (* preformatted text line *)
+ | Text of string (* plain text line *)
+ | Ul of string (* unordered list item *)
+
+type gemini = gemini_line list
+
+let is_space = function
+ | ' ' | '\t' -> true
+ | _ -> false
+
+let is_cr = function
+ | '\r' -> true
+ | _ -> false
+
+let skip_spaces = skip_while is_space
+
+let take_till_cr = take_till is_cr <* string "\r\n"
+
+let header =
+ lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr)
+ <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr)
+ <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr)
+ <?> "gemini_header"
+
+let link =
+ let end_of_url b = is_space b || is_cr b in
+ let to_link s = function
+ | "" -> Link (s, None)
+ | n -> Link (s, Some n)
+ in
+ lift2 to_link
+ (string "=>" *> skip_spaces *> take_till end_of_url)
+ (* the optional name, will be "" if there is none *)
+ (skip_spaces *> take_till_cr)
+ <?> "gemini_link"
+
+(* Makes sure a line is actually a plain text line *)
+let text =
+ let take_line = lift (fun s -> Text s) take_till_cr in
+ peek_char >>= function
+ | Some '#' -> fail "text line cannot start with #"
+ | Some '*' -> fail "text line cannot start with *"
+ | Some '`' -> (peek_string 3 >>= function
+ | "```" -> fail "text line cannot start with ```"
+ | _ -> take_line)
+ | Some '=' -> (peek_char >>= function
+ | Some '>' -> fail "text line cannot start with =>"
+ | _ -> take_line)
+ | _ -> take_line
+
+(* The gemini spec says that pre blocks are demarcated by lines that
+ start with "```". It says nothing about what follows on these
+ lines. *)
+let pre_divider = string "```" *> take_till_cr *> return None
+
+let pre_line = peek_string 3
+ >>= function
+ | "```" -> fail "preformatted line cannot start with ```"
+ | _ -> lift (fun s -> Pre s) take_till_cr
+
+let pre_block = pre_divider *> many pre_line <* pre_divider
+
+let list_item = lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr)
+
+let gemini : gemini Angstrom.t =
+ (* Because many tries each in order, we know that we will have a
+ plain text line whenever we get to the text parser. As a result,
+ we don't need to recheck that it's actually a text line and not
+ some special line. *)
+ let plain_text = lift (fun s -> Text s) take_till_cr
+ in
+ fix (fun gemini ->
+ lift2 List.append pre_block gemini
+ <|> lift2 List.cons (header <|> link <|> list_item <|> plain_text) gemini
+ <|> return [])
+ <?> "gemini"
+
+let gemini_line_to_str = function
+ | H1 s -> "# " ^ s
+ | H2 s -> "## " ^ s
+ | H3 s -> "### " ^ s
+ | Link (s, Some n) -> "=> " ^ s ^ " " ^ n
+ | Link (s, None) -> "=> " ^ s
+ | Pre s -> "P " ^ s
+ | Text s -> "T " ^ s
+ | Ul s -> "* " ^ s
+
+let gemini_to_str l =
+ List.fold_right (fun s -> fun r -> (gemini_line_to_str s) ^ "\n" ^ r) l ""
+
+let test_parse p str =
+ match parse_string ~consume:All (p >>= return) str with
+ | Ok x -> gemini_to_str x
+ | _ -> ""