summaryrefslogtreecommitdiff
path: root/parse_gemini.ml
diff options
context:
space:
mode:
authorRyan Kavanagh <rak@rak.ac>2020-05-30 19:16:03 -0400
committerRyan Kavanagh <rak@rak.ac>2020-05-31 00:04:48 -0400
commitc760484c1abfed9cd85db8ec5eb4d659ad16ba2a (patch)
tree3997432e131028c5947687b0722304fb739558db /parse_gemini.ml
parentInitial import of a text/gemini parser (diff)
Added a build system and adopt modules
Diffstat (limited to '')
-rw-r--r--parse_gemini.ml105
1 files changed, 0 insertions, 105 deletions
diff --git a/parse_gemini.ml b/parse_gemini.ml
deleted file mode 100644
index 2246a90..0000000
--- a/parse_gemini.ml
+++ /dev/null
@@ -1,105 +0,0 @@
-(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
-(* Implements a parser for the text/gemini mime-type specified on
- https://gemini.circumlunar.space/docs/spec-spec.txt . *)
-
-open Angstrom
-
-type gemini_line =
- | H1 of string (* # header *)
- | H2 of string (* ## header *)
- | H3 of string (* ### header *)
- | Link of string * (string option) (* link with url and optional name *)
- | Pre of string (* preformatted text line *)
- | Text of string (* plain text line *)
- | Ul of string (* unordered list item *)
-
-type gemini = gemini_line list
-
-let is_space = function
- | ' ' | '\t' -> true
- | _ -> false
-
-let is_cr = function
- | '\r' -> true
- | _ -> false
-
-let skip_spaces = skip_while is_space
-
-let take_till_cr = take_till is_cr <* string "\r\n"
-
-let header =
- lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr)
- <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr)
- <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr)
- <?> "gemini_header"
-
-let link =
- let end_of_url b = is_space b || is_cr b in
- let to_link s = function
- | "" -> Link (s, None)
- | n -> Link (s, Some n)
- in
- lift2 to_link
- (string "=>" *> skip_spaces *> take_till end_of_url)
- (* the optional name, will be "" if there is none *)
- (skip_spaces *> take_till_cr)
- <?> "gemini_link"
-
-(* Makes sure a line is actually a plain text line *)
-let text =
- let take_line = lift (fun s -> Text s) take_till_cr in
- peek_char >>= function
- | Some '#' -> fail "text line cannot start with #"
- | Some '*' -> fail "text line cannot start with *"
- | Some '`' -> (peek_string 3 >>= function
- | "```" -> fail "text line cannot start with ```"
- | _ -> take_line)
- | Some '=' -> (peek_char >>= function
- | Some '>' -> fail "text line cannot start with =>"
- | _ -> take_line)
- | _ -> take_line
-
-(* The gemini spec says that pre blocks are demarcated by lines that
- start with "```". It says nothing about what follows on these
- lines. *)
-let pre_divider = string "```" *> take_till_cr *> return None
-
-let pre_line = peek_string 3
- >>= function
- | "```" -> fail "preformatted line cannot start with ```"
- | _ -> lift (fun s -> Pre s) take_till_cr
-
-let pre_block = pre_divider *> many pre_line <* pre_divider
-
-let list_item = lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr)
-
-let gemini : gemini Angstrom.t =
- (* Because many tries each in order, we know that we will have a
- plain text line whenever we get to the text parser. As a result,
- we don't need to recheck that it's actually a text line and not
- some special line. *)
- let plain_text = lift (fun s -> Text s) take_till_cr
- in
- fix (fun gemini ->
- lift2 List.append pre_block gemini
- <|> lift2 List.cons (header <|> link <|> list_item <|> plain_text) gemini
- <|> return [])
- <?> "gemini"
-
-let gemini_line_to_str = function
- | H1 s -> "# " ^ s
- | H2 s -> "## " ^ s
- | H3 s -> "### " ^ s
- | Link (s, Some n) -> "=> " ^ s ^ " " ^ n
- | Link (s, None) -> "=> " ^ s
- | Pre s -> "P " ^ s
- | Text s -> "T " ^ s
- | Ul s -> "* " ^ s
-
-let gemini_to_str l =
- List.fold_right (fun s -> fun r -> (gemini_line_to_str s) ^ "\n" ^ r) l ""
-
-let test_parse p str =
- match parse_string ~consume:All (p >>= return) str with
- | Ok x -> gemini_to_str x
- | _ -> ""