blob: 2246a90dd7944e51e8c7a1ade2777a812337613f (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
(* Implements a parser for the text/gemini mime-type specified on
https://gemini.circumlunar.space/docs/spec-spec.txt . *)
open Angstrom
type gemini_line =
| H1 of string (* # header *)
| H2 of string (* ## header *)
| H3 of string (* ### header *)
| Link of string * (string option) (* link with url and optional name *)
| Pre of string (* preformatted text line *)
| Text of string (* plain text line *)
| Ul of string (* unordered list item *)
type gemini = gemini_line list
let is_space = function
| ' ' | '\t' -> true
| _ -> false
let is_cr = function
| '\r' -> true
| _ -> false
let skip_spaces = skip_while is_space
let take_till_cr = take_till is_cr <* string "\r\n"
let header =
lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr)
<|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr)
<|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr)
<?> "gemini_header"
let link =
let end_of_url b = is_space b || is_cr b in
let to_link s = function
| "" -> Link (s, None)
| n -> Link (s, Some n)
in
lift2 to_link
(string "=>" *> skip_spaces *> take_till end_of_url)
(* the optional name, will be "" if there is none *)
(skip_spaces *> take_till_cr)
<?> "gemini_link"
(* Makes sure a line is actually a plain text line *)
let text =
let take_line = lift (fun s -> Text s) take_till_cr in
peek_char >>= function
| Some '#' -> fail "text line cannot start with #"
| Some '*' -> fail "text line cannot start with *"
| Some '`' -> (peek_string 3 >>= function
| "```" -> fail "text line cannot start with ```"
| _ -> take_line)
| Some '=' -> (peek_char >>= function
| Some '>' -> fail "text line cannot start with =>"
| _ -> take_line)
| _ -> take_line
(* The gemini spec says that pre blocks are demarcated by lines that
start with "```". It says nothing about what follows on these
lines. *)
let pre_divider = string "```" *> take_till_cr *> return None
let pre_line = peek_string 3
>>= function
| "```" -> fail "preformatted line cannot start with ```"
| _ -> lift (fun s -> Pre s) take_till_cr
let pre_block = pre_divider *> many pre_line <* pre_divider
let list_item = lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr)
let gemini : gemini Angstrom.t =
(* Because many tries each in order, we know that we will have a
plain text line whenever we get to the text parser. As a result,
we don't need to recheck that it's actually a text line and not
some special line. *)
let plain_text = lift (fun s -> Text s) take_till_cr
in
fix (fun gemini ->
lift2 List.append pre_block gemini
<|> lift2 List.cons (header <|> link <|> list_item <|> plain_text) gemini
<|> return [])
<?> "gemini"
let gemini_line_to_str = function
| H1 s -> "# " ^ s
| H2 s -> "## " ^ s
| H3 s -> "### " ^ s
| Link (s, Some n) -> "=> " ^ s ^ " " ^ n
| Link (s, None) -> "=> " ^ s
| Pre s -> "P " ^ s
| Text s -> "T " ^ s
| Ul s -> "* " ^ s
let gemini_to_str l =
List.fold_right (fun s -> fun r -> (gemini_line_to_str s) ^ "\n" ^ r) l ""
let test_parse p str =
match parse_string ~consume:All (p >>= return) str with
| Ok x -> gemini_to_str x
| _ -> ""
|