1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
(* Implements a parser for the text/gemini mime-type specified on
https://gemini.circumlunar.space/docs/spec-spec.txt . *)
open Angstrom
module type MIME_TEXT_GEMINI =
sig
exception ParseError of string
type gemini_line =
| H1 of string (* # header *)
| H2 of string (* ## header *)
| H3 of string (* ### header *)
| Link of string * (string option) (* link with url and optional name *)
| Pre of string (* preformatted text line *)
| Text of string (* plain text line *)
| Ul of string (* unordered list item *)
type gemini = gemini_line list
val gemini_line_to_str : gemini_line -> string
val gemini_to_str : gemini -> string
val gemini_line_to_canon_str : gemini_line -> string
val gemini_to_canon_str : gemini -> string
val str_to_header_line : string -> gemini_line
val str_to_link_line : string -> gemini_line
val str_to_text_line : string -> gemini_line
val str_to_pre_line : string -> gemini_line
val str_to_pre_block : string -> gemini_line list
val str_to_list_line : string -> gemini_line
val str_to_gemini : string -> gemini
end
module MimeTextGemini : MIME_TEXT_GEMINI =
struct
open ParseCommon.ParseCommon
exception ParseError of string
type gemini_line =
| H1 of string
| H2 of string
| H3 of string
| Link of string * (string option)
| Pre of string
| Text of string
| Ul of string
type gemini = gemini_line list
let gemini_line_to_str = function
| H1 s -> "H1 " ^ s
| H2 s -> "H2 " ^ s
| H3 s -> "H3 " ^ s
| Link (s, Some n) -> "Link (" ^ s ^ ", Some" ^ n ^ ")"
| Link (s, None) -> "Link (" ^ s ^ ", None)"
| Pre s -> "Pre " ^ s
| Text s -> "Text " ^ s
| Ul s -> "Ul " ^ s
let gemini_line_to_canon_str = function
| H1 s -> "# " ^ s
| H2 s -> "## " ^ s
| H3 s -> "### " ^ s
| Link (s, Some n) -> "=> " ^ s ^ " " ^ n
| Link (s, None) -> "=> " ^ s
| Pre s -> s
| Text s -> s
| Ul s -> "* " ^ s
let gemini_to_str g =
List.fold_right (fun l -> fun r -> (gemini_line_to_str l) ^ "\n" ^ r) g ""
let gemini_to_canon_str g =
let eol = "\r\n" in
let f (l : gemini_line) ((in_pre, r) : bool * string) =
match l, in_pre with
| Pre s, false ->
(true, s ^ eol ^ "```" ^ eol ^ r)
| Pre s, true ->
(true, s ^ eol ^ r)
| _, true ->
(false, (gemini_line_to_canon_str l) ^ eol ^ "```" ^ eol ^ r)
| _, false ->
(false, (gemini_line_to_canon_str l) ^ eol ^ r) in
let (_, s) = List.fold_right f g (false, "") in
s
let parse_string p s =
match Angstrom.parse_string ~consume:All (p >>= return) s with
| Ok parsed -> parsed
| Error m -> raise (ParseError m)
let header_parser =
lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr)
<|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr)
<|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr)
<?> "gemini_header"
let str_to_header_line = parse_string header_parser
let link_parser =
let end_of_url b = is_whitespace b || is_cr b in
let to_link s = function
| "" -> Link (s, None)
| n -> Link (s, Some n)
in
lift2 to_link
(string "=>" *> skip_spaces *> take_till end_of_url)
(* the optional name, will be "" if there is none *)
(skip_spaces *> take_till_cr)
<?> "gemini_link"
let str_to_link_line = parse_string link_parser
(* Makes sure a line is actually a plain text line *)
let text_parser =
let take_line = lift (fun s -> Text s) take_till_cr in
peek_char >>= function
| Some '#' -> fail "text line cannot start with #"
| Some '*' -> fail "text line cannot start with *"
| Some '`' -> (peek_string 3 >>= function
| "```" -> fail "text line cannot start with ```"
| _ -> take_line)
| Some '=' -> (peek_char >>= function
| Some '>' -> fail "text line cannot start with =>"
| _ -> take_line)
| _ -> take_line
let str_to_text_line = parse_string text_parser
(* The gemini spec says that pre blocks are demarcated by lines that
start with "```". It says nothing about what follows on these
lines. *)
let pre_divider = string "```" *> take_till_cr *> return None
let pre_line_parser = peek_string 3
>>= function
| "```" -> fail "preformatted line cannot start with ```"
| _ -> lift (fun s -> Pre s) take_till_cr
let pre_block_parser = pre_divider *> many pre_line_parser <* pre_divider
let str_to_pre_line = parse_string pre_line_parser
let str_to_pre_block = parse_string pre_block_parser
let list_item_parser =
lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr)
let str_to_list_line = parse_string list_item_parser
let gemini : gemini_line list Angstrom.t =
(* Because many tries each in order, we know that we will have a
plain text line whenever we get to the text parser. As a result,
we don't need to recheck that it's actually a text line and not
some special line. *)
let plain_text_parser = lift (fun s -> Text s) take_till_cr
in
fix (fun gemini ->
lift2 List.append pre_block_parser gemini
<|> lift2 List.cons (header_parser
<|> link_parser
<|> list_item_parser
<|> plain_text_parser) gemini
<|> return [])
<?> "gemini"
let str_to_gemini = parse_string gemini
end
|