summaryrefslogtreecommitdiff
path: root/gemini/mimeTextGemini.ml
blob: 385871fee7e4bbe490b8f7edb17075ad7e6c43e6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
(* Implements a parser for the text/gemini mime-type specified on
   https://gemini.circumlunar.space/docs/spec-spec.txt .  *)

open Angstrom

module type MIME_TEXT_GEMINI =
sig
  exception ParseError of string

  type gemini_line =
    | H1 of string                     (* # header   *)
    | H2 of string                     (* ## header  *)
    | H3 of string                     (* ### header *)
    | Link of string * (string option) (* link with url and optional name *)
    | Pre of string                    (* preformatted text line *)
    | Text of string                   (* plain text line *)
    | Ul of string                     (* unordered list item *)
    | Quoted of string                 (* quoted text *)

  type gemini = gemini_line list

  val gemini_line_to_str : gemini_line -> string

  val gemini_to_str : gemini -> string

  val gemini_line_to_canon_str : gemini_line -> string

  val gemini_to_canon_str : gemini -> string

  val str_to_header_line : string -> gemini_line

  val str_to_link_line : string -> gemini_line

  val str_to_text_line : string -> gemini_line

  val str_to_pre_line : string -> gemini_line

  val str_to_pre_block : string -> gemini_line list

  val str_to_list_line : string -> gemini_line

  val str_to_quoted_line : string -> gemini_line

  val str_to_gemini : string -> gemini
end

module MimeTextGemini : MIME_TEXT_GEMINI =
struct

  open ParseCommon.ParseCommon

  exception ParseError of string

  type gemini_line =
    | H1 of string
    | H2 of string
    | H3 of string
    | Link of string * (string option)
    | Pre of string
    | Text of string
    | Ul of string
    | Quoted of string

  type gemini = gemini_line list

  let gemini_line_to_str = function
    | H1 s -> "H1 " ^ s
    | H2 s -> "H2 " ^ s
    | H3 s -> "H3 " ^ s
    | Link (s, Some n) -> "Link (" ^ s ^ ", Some" ^ n ^ ")"
    | Link (s, None) -> "Link (" ^ s ^ ", None)"
    | Pre s -> "Pre " ^ s
    | Text s -> "Text " ^ s
    | Ul s -> "Ul " ^ s
    | Quoted s -> "Quoted " ^ s

  let gemini_line_to_canon_str = function
    | H1 s -> "# " ^ s
    | H2 s -> "## " ^ s
    | H3 s -> "### " ^ s
    | Link (s, Some n) -> "=> " ^ s ^ " " ^ n
    | Link (s, None) -> "=> " ^ s
    | Pre s -> s
    | Text s -> s
    | Ul s -> "* " ^ s
    | Quoted s -> ">" ^ s

  let gemini_to_str g =
    List.fold_right (fun l -> fun r -> (gemini_line_to_str l) ^ "\n" ^ r) g ""

  let gemini_to_canon_str g =
    let eol = "\r\n" in
    let f (l : gemini_line) ((in_pre, r) : bool * string) =
      match l, in_pre with
      | Pre s, false ->
        (true, s ^ eol ^ "```" ^ eol ^ r)
      | Pre s, true ->
        (true, s ^ eol ^ r)
      | _, true ->
        (false, (gemini_line_to_canon_str l) ^ eol ^ "```" ^ eol ^ r)
      | _, false ->
        (false, (gemini_line_to_canon_str l) ^ eol ^ r) in
    let (_, s) = List.fold_right f g (false, "") in
    s

  let parse_string p s =
    match Angstrom.parse_string ~consume:All (p >>= return) s with
    | Ok parsed -> parsed
    | Error m -> raise (ParseError m)

  let header_parser =
    lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_eol)
    <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_eol)
    <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_eol)
    <?> "gemini_header"

  let str_to_header_line = parse_string header_parser

  let link_parser =
    let end_of_url b = is_whitespace b || is_cr b in
    let to_link s = function
      | "" -> Link (s, None)
      | n -> Link (s, Some n)
    in
    lift2 to_link
      (string "=>" *> skip_spaces *> take_till end_of_url)
      (* the optional name, will be "" if there is none *)
      (skip_spaces *> take_till_eol)
    <?> "gemini_link"

  let str_to_link_line = parse_string link_parser

  (* Makes sure a line is actually a plain text line *)
  let text_parser =
    let take_line = lift (fun s -> Text s) take_till_eol in
    peek_char >>= function
    | Some '#' -> fail "text line cannot start with #"
    | Some '>' -> fail "text line cannot start with >"
    | Some '`' -> (peek_string 3 >>= function
      | "```" -> fail "text line cannot start with ```"
      | _ -> take_line)
    | Some '=' -> (peek_char >>= function
      | Some '>' -> fail "text line cannot start with =>"
      | _ -> take_line)
    | Some '*' -> (peek_char >>= function
      | Some ' ' -> fail "text line cannot start with `* '"
      | _ -> take_line)
    | _ -> take_line

  let str_to_text_line = parse_string text_parser

  (* The gemini spec says that pre blocks are demarcated by lines that
     start with "```". It says nothing about what follows on these
     lines. *)
  let pre_divider = string "```" *> take_till_eol *> return None

  let pre_line_parser = peek_string 3
    >>= function
    | "```" -> fail "preformatted line cannot start with ```"
    | _ -> lift (fun s -> Pre s) take_till_eol

  let pre_block_parser = pre_divider *> many pre_line_parser <* pre_divider

  let str_to_pre_line = parse_string pre_line_parser
  let str_to_pre_block = parse_string pre_block_parser

  let list_item_parser =
    lift (fun x -> Ul x) (string "* " *> skip_spaces *> take_till_eol)

  let str_to_list_line = parse_string list_item_parser

  let quoted_parser =
    lift (fun x -> Ul x) (string ">" *> skip_spaces *> take_till_eol)

  let str_to_quoted_line = parse_string quoted_parser

  let gemini : gemini_line list Angstrom.t =
    (* Because many tries each in order, we know that we will have a
       plain text line whenever we get to the text parser. As a result,
       we don't need to recheck that it's actually a text line and not
       some special line. *)
    let plain_text_parser = lift (fun s -> Text s) take_till_eol
    in
    fix (fun gemini ->
        lift2 List.append pre_block_parser gemini
        <|> lift2 List.cons (header_parser
                             <|> link_parser
                             <|> list_item_parser
                             <|> quoted_parser
                             <|> plain_text_parser) gemini
        <|> return [])
    <?> "gemini"

  let str_to_gemini = parse_string gemini

end