summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyan Kavanagh <rak@rak.ac>2020-05-30 19:16:03 -0400
committerRyan Kavanagh <rak@rak.ac>2020-05-31 00:04:48 -0400
commitc760484c1abfed9cd85db8ec5eb4d659ad16ba2a (patch)
tree3997432e131028c5947687b0722304fb739558db
parentInitial import of a text/gemini parser (diff)
Added a build system and adopt modules
Diffstat (limited to '')
-rw-r--r--.gitignore11
-rw-r--r--_oasis20
-rw-r--r--bin/dune4
-rw-r--r--bin/leda.ml20
-rw-r--r--dune-project13
-rw-r--r--gemini/dune3
-rw-r--r--gemini/mimeTextGemini.ml192
-rw-r--r--gemini/mimeTextGemini.mli43
-rw-r--r--parse_gemini.ml105
9 files changed, 306 insertions, 105 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2ef0fce
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+.gitignore
+_build
+*.native
+*.byte
+*.docdir
+*.merlin
+
+configure
+setup.data
+setup.log
+setup.ml
diff --git a/_oasis b/_oasis
new file mode 100644
index 0000000..9c5db65
--- /dev/null
+++ b/_oasis
@@ -0,0 +1,20 @@
+Name: leda
+Version: 0.1
+Synopsis: a gemini client
+Authors: Ryan Kavanagh <rak@rak.ac>
+License: BSD-2-clause
+
+Description: Leda is a gemini client implementin in OCaml. It is named after the mother of the Gemini.
+
+Homepage: https://rak.ac
+
+OASISFormat: 0.4
+BuildTools: ocamlbuild
+Plugins: META (0.4), DevFiles (0.4)
+
+Executable "leda"
+ Path: src
+ MainIs: leda.ml
+ CompiledObject: best
+ BuildDepends:
+ angstrom \ No newline at end of file
diff --git a/bin/dune b/bin/dune
new file mode 100644
index 0000000..ddff424
--- /dev/null
+++ b/bin/dune
@@ -0,0 +1,4 @@
+(executable
+ (public_name leda)
+ (name main)
+ (libraries base gemini))
diff --git a/bin/leda.ml b/bin/leda.ml
new file mode 100644
index 0000000..139f5fe
--- /dev/null
+++ b/bin/leda.ml
@@ -0,0 +1,20 @@
+module M = Gemini.MimeTextGemini
+
+let test_gemini = "\
+ => /blah mylink\r\n\
+ => /blah\r\n\
+ ```\r\n\
+ preformatted\r\n\
+ ``` ending preformatted text\r\n\
+ some text\r\n\
+ more text\r\n\
+ \r\n\
+ * unordered list\r\n"
+
+let _ = print_endline test_gemini
+
+let _ = print_endline "\n\n...gets parsed as...\n\n"
+
+let test_parse s = s (* M.gemini_to_str (M.str_to_gemini s) *)
+
+let _ = print_endline (test_parse test_gemini)
diff --git a/dune-project b/dune-project
new file mode 100644
index 0000000..2db5774
--- /dev/null
+++ b/dune-project
@@ -0,0 +1,13 @@
+(lang dune 2.5)
+(name leda)
+
+(generate_opam_files true)
+
+(license BSD-2-Clause)
+(authors "Ryan Kavanagh <rak@rak.ac>")
+
+(package
+ (name leda)
+ (synopsis "gemini client")
+ (depends
+ (angstrom :dev)))
diff --git a/gemini/dune b/gemini/dune
new file mode 100644
index 0000000..cf041d8
--- /dev/null
+++ b/gemini/dune
@@ -0,0 +1,3 @@
+(library
+ (name gemini)
+ (libraries base angstrom))
diff --git a/gemini/mimeTextGemini.ml b/gemini/mimeTextGemini.ml
new file mode 100644
index 0000000..ea370fd
--- /dev/null
+++ b/gemini/mimeTextGemini.ml
@@ -0,0 +1,192 @@
+(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
+(* Implements a parser for the text/gemini mime-type specified on
+ https://gemini.circumlunar.space/docs/spec-spec.txt . *)
+
+open Angstrom
+
+module type MIME_TEXT_GEMINI =
+sig
+ exception ParseError of string
+
+ type gemini_line =
+ | H1 of string (* # header *)
+ | H2 of string (* ## header *)
+ | H3 of string (* ### header *)
+ | Link of string * (string option) (* link with url and optional name *)
+ | Pre of string (* preformatted text line *)
+ | Text of string (* plain text line *)
+ | Ul of string (* unordered list item *)
+
+ type gemini = gemini_line list
+
+ val gemini_line_to_str : gemini_line -> string
+
+ val gemini_to_str : gemini -> string
+
+ val gemini_line_to_canon_str : gemini_line -> string
+
+ val gemini_to_canon_str : gemini -> string
+
+ val str_to_header_line : string -> gemini_line
+
+ val str_to_link_line : string -> gemini_line
+
+ val str_to_text_line : string -> gemini_line
+
+ val str_to_pre_line : string -> gemini_line
+
+ val str_to_pre_block : string -> gemini_line list
+
+ val str_to_list_line : string -> gemini_line
+
+ val str_to_gemini : string -> gemini
+end
+
+module MimeTextGemini : MIME_TEXT_GEMINI =
+struct
+
+ exception ParseError of string
+
+ type gemini_line =
+ | H1 of string
+ | H2 of string
+ | H3 of string
+ | Link of string * (string option)
+ | Pre of string
+ | Text of string
+ | Ul of string
+
+ type gemini = gemini_line list
+
+ let gemini_line_to_str = function
+ | H1 s -> "H1 " ^ s
+ | H2 s -> "H2 " ^ s
+ | H3 s -> "H3 " ^ s
+ | Link (s, Some n) -> "Link (" ^ s ^ ", Some" ^ n ^ ")"
+ | Link (s, None) -> "Link (" ^ s ^ ", None)"
+ | Pre s -> "Pre " ^ s
+ | Text s -> "Text " ^ s
+ | Ul s -> "Ul " ^ s
+
+ let gemini_line_to_canon_str = function
+ | H1 s -> "# " ^ s
+ | H2 s -> "## " ^ s
+ | H3 s -> "### " ^ s
+ | Link (s, Some n) -> "=> " ^ s ^ " " ^ n
+ | Link (s, None) -> "=> " ^ s
+ | Pre s -> s
+ | Text s -> s
+ | Ul s -> "* " ^ s
+
+ let gemini_to_str g =
+ List.fold_right (fun l -> fun r -> (gemini_line_to_str l) ^ "\n" ^ r) g ""
+
+ let gemini_to_canon_str g =
+ let in_pre = ref false in
+ let f l r =
+ match (l, !in_pre) with
+ | Pre s, true -> s ^ "\r\n" ^ r
+ | Pre s, false ->
+ let () = in_pre := true in
+ "```\r\n" ^ s ^ "\r\n" ^ r
+ | _, true ->
+ let () = in_pre := false in
+ "```\r\n" ^ (gemini_line_to_canon_str l) ^ "\r\n" ^ r
+ | _, false -> (gemini_line_to_canon_str l) ^ "\r\n" ^ r
+ in
+ List.fold_right f g "\r\n"
+
+ let parse_string p s =
+ match Angstrom.parse_string ~consume:All (p >>= return) s with
+ | Ok parsed -> parsed
+ | Error m -> raise (ParseError m)
+
+ let is_space = function
+ | ' ' | '\t' -> true
+ | _ -> false
+
+ let is_cr = function
+ | '\r' -> true
+ | _ -> false
+
+ let skip_spaces = skip_while is_space
+
+ let take_till_cr = take_till is_cr <* string "\r\n"
+
+ let header_parser =
+ lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr)
+ <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr)
+ <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr)
+ <?> "gemini_header"
+
+ let str_to_header_line = parse_string header_parser
+
+ let link_parser =
+ let end_of_url b = is_space b || is_cr b in
+ let to_link s = function
+ | "" -> Link (s, None)
+ | n -> Link (s, Some n)
+ in
+ lift2 to_link
+ (string "=>" *> skip_spaces *> take_till end_of_url)
+ (* the optional name, will be "" if there is none *)
+ (skip_spaces *> take_till_cr)
+ <?> "gemini_link"
+
+ let str_to_link_line = parse_string link_parser
+
+ (* Makes sure a line is actually a plain text line *)
+ let text_parser =
+ let take_line = lift (fun s -> Text s) take_till_cr in
+ peek_char >>= function
+ | Some '#' -> fail "text line cannot start with #"
+ | Some '*' -> fail "text line cannot start with *"
+ | Some '`' -> (peek_string 3 >>= function
+ | "```" -> fail "text line cannot start with ```"
+ | _ -> take_line)
+ | Some '=' -> (peek_char >>= function
+ | Some '>' -> fail "text line cannot start with =>"
+ | _ -> take_line)
+ | _ -> take_line
+
+ let str_to_text_line = parse_string text_parser
+
+ (* The gemini spec says that pre blocks are demarcated by lines that
+ start with "```". It says nothing about what follows on these
+ lines. *)
+ let pre_divider = string "```" *> take_till_cr *> return None
+
+ let pre_line_parser = peek_string 3
+ >>= function
+ | "```" -> fail "preformatted line cannot start with ```"
+ | _ -> lift (fun s -> Pre s) take_till_cr
+
+ let pre_block_parser = pre_divider *> many pre_line_parser <* pre_divider
+
+ let str_to_pre_line = parse_string pre_line_parser
+ let str_to_pre_block = parse_string pre_block_parser
+
+ let list_item_parser =
+ lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr)
+
+ let str_to_list_line = parse_string list_item_parser
+
+ let gemini : gemini_line list Angstrom.t =
+ (* Because many tries each in order, we know that we will have a
+ plain text line whenever we get to the text parser. As a result,
+ we don't need to recheck that it's actually a text line and not
+ some special line. *)
+ let plain_text_parser = lift (fun s -> Text s) take_till_cr
+ in
+ fix (fun gemini ->
+ lift2 List.append pre_block_parser gemini
+ <|> lift2 List.cons (header_parser
+ <|> link_parser
+ <|> list_item_parser
+ <|> plain_text_parser) gemini
+ <|> return [])
+ <?> "gemini"
+
+ let str_to_gemini = parse_string gemini
+
+end
diff --git a/gemini/mimeTextGemini.mli b/gemini/mimeTextGemini.mli
new file mode 100644
index 0000000..aa8d462
--- /dev/null
+++ b/gemini/mimeTextGemini.mli
@@ -0,0 +1,43 @@
+(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
+(* Implements a parser for the text/gemini mime-type specified on
+ https://gemini.circumlunar.space/docs/spec-spec.txt . *)
+
+module type MIME_TEXT_GEMINI =
+sig
+ exception ParseError of string
+
+ type gemini_line =
+ | H1 of string (* # header *)
+ | H2 of string (* ## header *)
+ | H3 of string (* ### header *)
+ | Link of string * (string option) (* link with url and optional name *)
+ | Pre of string (* preformatted text line *)
+ | Text of string (* plain text line *)
+ | Ul of string (* unordered list item *)
+
+ type gemini = gemini_line list
+
+ val gemini_line_to_str : gemini_line -> string
+
+ val gemini_to_str : gemini -> string
+
+ val gemini_line_to_canon_str : gemini_line -> string
+
+ val gemini_to_canon_str : gemini -> string
+
+ val str_to_header_line : string -> gemini_line
+
+ val str_to_link_line : string -> gemini_line
+
+ val str_to_text_line : string -> gemini_line
+
+ val str_to_pre_line : string -> gemini_line
+
+ val str_to_pre_block : string -> gemini_line list
+
+ val str_to_list_line : string -> gemini_line
+
+ val str_to_gemini : string -> gemini
+end
+
+module MimeTextGemini : MIME_TEXT_GEMINI
diff --git a/parse_gemini.ml b/parse_gemini.ml
deleted file mode 100644
index 2246a90..0000000
--- a/parse_gemini.ml
+++ /dev/null
@@ -1,105 +0,0 @@
-(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *)
-(* Implements a parser for the text/gemini mime-type specified on
- https://gemini.circumlunar.space/docs/spec-spec.txt . *)
-
-open Angstrom
-
-type gemini_line =
- | H1 of string (* # header *)
- | H2 of string (* ## header *)
- | H3 of string (* ### header *)
- | Link of string * (string option) (* link with url and optional name *)
- | Pre of string (* preformatted text line *)
- | Text of string (* plain text line *)
- | Ul of string (* unordered list item *)
-
-type gemini = gemini_line list
-
-let is_space = function
- | ' ' | '\t' -> true
- | _ -> false
-
-let is_cr = function
- | '\r' -> true
- | _ -> false
-
-let skip_spaces = skip_while is_space
-
-let take_till_cr = take_till is_cr <* string "\r\n"
-
-let header =
- lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr)
- <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr)
- <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr)
- <?> "gemini_header"
-
-let link =
- let end_of_url b = is_space b || is_cr b in
- let to_link s = function
- | "" -> Link (s, None)
- | n -> Link (s, Some n)
- in
- lift2 to_link
- (string "=>" *> skip_spaces *> take_till end_of_url)
- (* the optional name, will be "" if there is none *)
- (skip_spaces *> take_till_cr)
- <?> "gemini_link"
-
-(* Makes sure a line is actually a plain text line *)
-let text =
- let take_line = lift (fun s -> Text s) take_till_cr in
- peek_char >>= function
- | Some '#' -> fail "text line cannot start with #"
- | Some '*' -> fail "text line cannot start with *"
- | Some '`' -> (peek_string 3 >>= function
- | "```" -> fail "text line cannot start with ```"
- | _ -> take_line)
- | Some '=' -> (peek_char >>= function
- | Some '>' -> fail "text line cannot start with =>"
- | _ -> take_line)
- | _ -> take_line
-
-(* The gemini spec says that pre blocks are demarcated by lines that
- start with "```". It says nothing about what follows on these
- lines. *)
-let pre_divider = string "```" *> take_till_cr *> return None
-
-let pre_line = peek_string 3
- >>= function
- | "```" -> fail "preformatted line cannot start with ```"
- | _ -> lift (fun s -> Pre s) take_till_cr
-
-let pre_block = pre_divider *> many pre_line <* pre_divider
-
-let list_item = lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr)
-
-let gemini : gemini Angstrom.t =
- (* Because many tries each in order, we know that we will have a
- plain text line whenever we get to the text parser. As a result,
- we don't need to recheck that it's actually a text line and not
- some special line. *)
- let plain_text = lift (fun s -> Text s) take_till_cr
- in
- fix (fun gemini ->
- lift2 List.append pre_block gemini
- <|> lift2 List.cons (header <|> link <|> list_item <|> plain_text) gemini
- <|> return [])
- <?> "gemini"
-
-let gemini_line_to_str = function
- | H1 s -> "# " ^ s
- | H2 s -> "## " ^ s
- | H3 s -> "### " ^ s
- | Link (s, Some n) -> "=> " ^ s ^ " " ^ n
- | Link (s, None) -> "=> " ^ s
- | Pre s -> "P " ^ s
- | Text s -> "T " ^ s
- | Ul s -> "* " ^ s
-
-let gemini_to_str l =
- List.fold_right (fun s -> fun r -> (gemini_line_to_str s) ^ "\n" ^ r) l ""
-
-let test_parse p str =
- match parse_string ~consume:All (p >>= return) str with
- | Ok x -> gemini_to_str x
- | _ -> ""