diff options
author | Ryan Kavanagh <rak@rak.ac> | 2020-05-30 19:16:03 -0400 |
---|---|---|
committer | Ryan Kavanagh <rak@rak.ac> | 2020-05-31 00:04:48 -0400 |
commit | c760484c1abfed9cd85db8ec5eb4d659ad16ba2a (patch) | |
tree | 3997432e131028c5947687b0722304fb739558db | |
parent | Initial import of a text/gemini parser (diff) |
Added a build system and adopt modules
-rw-r--r-- | .gitignore | 11 | ||||
-rw-r--r-- | _oasis | 20 | ||||
-rw-r--r-- | bin/dune | 4 | ||||
-rw-r--r-- | bin/leda.ml | 20 | ||||
-rw-r--r-- | dune-project | 13 | ||||
-rw-r--r-- | gemini/dune | 3 | ||||
-rw-r--r-- | gemini/mimeTextGemini.ml | 192 | ||||
-rw-r--r-- | gemini/mimeTextGemini.mli | 43 | ||||
-rw-r--r-- | parse_gemini.ml | 105 |
9 files changed, 306 insertions, 105 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2ef0fce --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +.gitignore +_build +*.native +*.byte +*.docdir +*.merlin + +configure +setup.data +setup.log +setup.ml @@ -0,0 +1,20 @@ +Name: leda +Version: 0.1 +Synopsis: a gemini client +Authors: Ryan Kavanagh <rak@rak.ac> +License: BSD-2-clause + +Description: Leda is a gemini client implementin in OCaml. It is named after the mother of the Gemini. + +Homepage: https://rak.ac + +OASISFormat: 0.4 +BuildTools: ocamlbuild +Plugins: META (0.4), DevFiles (0.4) + +Executable "leda" + Path: src + MainIs: leda.ml + CompiledObject: best + BuildDepends: + angstrom
\ No newline at end of file diff --git a/bin/dune b/bin/dune new file mode 100644 index 0000000..ddff424 --- /dev/null +++ b/bin/dune @@ -0,0 +1,4 @@ +(executable + (public_name leda) + (name main) + (libraries base gemini)) diff --git a/bin/leda.ml b/bin/leda.ml new file mode 100644 index 0000000..139f5fe --- /dev/null +++ b/bin/leda.ml @@ -0,0 +1,20 @@ +module M = Gemini.MimeTextGemini + +let test_gemini = "\ + => /blah mylink\r\n\ + => /blah\r\n\ + ```\r\n\ + preformatted\r\n\ + ``` ending preformatted text\r\n\ + some text\r\n\ + more text\r\n\ + \r\n\ + * unordered list\r\n" + +let _ = print_endline test_gemini + +let _ = print_endline "\n\n...gets parsed as...\n\n" + +let test_parse s = s (* M.gemini_to_str (M.str_to_gemini s) *) + +let _ = print_endline (test_parse test_gemini) diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..2db5774 --- /dev/null +++ b/dune-project @@ -0,0 +1,13 @@ +(lang dune 2.5) +(name leda) + +(generate_opam_files true) + +(license BSD-2-Clause) +(authors "Ryan Kavanagh <rak@rak.ac>") + +(package + (name leda) + (synopsis "gemini client") + (depends + (angstrom :dev))) diff --git a/gemini/dune b/gemini/dune new file mode 100644 index 0000000..cf041d8 --- /dev/null +++ b/gemini/dune @@ -0,0 +1,3 @@ +(library + (name gemini) + (libraries base angstrom)) diff --git a/gemini/mimeTextGemini.ml b/gemini/mimeTextGemini.ml new file mode 100644 index 0000000..ea370fd --- /dev/null +++ b/gemini/mimeTextGemini.ml @@ -0,0 +1,192 @@ +(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *) +(* Implements a parser for the text/gemini mime-type specified on + https://gemini.circumlunar.space/docs/spec-spec.txt . *) + +open Angstrom + +module type MIME_TEXT_GEMINI = +sig + exception ParseError of string + + type gemini_line = + | H1 of string (* # header *) + | H2 of string (* ## header *) + | H3 of string (* ### header *) + | Link of string * (string option) (* link with url and optional name *) + | Pre of string (* preformatted text line *) + | Text of string (* plain text line *) + | Ul of string (* unordered list item *) + + type gemini = gemini_line list + + val gemini_line_to_str : gemini_line -> string + + val gemini_to_str : gemini -> string + + val gemini_line_to_canon_str : gemini_line -> string + + val gemini_to_canon_str : gemini -> string + + val str_to_header_line : string -> gemini_line + + val str_to_link_line : string -> gemini_line + + val str_to_text_line : string -> gemini_line + + val str_to_pre_line : string -> gemini_line + + val str_to_pre_block : string -> gemini_line list + + val str_to_list_line : string -> gemini_line + + val str_to_gemini : string -> gemini +end + +module MimeTextGemini : MIME_TEXT_GEMINI = +struct + + exception ParseError of string + + type gemini_line = + | H1 of string + | H2 of string + | H3 of string + | Link of string * (string option) + | Pre of string + | Text of string + | Ul of string + + type gemini = gemini_line list + + let gemini_line_to_str = function + | H1 s -> "H1 " ^ s + | H2 s -> "H2 " ^ s + | H3 s -> "H3 " ^ s + | Link (s, Some n) -> "Link (" ^ s ^ ", Some" ^ n ^ ")" + | Link (s, None) -> "Link (" ^ s ^ ", None)" + | Pre s -> "Pre " ^ s + | Text s -> "Text " ^ s + | Ul s -> "Ul " ^ s + + let gemini_line_to_canon_str = function + | H1 s -> "# " ^ s + | H2 s -> "## " ^ s + | H3 s -> "### " ^ s + | Link (s, Some n) -> "=> " ^ s ^ " " ^ n + | Link (s, None) -> "=> " ^ s + | Pre s -> s + | Text s -> s + | Ul s -> "* " ^ s + + let gemini_to_str g = + List.fold_right (fun l -> fun r -> (gemini_line_to_str l) ^ "\n" ^ r) g "" + + let gemini_to_canon_str g = + let in_pre = ref false in + let f l r = + match (l, !in_pre) with + | Pre s, true -> s ^ "\r\n" ^ r + | Pre s, false -> + let () = in_pre := true in + "```\r\n" ^ s ^ "\r\n" ^ r + | _, true -> + let () = in_pre := false in + "```\r\n" ^ (gemini_line_to_canon_str l) ^ "\r\n" ^ r + | _, false -> (gemini_line_to_canon_str l) ^ "\r\n" ^ r + in + List.fold_right f g "\r\n" + + let parse_string p s = + match Angstrom.parse_string ~consume:All (p >>= return) s with + | Ok parsed -> parsed + | Error m -> raise (ParseError m) + + let is_space = function + | ' ' | '\t' -> true + | _ -> false + + let is_cr = function + | '\r' -> true + | _ -> false + + let skip_spaces = skip_while is_space + + let take_till_cr = take_till is_cr <* string "\r\n" + + let header_parser = + lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr) + <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr) + <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr) + <?> "gemini_header" + + let str_to_header_line = parse_string header_parser + + let link_parser = + let end_of_url b = is_space b || is_cr b in + let to_link s = function + | "" -> Link (s, None) + | n -> Link (s, Some n) + in + lift2 to_link + (string "=>" *> skip_spaces *> take_till end_of_url) + (* the optional name, will be "" if there is none *) + (skip_spaces *> take_till_cr) + <?> "gemini_link" + + let str_to_link_line = parse_string link_parser + + (* Makes sure a line is actually a plain text line *) + let text_parser = + let take_line = lift (fun s -> Text s) take_till_cr in + peek_char >>= function + | Some '#' -> fail "text line cannot start with #" + | Some '*' -> fail "text line cannot start with *" + | Some '`' -> (peek_string 3 >>= function + | "```" -> fail "text line cannot start with ```" + | _ -> take_line) + | Some '=' -> (peek_char >>= function + | Some '>' -> fail "text line cannot start with =>" + | _ -> take_line) + | _ -> take_line + + let str_to_text_line = parse_string text_parser + + (* The gemini spec says that pre blocks are demarcated by lines that + start with "```". It says nothing about what follows on these + lines. *) + let pre_divider = string "```" *> take_till_cr *> return None + + let pre_line_parser = peek_string 3 + >>= function + | "```" -> fail "preformatted line cannot start with ```" + | _ -> lift (fun s -> Pre s) take_till_cr + + let pre_block_parser = pre_divider *> many pre_line_parser <* pre_divider + + let str_to_pre_line = parse_string pre_line_parser + let str_to_pre_block = parse_string pre_block_parser + + let list_item_parser = + lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr) + + let str_to_list_line = parse_string list_item_parser + + let gemini : gemini_line list Angstrom.t = + (* Because many tries each in order, we know that we will have a + plain text line whenever we get to the text parser. As a result, + we don't need to recheck that it's actually a text line and not + some special line. *) + let plain_text_parser = lift (fun s -> Text s) take_till_cr + in + fix (fun gemini -> + lift2 List.append pre_block_parser gemini + <|> lift2 List.cons (header_parser + <|> link_parser + <|> list_item_parser + <|> plain_text_parser) gemini + <|> return []) + <?> "gemini" + + let str_to_gemini = parse_string gemini + +end diff --git a/gemini/mimeTextGemini.mli b/gemini/mimeTextGemini.mli new file mode 100644 index 0000000..aa8d462 --- /dev/null +++ b/gemini/mimeTextGemini.mli @@ -0,0 +1,43 @@ +(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *) +(* Implements a parser for the text/gemini mime-type specified on + https://gemini.circumlunar.space/docs/spec-spec.txt . *) + +module type MIME_TEXT_GEMINI = +sig + exception ParseError of string + + type gemini_line = + | H1 of string (* # header *) + | H2 of string (* ## header *) + | H3 of string (* ### header *) + | Link of string * (string option) (* link with url and optional name *) + | Pre of string (* preformatted text line *) + | Text of string (* plain text line *) + | Ul of string (* unordered list item *) + + type gemini = gemini_line list + + val gemini_line_to_str : gemini_line -> string + + val gemini_to_str : gemini -> string + + val gemini_line_to_canon_str : gemini_line -> string + + val gemini_to_canon_str : gemini -> string + + val str_to_header_line : string -> gemini_line + + val str_to_link_line : string -> gemini_line + + val str_to_text_line : string -> gemini_line + + val str_to_pre_line : string -> gemini_line + + val str_to_pre_block : string -> gemini_line list + + val str_to_list_line : string -> gemini_line + + val str_to_gemini : string -> gemini +end + +module MimeTextGemini : MIME_TEXT_GEMINI diff --git a/parse_gemini.ml b/parse_gemini.ml deleted file mode 100644 index 2246a90..0000000 --- a/parse_gemini.ml +++ /dev/null @@ -1,105 +0,0 @@ -(* Copyright (C) 2020 Ryan Kavanagh <rak@rak.ac> *) -(* Implements a parser for the text/gemini mime-type specified on - https://gemini.circumlunar.space/docs/spec-spec.txt . *) - -open Angstrom - -type gemini_line = - | H1 of string (* # header *) - | H2 of string (* ## header *) - | H3 of string (* ### header *) - | Link of string * (string option) (* link with url and optional name *) - | Pre of string (* preformatted text line *) - | Text of string (* plain text line *) - | Ul of string (* unordered list item *) - -type gemini = gemini_line list - -let is_space = function - | ' ' | '\t' -> true - | _ -> false - -let is_cr = function - | '\r' -> true - | _ -> false - -let skip_spaces = skip_while is_space - -let take_till_cr = take_till is_cr <* string "\r\n" - -let header = - lift (fun x -> H3 x) (string "###" *> skip_spaces *> take_till_cr) - <|> lift (fun x -> H2 x) (string "##" *> skip_spaces *> take_till_cr) - <|> lift (fun x -> H1 x) (string "#" *> skip_spaces *> take_till_cr) - <?> "gemini_header" - -let link = - let end_of_url b = is_space b || is_cr b in - let to_link s = function - | "" -> Link (s, None) - | n -> Link (s, Some n) - in - lift2 to_link - (string "=>" *> skip_spaces *> take_till end_of_url) - (* the optional name, will be "" if there is none *) - (skip_spaces *> take_till_cr) - <?> "gemini_link" - -(* Makes sure a line is actually a plain text line *) -let text = - let take_line = lift (fun s -> Text s) take_till_cr in - peek_char >>= function - | Some '#' -> fail "text line cannot start with #" - | Some '*' -> fail "text line cannot start with *" - | Some '`' -> (peek_string 3 >>= function - | "```" -> fail "text line cannot start with ```" - | _ -> take_line) - | Some '=' -> (peek_char >>= function - | Some '>' -> fail "text line cannot start with =>" - | _ -> take_line) - | _ -> take_line - -(* The gemini spec says that pre blocks are demarcated by lines that - start with "```". It says nothing about what follows on these - lines. *) -let pre_divider = string "```" *> take_till_cr *> return None - -let pre_line = peek_string 3 - >>= function - | "```" -> fail "preformatted line cannot start with ```" - | _ -> lift (fun s -> Pre s) take_till_cr - -let pre_block = pre_divider *> many pre_line <* pre_divider - -let list_item = lift (fun x -> Ul x) (string "*" *> skip_spaces *> take_till_cr) - -let gemini : gemini Angstrom.t = - (* Because many tries each in order, we know that we will have a - plain text line whenever we get to the text parser. As a result, - we don't need to recheck that it's actually a text line and not - some special line. *) - let plain_text = lift (fun s -> Text s) take_till_cr - in - fix (fun gemini -> - lift2 List.append pre_block gemini - <|> lift2 List.cons (header <|> link <|> list_item <|> plain_text) gemini - <|> return []) - <?> "gemini" - -let gemini_line_to_str = function - | H1 s -> "# " ^ s - | H2 s -> "## " ^ s - | H3 s -> "### " ^ s - | Link (s, Some n) -> "=> " ^ s ^ " " ^ n - | Link (s, None) -> "=> " ^ s - | Pre s -> "P " ^ s - | Text s -> "T " ^ s - | Ul s -> "* " ^ s - -let gemini_to_str l = - List.fold_right (fun s -> fun r -> (gemini_line_to_str s) ^ "\n" ^ r) l "" - -let test_parse p str = - match parse_string ~consume:All (p >>= return) str with - | Ok x -> gemini_to_str x - | _ -> "" |