From f45423a9bac148f5dd5634ca701d3290552a204f Mon Sep 17 00:00:00 2001 From: Anthony Scemama Date: Tue, 29 Dec 2020 18:06:54 +0100 Subject: [PATCH] Added nuclei.org --- particles/lib/nuclei.ml | 121 +++++----- particles/lib/nuclei.mli | 49 ++-- particles/lib/nuclei_lexer.mll | 12 +- particles/lib/xyz_ast.mli | 10 +- particles/lib/xyz_parser.mly | 4 - particles/nuclei.org | 393 +++++++++++++++++++++++++++++++++ particles/test/nuclei.ml | 5 + 7 files changed, 509 insertions(+), 85 deletions(-) create mode 100644 particles/nuclei.org diff --git a/particles/lib/nuclei.ml b/particles/lib/nuclei.ml index d5de19e..0408799 100644 --- a/particles/lib/nuclei.ml +++ b/particles/lib/nuclei.ml @@ -1,8 +1,21 @@ +(* [[file:../nuclei.org::*Type][Type:2]] *) open Common -open Xyz_ast type t = (Element.t * Coordinate.t) array +open Xyz_ast +(* Type:2 ends here *) + + +(* | ~of_xyz_string~ | Create from a string, in xyz format | + * | ~of_xyz_file~ | Create from a file, in xyz format | + * | ~of_zmt_string~ | Create from a string, in z-matrix format | + * | ~of_zmt_file~ | Create from a file, in z-matrix format | + * | ~to_string~ | Transform to a string, for printing | + * | ~of_filename~ | Detects the type of file (xyz, z-matrix) and reads the file | *) + + +(* [[file:../nuclei.org::*Conversion][Conversion:2]] *) let of_xyz_lexbuf lexbuf = let data = Xyz_parser.input Nuclei_lexer.read_all lexbuf @@ -41,9 +54,9 @@ let of_zmt_string buffer = Zmatrix.of_string buffer |> Zmatrix.to_xyz |> Array.map (fun (e,x,y,z) -> - (e, Coordinate.(angstrom_to_bohr @@ make_angstrom { x ; y ; z} )) - ) - + (e, Coordinate.(angstrom_to_bohr @@ make_angstrom { x ; y ; z} )) + ) + let of_zmt_file filename = let ic = open_in filename in @@ -56,11 +69,11 @@ let of_zmt_file filename = List.rev accu |> String.concat "\n" in aux [] - |> of_zmt_string - + |> of_zmt_string + let to_string atoms = - " + " Nuclear Coordinates (Angstrom) ------------------------------ @@ -69,27 +82,50 @@ let to_string atoms = Number X Y Z ----------------------------------------------------------------------- " ^ - (Array.mapi (fun i (e, coord) -> - let open Coordinate in - let coord = - bohr_to_angstrom coord - in - Printf.sprintf " %5d %5d %5s %12.6f %12.6f %12.6f" - (i+1) (Element.to_int e) (Element.to_string e) - coord.x coord.y coord.z - ) atoms - |> Array.to_list - |> String.concat "\n" ) ^ -" + (Array.mapi (fun i (e, coord) -> + let open Coordinate in + let coord = + bohr_to_angstrom coord + in + Printf.sprintf " %5d %5d %5s %12.6f %12.6f %12.6f" + (i+1) (Element.to_int e) (Element.to_string e) + coord.x coord.y coord.z + ) atoms + |> Array.to_list + |> String.concat "\n" ) ^ + " ----------------------------------------------------------------------- " let of_filename filename = - of_xyz_file filename - + of_xyz_file filename + +let to_xyz_string t = + [ string_of_int (Array.length t) ; "" ] @ + ( Array.map (fun (e, coord) -> + let open Coordinate in + let coord = + bohr_to_angstrom coord + in + Printf.sprintf " %5s %12.6f %12.6f %12.6f" + (Element.to_string e) coord.x coord.y coord.z + ) t + |> Array.to_list ) + |> String.concat "\n" +(* Conversion:2 ends here *) + + + +(* | ~repulsion~ | Nuclear repulsion energy, in atomic units | + * | ~charge~ | Sum of the charges of the nuclei | + * | ~small_core~ | Number of core electrons in the small core model | + * | ~large_core~ | Number of core electrons in the large core model | *) + + +(* [[file:../nuclei.org::*Query][Query:2]] *) let repulsion nuclei = let get_charge e = Element.to_charge e @@ -100,10 +136,10 @@ let repulsion nuclei = Array.fold_left (fun accu (e2, coord2) -> let r = Coordinate.(norm (coord1 |- coord2)) in if r > 0. then - accu +. 0.5 *. (get_charge e2) *. (get_charge e1) /. r + accu +. 0.5 *. (get_charge e2) *. (get_charge e1) /. r else accu - ) 0. nuclei - ) 0. nuclei + ) 0. nuclei + ) 0. nuclei let charge nuclei = @@ -112,36 +148,15 @@ let charge nuclei = |> Charge.of_int -let to_xyz_string t = - [ string_of_int (Array.length t) ; "" ] @ - ( Array.map (fun (e, coord) -> - let open Coordinate in - let coord = - bohr_to_angstrom coord - in - Printf.sprintf " %5s %12.6f %12.6f %12.6f" - (Element.to_string e) coord.x coord.y coord.z - ) t - |> Array.to_list ) - |> String.concat "\n" - - -let to_t2_string t = - [ "# nAt nEl nCore nRyd" ; - Printf.sprintf " %d %d %d 0" (Array.length t) - (Array.fold_left (+) 0 (Array.map (fun (e,_) -> Element.to_int e) t) ) - (2 * Array.length t); - "# Znuc x y z" ] - @ (Array.map (fun (e, coord) -> - let open Coordinate in - Printf.sprintf " %5f %12.6f %12.6f %12.6f" - (Element.to_int e |> float_of_int) coord.x coord.y coord.z - ) t - |> Array.to_list ) - |> String.concat "\n" - - let small_core a = Array.fold_left (fun accu (e,_) -> accu + (Element.small_core e)) 0 a +let large_core a = + Array.fold_left (fun accu (e,_) -> accu + (Element.large_core e)) 0 a +(* Query:2 ends here *) + +(* [[file:../nuclei.org::*Printers][Printers:2]] *) +let pp ppf t = + Format.fprintf ppf "@[%s@]" (to_string t) +(* Printers:2 ends here *) diff --git a/particles/lib/nuclei.mli b/particles/lib/nuclei.mli index 1947b5a..74bc3d9 100644 --- a/particles/lib/nuclei.mli +++ b/particles/lib/nuclei.mli @@ -1,39 +1,42 @@ -(** Data structure for the molecular geometry, represented as an array -of tuples ({!Element.t}, {!Coordinate.t}). -*) +(* Type + * + * #+NAME: types *) +(* [[file:../nuclei.org::types][types]] *) open Common type t = (Element.t * Coordinate.t) array +(* types ends here *) + +(* Conversion *) +(* [[file:../nuclei.org::*Conversion][Conversion:1]] *) val of_xyz_string : string -> t -(** Create from a string, in [xyz] format. *) - -val of_xyz_file : string -> t -(** Create from a file, in [xyz] format. *) +val to_xyz_string : t -> string +val of_xyz_file : string -> t val of_zmt_string : string -> t -(** Create from a string, in z-matrix format. *) +val of_zmt_file : string -> t -val of_zmt_file : string -> t -(** Create from a file, in z-matrix format. *) - -val to_string : t -> string -(** Transform to a string, for printing. *) +val to_string : t -> string val of_filename : string -> t -(** Detects the type of file ([xyz], z-matrix) and reads the file. *) +(* Conversion:1 ends here *) + +(* Query *) -val repulsion : t -> float -(** Nuclear repulsion energy, in atomic units. *) - -val charge : t -> Charge.t -(** Sum of the charges of the nuclei. *) - +(* [[file:../nuclei.org::*Query][Query:1]] *) +val repulsion : t -> float +val charge : t -> Charge.t val small_core : t -> int -(** Number of core electrons in the small core model. *) +val large_core : t -> int +(* Query:1 ends here *) -val to_xyz_string : t -> string -val to_t2_string : t -> string +(* Printers *) + + +(* [[file:../nuclei.org::*Printers][Printers:1]] *) +val pp : Format.formatter -> t -> unit +(* Printers:1 ends here *) diff --git a/particles/lib/nuclei_lexer.mll b/particles/lib/nuclei_lexer.mll index f02d4f5..390b551 100644 --- a/particles/lib/nuclei_lexer.mll +++ b/particles/lib/nuclei_lexer.mll @@ -1,3 +1,10 @@ +(* Lexer + * + * =nuclei_lexer.mll= contains the description of the lexemes used in + * an xyz file. *) + + +(* [[file:../nuclei.org::*Lexer][Lexer:1]] *) { open Xyz_parser } @@ -6,8 +13,8 @@ let eol = ['\n'] let white = [' ' '\t']+ let word = [^' ' '\t' '\n']+ let letter = ['A'-'Z' 'a'-'z'] -let integer = ['0'-'9']+ -let real = '-'? (integer '.' integer | integer '.' | '.' integer) (['e' 'E'] ('+'|'-')? integer)? +let integer = ['0'-'9']+ +let real = '-'? (integer '.' integer | integer '.' | '.' integer) (['e' 'E'] ('+'|'-')? integer)? rule read_all = parse @@ -38,3 +45,4 @@ rule read_all = parse done; *) } +(* Lexer:1 ends here *) diff --git a/particles/lib/xyz_ast.mli b/particles/lib/xyz_ast.mli index 2205522..a74f7fe 100644 --- a/particles/lib/xyz_ast.mli +++ b/particles/lib/xyz_ast.mli @@ -1,6 +1,10 @@ -(** When an [xyz] file is read by the [Xyz_parser.mly], it is converted into - an {!xyz_file} data structure. *) + +(* When an xyz file is read by =xyz_parser.mly=, it is converted into + * an ~xyz_file~ data structure. *) + + +(* [[file:../nuclei.org::*Parser][Parser:2]] *) open Common type nucleus = @@ -15,4 +19,4 @@ type xyz_file = file_title : string ; nuclei : nucleus list ; } - +(* Parser:2 ends here *) diff --git a/particles/lib/xyz_parser.mly b/particles/lib/xyz_parser.mly index 3b54b6d..ebeb70d 100644 --- a/particles/lib/xyz_parser.mly +++ b/particles/lib/xyz_parser.mly @@ -1,5 +1,3 @@ -/* Parses nuclear coordinates in xyz format */ - %{ open Common @@ -84,5 +82,3 @@ atoms_list: | atoms_list SPACE INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_int $5 $7 $9 $3 :: $1 } | atoms_list SPACE INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_int $5 $7 $9 $3 :: $1 } ; - - diff --git a/particles/nuclei.org b/particles/nuclei.org new file mode 100644 index 0000000..6ba7f1a --- /dev/null +++ b/particles/nuclei.org @@ -0,0 +1,393 @@ +#+begin_src elisp tangle: no :results none :exports none +(setq pwd (file-name-directory buffer-file-name)) +(setq name (file-name-nondirectory (substring buffer-file-name 0 -4))) +(setq lib (concat pwd "lib/")) +(setq testdir (concat pwd "test/")) +(setq mli (concat lib name ".mli")) +(setq ml (concat lib name ".ml")) +(setq test-ml (concat testdir name ".ml")) +(org-babel-tangle) +#+end_src + +* Nuclei + :PROPERTIES: + :header-args: :noweb yes :comments both + :END: + +** Type + + #+NAME: types + #+begin_src ocaml :tangle (eval mli) +open Common + +type t = (Element.t * Coordinate.t) array + #+end_src + + #+begin_src ocaml :tangle (eval ml) :exports none +<> +open Xyz_ast + #+end_src + +** xyz file lexer/parser + +*** Lexer + + =nuclei_lexer.mll= contains the description of the lexemes used in + an xyz file. + + #+begin_src ocaml :tangle lib/nuclei_lexer.mll :export none +{ +open Xyz_parser +} + +let eol = ['\n'] +let white = [' ' '\t']+ +let word = [^' ' '\t' '\n']+ +let letter = ['A'-'Z' 'a'-'z'] +let integer = ['0'-'9']+ +let real = '-'? (integer '.' integer | integer '.' | '.' integer) (['e' 'E'] ('+'|'-')? integer)? + + +rule read_all = parse + | eof { EOF } + | eol { EOL } + | white as w { SPACE w } + | integer as i { INTEGER (int_of_string i) } + | real as f { FLOAT (float_of_string f) } + | word as w { WORD w } + + +{ +(* DEBUG + let () = + let ic = open_in "h2o.xyz" in + let lexbuf = Lexing.from_channel ic in + while true do + let s = + match read_all lexbuf with + | EOL -> "EOL" + | SPACE w -> "SPACE("^w^")" + | INTEGER i -> "INTEGER("^(string_of_int i)^")" + | FLOAT f -> "FLOAT("^(string_of_float f)^")" + | WORD w -> "WORD("^w^")" + | EOF -> "EOF" + in + print_endline s + done; +*) +} + #+end_src + +*** Parser + + =xyz_parser.mly= parses nuclear coordinates in xyz format. + #+begin_src ocaml :tangle lib/xyz_parser.mly :export none :comments none +%{ +open Common + +let make_angstrom x y z = + Coordinate.(make_angstrom { + x ; y ; z + }) + +let output_of f x y z = + let a = make_angstrom x y z in + fun e -> + { + Xyz_ast. + element = f e; + coord = a ; + } + +let output_of_string = output_of Element.of_string +let output_of_int = output_of Element.of_int + +%} + +%token EOL +%token SPACE +%token WORD +%token INTEGER +%token FLOAT +%token EOF + +%start input +%type input + +%% /* Grammar rules and actions follow */ + +input: + | integer title atoms_xyz { + { + number_of_atoms = $1; + file_title = $2; + nuclei = $3; + } + } +; + + +integer: + | INTEGER EOL { $1 } + | INTEGER SPACE EOL { $1 } + | SPACE INTEGER EOL { $2 } + | SPACE INTEGER SPACE EOL { $2 } +; + +title: + | title_list EOL { $1 } +; + +text: + | WORD { $1 } + | SPACE { $1 } + | FLOAT { (string_of_float $1)} + | INTEGER { (string_of_int $1)} +; + +title_list: + | { "" } + | title_list text { ($1 ^ $2) } +; + +atoms_xyz: + | atoms_list EOL { List.rev $1 } + | atoms_list EOF { List.rev $1 } +; + +atoms_list: + | { [] } + | atoms_list WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_string $4 $6 $8 $2 :: $1 } + | atoms_list WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_string $4 $6 $8 $2 :: $1 } + | atoms_list INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_int $4 $6 $8 $2 :: $1 } + | atoms_list INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_int $4 $6 $8 $2 :: $1 } + | atoms_list SPACE WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_string $5 $7 $9 $3 :: $1 } + | atoms_list SPACE WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_string $5 $7 $9 $3 :: $1 } + | atoms_list SPACE INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_int $5 $7 $9 $3 :: $1 } + | atoms_list SPACE INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_int $5 $7 $9 $3 :: $1 } +; + #+end_src + + When an xyz file is read by =xyz_parser.mly=, it is converted into + an ~xyz_file~ data structure. + + #+begin_src ocaml :tangle lib/xyz_ast.mli +open Common + +type nucleus = + { + element: Element.t ; + coord : Coordinate.angstrom Coordinate.point; + } + +type xyz_file = + { + number_of_atoms : int ; + file_title : string ; + nuclei : nucleus list ; + } + #+end_src + +** Conversion + + #+begin_src ocaml :tangle (eval mli) +val of_xyz_string : string -> t +val to_xyz_string : t -> string +val of_xyz_file : string -> t + +val of_zmt_string : string -> t +val of_zmt_file : string -> t + +val to_string : t -> string + +val of_filename : string -> t + #+end_src + + | ~of_xyz_string~ | Create from a string, in xyz format | + | ~of_xyz_file~ | Create from a file, in xyz format | + | ~of_zmt_string~ | Create from a string, in z-matrix format | + | ~of_zmt_file~ | Create from a file, in z-matrix format | + | ~to_string~ | Transform to a string, for printing | + | ~of_filename~ | Detects the type of file (xyz, z-matrix) and reads the file | + + #+begin_src ocaml :tangle (eval ml) :exports none +let of_xyz_lexbuf lexbuf = + let data = + Xyz_parser.input Nuclei_lexer.read_all lexbuf + in + + let len = List.length data.nuclei in + if len <> data.number_of_atoms then + Printf.sprintf "Error: expected %d atoms but %d read" + data.number_of_atoms len + |> failwith; + + List.map (fun nucleus -> + nucleus.element, Coordinate.angstrom_to_bohr nucleus.coord + ) data.nuclei + |> Array.of_list + + +let of_xyz_string input_string = + Lexing.from_string input_string + |> of_xyz_lexbuf + + +let of_xyz_file filename = + let ic = open_in filename in + let lexbuf = + Lexing.from_channel ic + in + let result = + of_xyz_lexbuf lexbuf + in + close_in ic; + result + + +let of_zmt_string buffer = + Zmatrix.of_string buffer + |> Zmatrix.to_xyz + |> Array.map (fun (e,x,y,z) -> + (e, Coordinate.(angstrom_to_bohr @@ make_angstrom { x ; y ; z} )) + ) + + +let of_zmt_file filename = + let ic = open_in filename in + let rec aux accu = + try + let line = input_line ic in + aux (line::accu) + with End_of_file -> + close_in ic; + List.rev accu + |> String.concat "\n" + in aux [] + |> of_zmt_string + + +let to_string atoms = + " + Nuclear Coordinates (Angstrom) + ------------------------------ + +----------------------------------------------------------------------- + Center Atomic Element Coordinates (Angstroms) + Number X Y Z +----------------------------------------------------------------------- +" ^ + (Array.mapi (fun i (e, coord) -> + let open Coordinate in + let coord = + bohr_to_angstrom coord + in + Printf.sprintf " %5d %5d %5s %12.6f %12.6f %12.6f" + (i+1) (Element.to_int e) (Element.to_string e) + coord.x coord.y coord.z + ) atoms + |> Array.to_list + |> String.concat "\n" ) ^ + " +----------------------------------------------------------------------- + +" + + +let of_filename filename = + of_xyz_file filename + + +let to_xyz_string t = + [ string_of_int (Array.length t) ; "" ] @ + ( Array.map (fun (e, coord) -> + let open Coordinate in + let coord = + bohr_to_angstrom coord + in + Printf.sprintf " %5s %12.6f %12.6f %12.6f" + (Element.to_string e) coord.x coord.y coord.z + ) t + |> Array.to_list ) + |> String.concat "\n" + #+end_src + +** Query + + #+begin_src ocaml :tangle (eval mli) +val repulsion : t -> float +val charge : t -> Charge.t +val small_core : t -> int +val large_core : t -> int + #+end_src + + | ~repulsion~ | Nuclear repulsion energy, in atomic units | + | ~charge~ | Sum of the charges of the nuclei | + | ~small_core~ | Number of core electrons in the small core model | + | ~large_core~ | Number of core electrons in the large core model | + + #+begin_src ocaml :tangle (eval ml) :exports none +let repulsion nuclei = + let get_charge e = + Element.to_charge e + |> Charge.to_float + in + Array.fold_left ( fun accu (e1, coord1) -> + accu +. + Array.fold_left (fun accu (e2, coord2) -> + let r = Coordinate.(norm (coord1 |- coord2)) in + if r > 0. then + accu +. 0.5 *. (get_charge e2) *. (get_charge e1) /. r + else accu + ) 0. nuclei + ) 0. nuclei + + +let charge nuclei = + Array.fold_left (fun accu (e, _) -> accu + Charge.to_int (Element.to_charge e) ) + 0 nuclei + |> Charge.of_int + + +let small_core a = + Array.fold_left (fun accu (e,_) -> accu + (Element.small_core e)) 0 a + + +let large_core a = + Array.fold_left (fun accu (e,_) -> accu + (Element.large_core e)) 0 a + #+end_src + +** Printers + + #+begin_src ocaml :tangle (eval mli) +val pp : Format.formatter -> t -> unit + #+end_src + + #+begin_src ocaml :tangle (eval ml) :exports none +let pp ppf t = + Format.fprintf ppf "@[%s@]" (to_string t) + #+end_src + +** Tests + + #+begin_src ocaml :tangle (eval test-ml) :exports none +open Common +open Particles +open Alcotest + +let wd = Common.Qcaml.root ^ Filename.dir_sep ^ "test" + +let test_xyz molecule length repulsion charge core = + let xyz = Nuclei.of_xyz_file (wd^Filename.dir_sep^molecule^".xyz") in + check int "length" length (Array.length xyz); + check (float 1.e-4) "repulsion" repulsion (Nuclei.repulsion xyz); + check int "charge" charge (Charge.to_int @@ Nuclei.charge xyz); + check int "small_core" core (Nuclei.small_core xyz); + () + +let tests = [ + "caffeine", `Quick, (fun () -> test_xyz "caffeine" 24 917.0684 102 28); + "water", `Quick, (fun () -> test_xyz "water" 3 9.19497 10 2); +] + #+end_src + diff --git a/particles/test/nuclei.ml b/particles/test/nuclei.ml index 1366a69..c2c8bb3 100644 --- a/particles/test/nuclei.ml +++ b/particles/test/nuclei.ml @@ -1,3 +1,7 @@ +(* Tests *) + + +(* [[file:../nuclei.org::*Tests][Tests:1]] *) open Common open Particles open Alcotest @@ -16,3 +20,4 @@ let tests = [ "caffeine", `Quick, (fun () -> test_xyz "caffeine" 24 917.0684 102 28); "water", `Quick, (fun () -> test_xyz "water" 3 9.19497 10 2); ] +(* Tests:1 ends here *)