#+begin_src elisp tangle: no :results none :exports none (setq pwd (file-name-directory buffer-file-name)) (setq name (file-name-nondirectory (substring buffer-file-name 0 -4))) (setq lib (concat pwd "lib/")) (setq testdir (concat pwd "test/")) (setq mli (concat lib name ".mli")) (setq ml (concat lib name ".ml")) (setq test-ml (concat testdir name ".ml")) (org-babel-tangle) #+end_src * Nuclei :PROPERTIES: :header-args: :noweb yes :comments both :END: ** Type <<<~Nuclei.t~>>> #+NAME: types #+begin_src ocaml :tangle (eval mli) open Common type t = (Element.t * Coordinate.t) array #+end_src #+begin_src ocaml :tangle (eval ml) :exports none open Common type t = (Element.t * Coordinate.t) array open Xyz_ast #+end_src ** xyz file lexer/parser *** Lexer =nuclei_lexer.mll= contains the description of the lexemes used in an xyz file. #+begin_src ocaml :tangle lib/nuclei_lexer.mll :export none { open Xyz_parser } let eol = ['\n'] let white = [' ' '\t']+ let word = [^' ' '\t' '\n']+ let letter = ['A'-'Z' 'a'-'z'] let integer = ['0'-'9']+ let real = '-'? (integer '.' integer | integer '.' | '.' integer) (['e' 'E'] ('+'|'-')? integer)? rule read_all = parse | eof { EOF } | eol { EOL } | white as w { SPACE w } | integer as i { INTEGER (int_of_string i) } | real as f { FLOAT (float_of_string f) } | word as w { WORD w } { (* DEBUG let () = let ic = open_in "h2o.xyz" in let lexbuf = Lexing.from_channel ic in while true do let s = match read_all lexbuf with | EOL -> "EOL" | SPACE w -> "SPACE("^w^")" | INTEGER i -> "INTEGER("^(string_of_int i)^")" | FLOAT f -> "FLOAT("^(string_of_float f)^")" | WORD w -> "WORD("^w^")" | EOF -> "EOF" in print_endline s done; *) } #+end_src *** Parser =xyz_parser.mly= parses nuclear coordinates in xyz format. #+begin_src ocaml :tangle lib/xyz_parser.mly :export none :comments none %{ open Common let make_angstrom x y z = Coordinate.(make_angstrom { x ; y ; z }) let output_of f x y z = let a = make_angstrom x y z in fun e -> { Xyz_ast. element = f e; coord = a ; } let output_of_string = output_of Element.of_string let output_of_int = output_of Element.of_int %} %token EOL %token SPACE %token WORD %token INTEGER %token FLOAT %token EOF %start input %type input %% /* Grammar rules and actions follow */ input: | integer title atoms_xyz { { number_of_atoms = $1; file_title = $2; nuclei = $3; } } ; integer: | INTEGER EOL { $1 } | INTEGER SPACE EOL { $1 } | SPACE INTEGER EOL { $2 } | SPACE INTEGER SPACE EOL { $2 } ; title: | title_list EOL { $1 } ; text: | WORD { $1 } | SPACE { $1 } | FLOAT { (string_of_float $1)} | INTEGER { (string_of_int $1)} ; title_list: | { "" } | title_list text { ($1 ^ $2) } ; atoms_xyz: | atoms_list EOL { List.rev $1 } | atoms_list EOF { List.rev $1 } ; atoms_list: | { [] } | atoms_list WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_string $4 $6 $8 $2 :: $1 } | atoms_list WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_string $4 $6 $8 $2 :: $1 } | atoms_list INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_int $4 $6 $8 $2 :: $1 } | atoms_list INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_int $4 $6 $8 $2 :: $1 } | atoms_list SPACE WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_string $5 $7 $9 $3 :: $1 } | atoms_list SPACE WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_string $5 $7 $9 $3 :: $1 } | atoms_list SPACE INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_int $5 $7 $9 $3 :: $1 } | atoms_list SPACE INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_int $5 $7 $9 $3 :: $1 } ; #+end_src When an xyz file is read by =xyz_parser.mly=, it is converted into an ~xyz_file~ data structure. #+begin_src ocaml :tangle lib/xyz_ast.mli open Common type nucleus = { element: Element.t ; coord : Coordinate.angstrom Coordinate.point; } type xyz_file = { number_of_atoms : int ; file_title : string ; nuclei : nucleus list ; } #+end_src ** Conversion #+begin_src ocaml :tangle (eval mli) val of_xyz_string : string -> t val to_xyz_string : t -> string val of_xyz_file : string -> t val of_zmt_string : string -> t val of_zmt_file : string -> t val to_string : t -> string val of_filename : string -> t #+end_src | ~of_xyz_string~ | Create from a string, in xyz format | | ~of_xyz_file~ | Create from a file, in xyz format | | ~of_zmt_string~ | Create from a string, in z-matrix format | | ~of_zmt_file~ | Create from a file, in z-matrix format | | ~to_string~ | Transform to a string, for printing | | ~of_filename~ | Detects the type of file (xyz, z-matrix) and reads the file | #+begin_src ocaml :tangle (eval ml) :exports none let of_xyz_lexbuf lexbuf = let data = Xyz_parser.input Nuclei_lexer.read_all lexbuf in let len = List.length data.nuclei in if len <> data.number_of_atoms then Printf.sprintf "Error: expected %d atoms but %d read" data.number_of_atoms len |> failwith; List.map (fun nucleus -> nucleus.element, Coordinate.angstrom_to_bohr nucleus.coord ) data.nuclei |> Array.of_list let of_xyz_string input_string = Lexing.from_string input_string |> of_xyz_lexbuf let of_xyz_file filename = let ic = open_in filename in let lexbuf = Lexing.from_channel ic in let result = of_xyz_lexbuf lexbuf in close_in ic; result let of_zmt_string buffer = Zmatrix.of_string buffer |> Zmatrix.to_xyz |> Array.map (fun (e,x,y,z) -> (e, Coordinate.(angstrom_to_bohr @@ make_angstrom { x ; y ; z} )) ) let of_zmt_file filename = let ic = open_in filename in let rec aux accu = try let line = input_line ic in aux (line::accu) with End_of_file -> close_in ic; List.rev accu |> String.concat "\n" in aux [] |> of_zmt_string let to_string atoms = " Nuclear Coordinates (Angstrom) ------------------------------ ----------------------------------------------------------------------- Center Atomic Element Coordinates (Angstroms) Number X Y Z ----------------------------------------------------------------------- " ^ (Array.mapi (fun i (e, coord) -> let open Coordinate in let coord = bohr_to_angstrom coord in Printf.sprintf " %5d %5d %5s %12.6f %12.6f %12.6f" (i+1) (Element.to_int e) (Element.to_string e) coord.x coord.y coord.z ) atoms |> Array.to_list |> String.concat "\n" ) ^ " ----------------------------------------------------------------------- " let of_filename filename = of_xyz_file filename let to_xyz_string t = [ string_of_int (Array.length t) ; "" ] @ ( Array.map (fun (e, coord) -> let open Coordinate in let coord = bohr_to_angstrom coord in Printf.sprintf " %5s %12.6f %12.6f %12.6f" (Element.to_string e) coord.x coord.y coord.z ) t |> Array.to_list ) |> String.concat "\n" #+end_src ** Query #+begin_src ocaml :tangle (eval mli) val formula : t -> string val repulsion : t -> float val charge : t -> Charge.t val small_core : t -> int val large_core : t -> int #+end_src | ~formula~ | Returns the chemical formula | | ~repulsion~ | Nuclear repulsion energy, in atomic units | | ~charge~ | Sum of the charges of the nuclei | | ~small_core~ | Number of core electrons in the small core model | | ~large_core~ | Number of core electrons in the large core model | #+begin_src ocaml :tangle (eval ml) :exports none let formula t = let dict = Hashtbl.create 67 in Array.iter (fun (e,_) -> let e = Element.to_string e in let value = try (Hashtbl.find dict e) + 1 with Not_found -> 1 in Hashtbl.replace dict e value ) t; Hashtbl.to_seq_keys dict |> List.of_seq |> List.sort String.compare |> List.fold_left (fun accu key -> let x = Hashtbl.find dict key in accu ^ key ^ "_{" ^ (string_of_int x) ^ "}") "" let repulsion nuclei = let get_charge e = Element.to_charge e |> Charge.to_float in Array.fold_left ( fun accu (e1, coord1) -> accu +. Array.fold_left (fun accu (e2, coord2) -> let r = Coordinate.(norm (coord1 |- coord2)) in if r > 0. then accu +. 0.5 *. (get_charge e2) *. (get_charge e1) /. r else accu ) 0. nuclei ) 0. nuclei let charge nuclei = Array.fold_left (fun accu (e, _) -> accu + Charge.to_int (Element.to_charge e) ) 0 nuclei |> Charge.of_int let small_core a = Array.fold_left (fun accu (e,_) -> accu + (Element.small_core e)) 0 a let large_core a = Array.fold_left (fun accu (e,_) -> accu + (Element.large_core e)) 0 a #+end_src ** TREXIO *** Read #+begin_src ocaml :tangle (eval mli) val of_trexio : Trexio.trexio_file -> t #+end_src #+begin_src ocaml :tangle (eval ml) :exports none let of_trexio f = let num = Trexio.read_nucleus_num f in let charge = Trexio.read_nucleus_charge f |> Array.map Charge.of_float in let coord = Trexio.read_nucleus_coord f in Array.init num (fun i -> let coord = Coordinate.{ x = coord.(3*i) ; y = coord.(3*i+1) ; z = coord.(3*i+2) } in (Element.of_charge charge.(i), Coordinate.make coord) ) #+end_src *** Write #+begin_src ocaml :tangle (eval mli) val to_trexio : Trexio.trexio_file -> t -> unit #+end_src #+begin_src ocaml :tangle (eval ml) :exports none let to_trexio f t = let num = Array.length t in Trexio.write_nucleus_num f num; Array.map (fun (e, _) -> Element.to_charge e |> Charge.to_float) t |> Trexio.write_nucleus_charge f; Array.map (fun (e, _) -> Element.to_string e) t |> Trexio.write_nucleus_label f; let coord = Array.init (num*3) (fun _ -> 0.) in Array.iteri (fun i (_, xyz) -> coord.(3*i) <- Coordinate.(get X xyz) ; coord.(3*i+1) <- Coordinate.(get Y xyz) ; coord.(3*i+2) <- Coordinate.(get Z xyz) ) t; Trexio.write_nucleus_coord f coord; repulsion t |> Trexio.write_nucleus_repulsion f #+end_src ** Printers #+begin_src ocaml :tangle (eval mli) val pp : Format.formatter -> t -> unit #+end_src #+begin_src ocaml :tangle (eval ml) :exports none let pp ppf t = Format.fprintf ppf "@[%s@]" (to_string t) #+end_src ** Tests #+begin_src ocaml :tangle (eval test-ml) :exports none open Common open Particles open Alcotest let wd = Common.Qcaml.root ^ Filename.dir_sep ^ "test" let test_xyz molecule length repulsion charge core = let xyz = Nuclei.of_xyz_file (wd^Filename.dir_sep^molecule^".xyz") in check int "length" length (Array.length xyz); check (float 1.e-4) "repulsion" repulsion (Nuclei.repulsion xyz); check int "charge" charge (Charge.to_int @@ Nuclei.charge xyz); check int "small_core" core (Nuclei.small_core xyz); () let tests = [ "caffeine", `Quick, (fun () -> test_xyz "caffeine" 24 917.0684 102 28); "water", `Quick, (fun () -> test_xyz "water" 3 9.19497 10 2); ] #+end_src