mirror of
https://gitlab.com/scemama/QCaml.git
synced 2025-01-03 18:15:47 +01:00
472 lines
12 KiB
Org Mode
472 lines
12 KiB
Org Mode
#+begin_src elisp tangle: no :results none :exports none
|
|
(setq pwd (file-name-directory buffer-file-name))
|
|
(setq name (file-name-nondirectory (substring buffer-file-name 0 -4)))
|
|
(setq lib (concat pwd "lib/"))
|
|
(setq testdir (concat pwd "test/"))
|
|
(setq mli (concat lib name ".mli"))
|
|
(setq ml (concat lib name ".ml"))
|
|
(setq test-ml (concat testdir name ".ml"))
|
|
(org-babel-tangle)
|
|
#+end_src
|
|
|
|
* Nuclei
|
|
:PROPERTIES:
|
|
:header-args: :noweb yes :comments both
|
|
:END:
|
|
|
|
** Type
|
|
<<<~Nuclei.t~>>>
|
|
|
|
#+NAME: types
|
|
#+begin_src ocaml :tangle (eval mli)
|
|
open Common
|
|
|
|
type t = (Element.t * Coordinate.t) array
|
|
#+end_src
|
|
|
|
#+begin_src ocaml :tangle (eval ml) :exports none
|
|
open Common
|
|
|
|
type t = (Element.t * Coordinate.t) array
|
|
open Xyz_ast
|
|
#+end_src
|
|
|
|
** xyz file lexer/parser
|
|
|
|
*** Lexer =nuclei_lexer.mll= contains the description of the lexemes used in
|
|
an xyz file.
|
|
|
|
#+begin_src ocaml :tangle lib/nuclei_lexer.mll :export none
|
|
{
|
|
open Xyz_parser
|
|
}
|
|
|
|
let eol = ['\n']
|
|
let white = [' ' '\t']+
|
|
let word = [^' ' '\t' '\n']+
|
|
let letter = ['A'-'Z' 'a'-'z']
|
|
let integer = ['0'-'9']+
|
|
let real = '-'? (integer '.' integer | integer '.' | '.' integer) (['e' 'E'] ('+'|'-')? integer)?
|
|
|
|
|
|
rule read_all = parse
|
|
| eof { EOF }
|
|
| eol { EOL }
|
|
| white as w { SPACE w }
|
|
| integer as i { INTEGER (int_of_string i) }
|
|
| real as f { FLOAT (float_of_string f) }
|
|
| word as w { WORD w }
|
|
|
|
|
|
{
|
|
(* DEBUG
|
|
let () =
|
|
let ic = open_in "h2o.xyz" in
|
|
let lexbuf = Lexing.from_channel ic in
|
|
while true do
|
|
let s =
|
|
match read_all lexbuf with
|
|
| EOL -> "EOL"
|
|
| SPACE w -> "SPACE("^w^")"
|
|
| INTEGER i -> "INTEGER("^(string_of_int i)^")"
|
|
| FLOAT f -> "FLOAT("^(string_of_float f)^")"
|
|
| WORD w -> "WORD("^w^")"
|
|
| EOF -> "EOF"
|
|
in
|
|
print_endline s
|
|
done;
|
|
*)
|
|
}
|
|
#+end_src
|
|
|
|
*** Parser =xyz_parser.mly= parses nuclear coordinates in xyz format.
|
|
#+begin_src ocaml :tangle lib/xyz_parser.mly :export none :comments none
|
|
%{
|
|
open Common
|
|
|
|
let make_angstrom x y z =
|
|
Coordinate.(make_angstrom {
|
|
x ; y ; z
|
|
})
|
|
|
|
let output_of f x y z =
|
|
let a = make_angstrom x y z in
|
|
fun e ->
|
|
{
|
|
Xyz_ast.
|
|
element = f e;
|
|
coord = a ;
|
|
}
|
|
|
|
let output_of_string = output_of Element.of_string
|
|
let output_of_int = output_of Element.of_int
|
|
|
|
%}
|
|
|
|
%token EOL
|
|
%token <string> SPACE
|
|
%token <string> WORD
|
|
%token <int> INTEGER
|
|
%token <float> FLOAT
|
|
%token EOF
|
|
|
|
%start input
|
|
%type <Xyz_ast.xyz_file> input
|
|
|
|
%% /* Grammar rules and actions follow */
|
|
|
|
input:
|
|
| integer title atoms_xyz {
|
|
{
|
|
number_of_atoms = $1;
|
|
file_title = $2;
|
|
nuclei = $3;
|
|
}
|
|
}
|
|
;
|
|
|
|
|
|
integer:
|
|
| INTEGER EOL { $1 }
|
|
| INTEGER SPACE EOL { $1 }
|
|
| SPACE INTEGER EOL { $2 }
|
|
| SPACE INTEGER SPACE EOL { $2 }
|
|
;
|
|
|
|
title:
|
|
| title_list EOL { $1 }
|
|
;
|
|
|
|
text:
|
|
| WORD { $1 }
|
|
| SPACE { $1 }
|
|
| FLOAT { (string_of_float $1)}
|
|
| INTEGER { (string_of_int $1)}
|
|
;
|
|
|
|
title_list:
|
|
| { "" }
|
|
| title_list text { ($1 ^ $2) }
|
|
;
|
|
|
|
atoms_xyz:
|
|
| atoms_list EOL { List.rev $1 }
|
|
| atoms_list EOF { List.rev $1 }
|
|
;
|
|
|
|
atoms_list:
|
|
| { [] }
|
|
| atoms_list WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_string $4 $6 $8 $2 :: $1 }
|
|
| atoms_list WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_string $4 $6 $8 $2 :: $1 }
|
|
| atoms_list INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_int $4 $6 $8 $2 :: $1 }
|
|
| atoms_list INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_int $4 $6 $8 $2 :: $1 }
|
|
| atoms_list SPACE WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_string $5 $7 $9 $3 :: $1 }
|
|
| atoms_list SPACE WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_string $5 $7 $9 $3 :: $1 }
|
|
| atoms_list SPACE INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_int $5 $7 $9 $3 :: $1 }
|
|
| atoms_list SPACE INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_int $5 $7 $9 $3 :: $1 }
|
|
;
|
|
#+end_src
|
|
|
|
When an xyz file is read by =xyz_parser.mly=, it is converted into
|
|
an ~xyz_file~ data structure.
|
|
|
|
#+begin_src ocaml :tangle lib/xyz_ast.mli
|
|
open Common
|
|
|
|
type nucleus =
|
|
{
|
|
element: Element.t ;
|
|
coord : Coordinate.angstrom Coordinate.point;
|
|
}
|
|
|
|
type xyz_file =
|
|
{
|
|
number_of_atoms : int ;
|
|
file_title : string ;
|
|
nuclei : nucleus list ;
|
|
}
|
|
#+end_src
|
|
|
|
** Conversion
|
|
|
|
#+begin_src ocaml :tangle (eval mli)
|
|
val of_xyz_string : string -> t
|
|
val to_xyz_string : t -> string
|
|
val of_xyz_file : string -> t
|
|
|
|
val of_zmt_string : string -> t
|
|
val of_zmt_file : string -> t
|
|
|
|
val to_string : t -> string
|
|
|
|
val of_filename : string -> t
|
|
#+end_src
|
|
|
|
| ~of_xyz_string~ | Create from a string, in xyz format |
|
|
| ~of_xyz_file~ | Create from a file, in xyz format |
|
|
| ~of_zmt_string~ | Create from a string, in z-matrix format |
|
|
| ~of_zmt_file~ | Create from a file, in z-matrix format |
|
|
| ~to_string~ | Transform to a string, for printing |
|
|
| ~of_filename~ | Detects the type of file (xyz, z-matrix) and reads the file |
|
|
|
|
#+begin_src ocaml :tangle (eval ml) :exports none
|
|
let of_xyz_lexbuf lexbuf =
|
|
let data =
|
|
Xyz_parser.input Nuclei_lexer.read_all lexbuf
|
|
in
|
|
|
|
let len = List.length data.nuclei in
|
|
if len <> data.number_of_atoms then
|
|
Printf.sprintf "Error: expected %d atoms but %d read"
|
|
data.number_of_atoms len
|
|
|> failwith;
|
|
|
|
List.map (fun nucleus ->
|
|
nucleus.element, Coordinate.angstrom_to_bohr nucleus.coord
|
|
) data.nuclei
|
|
|> Array.of_list
|
|
|
|
|
|
let of_xyz_string input_string =
|
|
Lexing.from_string input_string
|
|
|> of_xyz_lexbuf
|
|
|
|
|
|
let of_xyz_file filename =
|
|
let ic = open_in filename in
|
|
let lexbuf =
|
|
Lexing.from_channel ic
|
|
in
|
|
let result =
|
|
of_xyz_lexbuf lexbuf
|
|
in
|
|
close_in ic;
|
|
result
|
|
|
|
|
|
let of_zmt_string buffer =
|
|
Zmatrix.of_string buffer
|
|
|> Zmatrix.to_xyz
|
|
|> Array.map (fun (e,x,y,z) ->
|
|
(e, Coordinate.(angstrom_to_bohr @@ make_angstrom { x ; y ; z} ))
|
|
)
|
|
|
|
|
|
let of_zmt_file filename =
|
|
let ic = open_in filename in
|
|
let rec aux accu =
|
|
try
|
|
let line = input_line ic in
|
|
aux (line::accu)
|
|
with End_of_file ->
|
|
close_in ic;
|
|
List.rev accu
|
|
|> String.concat "\n"
|
|
in aux []
|
|
|> of_zmt_string
|
|
|
|
|
|
let to_string atoms =
|
|
"
|
|
Nuclear Coordinates (Angstrom)
|
|
------------------------------
|
|
|
|
-----------------------------------------------------------------------
|
|
Center Atomic Element Coordinates (Angstroms)
|
|
Number X Y Z
|
|
-----------------------------------------------------------------------
|
|
" ^
|
|
(Array.mapi (fun i (e, coord) ->
|
|
let open Coordinate in
|
|
let coord =
|
|
bohr_to_angstrom coord
|
|
in
|
|
Printf.sprintf " %5d %5d %5s %12.6f %12.6f %12.6f"
|
|
(i+1) (Element.to_int e) (Element.to_string e)
|
|
coord.x coord.y coord.z
|
|
) atoms
|
|
|> Array.to_list
|
|
|> String.concat "\n" ) ^
|
|
"
|
|
-----------------------------------------------------------------------
|
|
|
|
"
|
|
|
|
|
|
let of_filename filename =
|
|
of_xyz_file filename
|
|
|
|
|
|
let to_xyz_string t =
|
|
[ string_of_int (Array.length t) ; "" ] @
|
|
( Array.map (fun (e, coord) ->
|
|
let open Coordinate in
|
|
let coord =
|
|
bohr_to_angstrom coord
|
|
in
|
|
Printf.sprintf " %5s %12.6f %12.6f %12.6f"
|
|
(Element.to_string e) coord.x coord.y coord.z
|
|
) t
|
|
|> Array.to_list )
|
|
|> String.concat "\n"
|
|
|
|
|
|
#+end_src
|
|
|
|
** Query
|
|
|
|
#+begin_src ocaml :tangle (eval mli)
|
|
val formula : t -> string
|
|
val repulsion : t -> float
|
|
val charge : t -> Charge.t
|
|
val small_core : t -> int
|
|
val large_core : t -> int
|
|
#+end_src
|
|
|
|
| ~formula~ | Returns the chemical formula |
|
|
| ~repulsion~ | Nuclear repulsion energy, in atomic units |
|
|
| ~charge~ | Sum of the charges of the nuclei |
|
|
| ~small_core~ | Number of core electrons in the small core model |
|
|
| ~large_core~ | Number of core electrons in the large core model |
|
|
|
|
#+begin_src ocaml :tangle (eval ml) :exports none
|
|
let formula t =
|
|
let dict = Hashtbl.create 67 in
|
|
Array.iter (fun (e,_) ->
|
|
let e = Element.to_string e in
|
|
let value =
|
|
try (Hashtbl.find dict e) + 1
|
|
with Not_found -> 1
|
|
in
|
|
Hashtbl.replace dict e value
|
|
) t;
|
|
Hashtbl.to_seq_keys dict
|
|
|> List.of_seq
|
|
|> List.sort String.compare
|
|
|> List.fold_left (fun accu key ->
|
|
let x = Hashtbl.find dict key in
|
|
accu ^ key ^ "_{" ^ (string_of_int x) ^ "}") ""
|
|
|
|
|
|
|
|
let repulsion nuclei =
|
|
let get_charge e =
|
|
Element.to_charge e
|
|
|> Charge.to_float
|
|
in
|
|
Array.fold_left ( fun accu (e1, coord1) ->
|
|
accu +.
|
|
Array.fold_left (fun accu (e2, coord2) ->
|
|
let r = Coordinate.(norm (coord1 |- coord2)) in
|
|
if r > 0. then
|
|
accu +. 0.5 *. (get_charge e2) *. (get_charge e1) /. r
|
|
else accu
|
|
) 0. nuclei
|
|
) 0. nuclei
|
|
|
|
|
|
let charge nuclei =
|
|
Array.fold_left (fun accu (e, _) -> accu + Charge.to_int (Element.to_charge e) )
|
|
0 nuclei
|
|
|> Charge.of_int
|
|
|
|
|
|
let small_core a =
|
|
Array.fold_left (fun accu (e,_) -> accu + (Element.small_core e)) 0 a
|
|
|
|
|
|
let large_core a =
|
|
Array.fold_left (fun accu (e,_) -> accu + (Element.large_core e)) 0 a
|
|
|
|
#+end_src
|
|
|
|
** TREXIO
|
|
|
|
*** Read
|
|
|
|
#+begin_src ocaml :tangle (eval mli)
|
|
val of_trexio : Trexio.trexio_file -> t
|
|
#+end_src
|
|
|
|
#+begin_src ocaml :tangle (eval ml) :exports none
|
|
let of_trexio f =
|
|
let num = Trexio.read_nucleus_num f in
|
|
let charge = Trexio.read_nucleus_charge f
|
|
|> Array.map Charge.of_float in
|
|
let coord = Trexio.read_nucleus_coord f in
|
|
Array.init num (fun i ->
|
|
let coord = Coordinate.{ x = coord.(3*i) ;
|
|
y = coord.(3*i+1) ;
|
|
z = coord.(3*i+2) } in
|
|
(Element.of_charge charge.(i), Coordinate.make coord)
|
|
)
|
|
#+end_src
|
|
|
|
*** Write
|
|
|
|
#+begin_src ocaml :tangle (eval mli)
|
|
val to_trexio : Trexio.trexio_file -> t -> unit
|
|
#+end_src
|
|
|
|
#+begin_src ocaml :tangle (eval ml) :exports none
|
|
let to_trexio f t =
|
|
let num = Array.length t in
|
|
Trexio.write_nucleus_num f num;
|
|
|
|
Array.map (fun (e, _) -> Element.to_charge e |> Charge.to_float) t
|
|
|> Trexio.write_nucleus_charge f;
|
|
|
|
Array.map (fun (e, _) -> Element.to_string e) t
|
|
|> Trexio.write_nucleus_label f;
|
|
|
|
let coord = Array.init (num*3) (fun _ -> 0.) in
|
|
Array.iteri (fun i (_, xyz) ->
|
|
coord.(3*i) <- Coordinate.(get X xyz) ;
|
|
coord.(3*i+1) <- Coordinate.(get Y xyz) ;
|
|
coord.(3*i+2) <- Coordinate.(get Z xyz) ) t;
|
|
Trexio.write_nucleus_coord f coord;
|
|
|
|
repulsion t
|
|
|> Trexio.write_nucleus_repulsion f
|
|
|
|
#+end_src
|
|
|
|
** Printers
|
|
|
|
#+begin_src ocaml :tangle (eval mli)
|
|
val pp : Format.formatter -> t -> unit
|
|
#+end_src
|
|
|
|
#+begin_src ocaml :tangle (eval ml) :exports none
|
|
let pp ppf t =
|
|
Format.fprintf ppf "@[%s@]" (to_string t)
|
|
#+end_src
|
|
|
|
** Tests
|
|
|
|
#+begin_src ocaml :tangle (eval test-ml) :exports none
|
|
open Common
|
|
open Particles
|
|
open Alcotest
|
|
|
|
let wd = Common.Qcaml.root ^ Filename.dir_sep ^ "test"
|
|
|
|
let test_xyz molecule length repulsion charge core =
|
|
let xyz = Nuclei.of_xyz_file (wd^Filename.dir_sep^molecule^".xyz") in
|
|
check int "length" length (Array.length xyz);
|
|
check (float 1.e-4) "repulsion" repulsion (Nuclei.repulsion xyz);
|
|
check int "charge" charge (Charge.to_int @@ Nuclei.charge xyz);
|
|
check int "small_core" core (Nuclei.small_core xyz);
|
|
()
|
|
|
|
let tests = [
|
|
"caffeine", `Quick, (fun () -> test_xyz "caffeine" 24 917.0684 102 28);
|
|
"water", `Quick, (fun () -> test_xyz "water" 3 9.19497 10 2);
|
|
]
|
|
#+end_src
|
|
|