10
1
mirror of https://gitlab.com/scemama/QCaml.git synced 2024-06-17 10:45:20 +02:00
QCaml/particles/nuclei.org

472 lines
12 KiB
Org Mode
Raw Normal View History

2020-12-29 18:06:54 +01:00
#+begin_src elisp :tangle no :results none :exports none
;; Derive the tangle targets from the location and base name of this Org
;; buffer; the variables are read back by the `(eval ...)' :tangle headers.
(setq pwd     (file-name-directory buffer-file-name)
      ;; File name without its directory and without the last 4 characters.
      name    (file-name-nondirectory (substring buffer-file-name 0 -4))
      lib     (concat pwd "lib/")
      testdir (concat pwd "test/")
      mli     (concat lib name ".mli")
      ml      (concat lib name ".ml")
      test-ml (concat testdir name ".ml"))
;; Write every tangled source block to its target file.
(org-babel-tangle)
2023-04-24 19:01:42 +02:00
#+end_src
2020-12-29 18:06:54 +01:00
* Nuclei
:PROPERTIES:
:header-args: :noweb yes :comments both
:END:
** Type
2021-01-20 23:55:34 +01:00
<<<~Nuclei.t~>>>
2020-12-29 18:06:54 +01:00
#+NAME: types
#+begin_src ocaml :tangle (eval mli)
open Common
(** A set of nuclei: one [(element, position)] pair per nucleus. *)
type t = (Element.t * Coordinate.t) array
#+end_src
#+begin_src ocaml :tangle (eval ml) :exports none
2021-01-01 11:46:11 +01:00
open Common
(* A set of nuclei: one (element, position) pair per nucleus. *)
type t = (Element.t * Coordinate.t) array
2020-12-29 18:06:54 +01:00
open Xyz_ast
#+end_src
** xyz file lexer/parser
*** Lexer
2023-04-24 19:01:42 +02:00
2020-12-29 18:06:54 +01:00
=nuclei_lexer.mll= contains the description of the lexemes used in
an xyz file.
2023-04-24 19:01:42 +02:00
2020-12-29 18:06:54 +01:00
#+begin_src ocaml :tangle lib/nuclei_lexer.mll :exports none
{
(* The token constructors used below (EOF, EOL, SPACE, INTEGER, FLOAT, WORD)
   are the ones declared in the Xyz_parser grammar. *)
open Xyz_parser
}
(* Named regular expressions. *)
(* A single newline character ends a line. *)
let eol = ['\n']
(* A non-empty run of spaces and tabs. *)
let white = [' ' '\t']+
(* A non-empty run of anything that is neither blank nor newline. *)
let word = [^' ' '\t' '\n']+
(* NOTE(review): letter is not referenced by any rule in this block. *)
let letter = ['A'-'Z' 'a'-'z']
(* Unsigned decimal integer. *)
let integer = ['0'-'9']+
(* Decimal number with a mandatory dot, an optional leading minus sign and
   an optional exponent. Numbers without a dot, or with a leading plus
   sign, do not match this expression. *)
let real = '-'? (integer '.' integer | integer '.' | '.' integer) (['e' 'E'] ('+'|'-')? integer)?
(* Single entry point: returns the next token of the xyz input.
   ocamllex keeps the longest match and, between matches of equal length,
   the rule listed first; word is therefore the fallback for any
   non-blank text that is not exactly an integer or a real. *)
rule read_all = parse
| eof { EOF }
| eol { EOL }
| white as w { SPACE w }
| integer as i { INTEGER (int_of_string i) }
| real as f { FLOAT (float_of_string f) }
| word as w { WORD w }
{
(* DEBUG
let () =
let ic = open_in "h2o.xyz" in
let lexbuf = Lexing.from_channel ic in
while true do
let s =
match read_all lexbuf with
| EOL -> "EOL"
| SPACE w -> "SPACE("^w^")"
| INTEGER i -> "INTEGER("^(string_of_int i)^")"
| FLOAT f -> "FLOAT("^(string_of_float f)^")"
| WORD w -> "WORD("^w^")"
| EOF -> "EOF"
in
print_endline s
done;
*)
}
2023-04-24 19:01:42 +02:00
#+end_src
2020-12-29 18:06:54 +01:00
*** Parser
2023-04-24 19:01:42 +02:00
=xyz_parser.mly= parses nuclear coordinates in xyz format.
2020-12-29 18:06:54 +01:00
#+begin_src ocaml :tangle lib/xyz_parser.mly :exports none :comments none
%{
open Common
(* Builds a coordinate in angstrom from its three components. *)
let make_angstrom x y z =
Coordinate.(make_angstrom {
x ; y ; z
})
(* [output_of f x y z e] builds a nucleus record: the coordinates come
   first and the raw element token [e] last, converted with [f]. *)
let output_of f x y z =
let a = make_angstrom x y z in
fun e ->
{
Xyz_ast.
element = f e;
coord = a ;
}
(* Element given as a string token (WORD). *)
let output_of_string = output_of Element.of_string
(* Element given as an integer token (INTEGER). *)
let output_of_int = output_of Element.of_int
%}
/* Tokens produced by the lexer. SPACE and WORD carry the matched text. */
%token EOL
%token <string> SPACE
%token <string> WORD
%token <int> INTEGER
%token <float> FLOAT
%token EOF
%start input
%type <Xyz_ast.xyz_file> input
%% /* Grammar rules and actions follow */
/* Whole file: atom-count line, title line, then the atom lines. */
input:
| integer title atoms_xyz {
{
number_of_atoms = $1;
file_title = $2;
nuclei = $3;
}
}
;
/* First line: one integer, optionally surrounded by blanks. */
integer:
| INTEGER EOL { $1 }
| INTEGER SPACE EOL { $1 }
| SPACE INTEGER EOL { $2 }
| SPACE INTEGER SPACE EOL { $2 }
;
/* Second line: free text, possibly empty, up to the end of line. */
title:
| title_list EOL { $1 }
;
/* One title fragment. Numeric tokens are converted back to text with
   string_of_float / string_of_int, so their spelling in the title may
   differ from the spelling in the file. */
text:
| WORD { $1 }
| SPACE { $1 }
| FLOAT { (string_of_float $1)}
| INTEGER { (string_of_int $1)}
;
/* Concatenation of the title fragments, left to right. */
title_list:
| { "" }
| title_list text { ($1 ^ $2) }
;
/* Atom lines terminated by an extra end of line or by the end of file.
   atoms_list accumulates in reverse, hence the List.rev. */
atoms_xyz:
| atoms_list EOL { List.rev $1 }
| atoms_list EOF { List.rev $1 }
;
/* One production per combination of: element as WORD or INTEGER,
   optional leading blank, optional trailing blank. Each new nucleus is
   consed in front of the ones already read. */
atoms_list:
| { [] }
| atoms_list WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_string $4 $6 $8 $2 :: $1 }
| atoms_list WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_string $4 $6 $8 $2 :: $1 }
| atoms_list INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_int $4 $6 $8 $2 :: $1 }
| atoms_list INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_int $4 $6 $8 $2 :: $1 }
| atoms_list SPACE WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_string $5 $7 $9 $3 :: $1 }
| atoms_list SPACE WORD SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_string $5 $7 $9 $3 :: $1 }
| atoms_list SPACE INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT EOL { output_of_int $5 $7 $9 $3 :: $1 }
| atoms_list SPACE INTEGER SPACE FLOAT SPACE FLOAT SPACE FLOAT SPACE EOL { output_of_int $5 $7 $9 $3 :: $1 }
;
2023-04-24 19:01:42 +02:00
#+end_src
2020-12-29 18:06:54 +01:00
When an xyz file is read by =xyz_parser.mly=, it is converted into
an ~xyz_file~ data structure.
2023-04-24 19:01:42 +02:00
#+begin_src ocaml :tangle lib/xyz_ast.mli
2020-12-29 18:06:54 +01:00
open Common
(** One atom line of an xyz file. *)
type nucleus =
{
element: Element.t ; (** Chemical element of the nucleus. *)
coord : Coordinate.angstrom Coordinate.point; (** Position, in angstrom. *)
}
(** Parsed contents of an xyz file. *)
type xyz_file =
{
number_of_atoms : int ; (** Atom count read from the first line. *)
file_title : string ; (** Text of the title line. *)
nuclei : nucleus list ; (** Atom lines, in file order. *)
}
#+end_src
** Conversion
#+begin_src ocaml :tangle (eval mli)
(** Reads nuclei from a string in xyz format. The coordinates are converted
    from angstrom to bohr.
    @raise Failure if the announced number of atoms differs from the number
    of atoms read. *)
val of_xyz_string : string -> t
(** Writes nuclei in xyz format: the atom count, an empty title line, then
    one line per atom with the coordinates converted from bohr to angstrom. *)
val to_xyz_string : t -> string
(** Same as {!of_xyz_string}, reading the data from the file with the given
    name. *)
val of_xyz_file : string -> t
(** Reads nuclei from a string in z-matrix format. *)
val of_zmt_string : string -> t
(** Same as {!of_zmt_string}, reading the data from the file with the given
    name. *)
val of_zmt_file : string -> t
(** Human-readable table of the nuclei, with the coordinates converted from
    bohr to angstrom. *)
val to_string : t -> string
(** Reads nuclei from the file with the given name. *)
val of_filename : string -> t
#+end_src
| ~of_xyz_string~ | Create from a string, in xyz format |
| ~to_xyz_string~ | Transform to a string, in xyz format |
| ~of_xyz_file~ | Create from a file, in xyz format |
| ~of_zmt_string~ | Create from a string, in z-matrix format |
| ~of_zmt_file~ | Create from a file, in z-matrix format |
| ~to_string~ | Transform to a string, for printing |
| ~of_filename~ | Detects the type of file (xyz, z-matrix) and reads the file |
#+begin_src ocaml :tangle (eval ml) :exports none
(* Parses xyz data from [lexbuf] and returns the nuclei as an array, with
   the coordinates converted from angstrom to bohr.
   Raises [Failure] when the atom count announced on the first line differs
   from the number of atom lines actually read. *)
let of_xyz_lexbuf lexbuf =
  let parsed = Xyz_parser.input Nuclei_lexer.read_all lexbuf in
  let found = List.length parsed.nuclei in
  if found <> parsed.number_of_atoms then
    failwith
      (Printf.sprintf "Error: expected %d atoms but %d read"
         parsed.number_of_atoms found);
  let to_bohr atom =
    (atom.element, Coordinate.angstrom_to_bohr atom.coord)
  in
  Array.of_list (List.map to_bohr parsed.nuclei)
2023-04-24 19:01:42 +02:00
(* Reads nuclei from [input_string], which holds data in xyz format. *)
let of_xyz_string input_string =
  of_xyz_lexbuf (Lexing.from_string input_string)
(* Reads nuclei from the xyz file [filename].
   The channel is closed on every path: after a successful parse, and also
   when the parser or the atom-count check raises, in which case the
   exception is re-raised unchanged.
   Raises [Sys_error] if the file cannot be opened. *)
let of_xyz_file filename =
  let ic = open_in filename in
  match of_xyz_lexbuf (Lexing.from_channel ic) with
  | result ->
      close_in ic;
      result
  | exception e ->
      (* Do not let a failure to close mask the original error. *)
      close_in_noerr ic;
      raise e
(* Reads nuclei from [buffer], which holds a z-matrix. The z-matrix is
   turned into (element, x, y, z) entries by [Zmatrix]; each position is
   built as an angstrom coordinate and converted to bohr. *)
let of_zmt_string buffer =
  let to_bohr (element, x, y, z) =
    let position =
      Coordinate.(angstrom_to_bohr (make_angstrom { x; y; z }))
    in
    (element, position)
  in
  Array.map to_bohr (Zmatrix.to_xyz (Zmatrix.of_string buffer))
(* Reads nuclei from the z-matrix file [filename].
   The lines of the file are joined with newlines and handed to
   [of_zmt_string]. The channel is closed before the text is parsed, and
   also when reading fails with an exception other than [End_of_file].
   Raises [Sys_error] if the file cannot be opened. *)
let of_zmt_file filename =
  let ic = open_in filename in
  (* The recursive call is outside of any exception handler, so the loop
     is tail-recursive and uses constant stack space. *)
  let rec read_lines accu =
    match input_line ic with
    | line -> read_lines (line :: accu)
    | exception End_of_file -> List.rev accu
  in
  let lines =
    match read_lines [] with
    | lines ->
        close_in ic;
        lines
    | exception e ->
        close_in_noerr ic;
        raise e
  in
  of_zmt_string (String.concat "\n" lines)
(* Renders [atoms] as a text table: a fixed header, one row per nucleus
   (1-based index, atomic number, element symbol and the position converted
   from bohr to angstrom) and a closing rule. *)
let to_string atoms =
  let header =
"
Nuclear Coordinates (Angstrom)
------------------------------
-----------------------------------------------------------------------
Center Atomic Element Coordinates (Angstroms)
Number X Y Z
-----------------------------------------------------------------------
"
  in
  let footer =
"
-----------------------------------------------------------------------
"
  in
  let row index (element, position) =
    let open Coordinate in
    let angstrom = bohr_to_angstrom position in
    Printf.sprintf " %5d %5d %5s %12.6f %12.6f %12.6f"
      (index + 1) (Element.to_int element) (Element.to_string element)
      angstrom.x angstrom.y angstrom.z
  in
  let rows = List.mapi row (Array.to_list atoms) in
  header ^ String.concat "\n" rows ^ footer
(* Reads nuclei from [filename], choosing the reader from the file name:
   names ending in ".zmt" (compared case-insensitively) are read as
   z-matrices, every other name is read as xyz, as before.
   NOTE(review): ".zmt" as the z-matrix extension is an assumption taken
   from the of_zmt_* function names; confirm against the files in use. *)
let of_filename filename =
  if Filename.check_suffix (String.lowercase_ascii filename) ".zmt" then
    of_zmt_file filename
  else
    of_xyz_file filename
(* Writes [t] in xyz format: the number of atoms, an empty title line, then
   one line per nucleus with its element symbol and its position converted
   from bohr to angstrom. Lines are joined with newlines, without a
   trailing newline. *)
let to_xyz_string t =
  let atom_line (element, position) =
    let open Coordinate in
    let angstrom = bohr_to_angstrom position in
    Printf.sprintf " %5s %12.6f %12.6f %12.6f"
      (Element.to_string element) angstrom.x angstrom.y angstrom.z
  in
  let count = string_of_int (Array.length t) in
  let atom_lines = List.map atom_line (Array.to_list t) in
  String.concat "\n" (count :: "" :: atom_lines)
2023-04-24 14:13:36 +02:00
2020-12-29 18:06:54 +01:00
#+end_src
2023-04-24 14:13:36 +02:00
** Query
2020-12-29 18:06:54 +01:00
#+begin_src ocaml :tangle (eval mli)
2021-01-01 11:46:11 +01:00
(** Chemical formula: the element symbols in alphabetical order, each one
    followed by its number of occurrences written as [_{n}]. *)
val formula : t -> string
2020-12-29 18:06:54 +01:00
(** Nuclear repulsion energy: sum over the pairs of nuclei of the product
    of their charges divided by their distance. Nuclei at zero distance
    from each other do not contribute. *)
val repulsion : t -> float
(** Sum of the charges of the nuclei. *)
val charge : t -> Charge.t
(** Sum over the nuclei of [Element.small_core]. *)
val small_core : t -> int
(** Sum over the nuclei of [Element.large_core]. *)
val large_core : t -> int
#+end_src
2021-01-01 11:46:11 +01:00
| ~formula~ | Returns the chemical formula |
2020-12-29 18:06:54 +01:00
| ~repulsion~ | Nuclear repulsion energy, in atomic units |
| ~charge~ | Sum of the charges of the nuclei |
| ~small_core~ | Number of core electrons in the small core model |
| ~large_core~ | Number of core electrons in the large core model |
2023-04-24 19:01:42 +02:00
#+begin_src ocaml :tangle (eval ml) :exports none
2021-01-01 11:46:11 +01:00
(* Builds the chemical formula of [t]: occurrences are counted per element
   symbol, the symbols are sorted with [String.compare] and each one is
   written as symbol_{count}. *)
let formula t =
  let counts = Hashtbl.create 67 in
  Array.iter (fun (element, _) ->
    let symbol = Element.to_string element in
    let seen =
      match Hashtbl.find_opt counts symbol with
      | Some n -> n
      | None -> 0
    in
    Hashtbl.replace counts symbol (seen + 1)
  ) t;
  (* Each symbol has exactly one binding because only [replace] is used. *)
  Hashtbl.fold (fun symbol n accu -> (symbol, n) :: accu) counts []
  |> List.sort (fun (a, _) (b, _) -> String.compare a b)
  |> List.map (fun (symbol, n) -> symbol ^ "_{" ^ string_of_int n ^ "}")
  |> String.concat ""
2023-04-24 19:01:42 +02:00
2020-12-29 18:06:54 +01:00
(* Nuclear repulsion energy of [nuclei].
   Every ordered pair of nuclei is visited, so each unordered pair is seen
   twice; the factor 0.5 compensates. Pairs at zero distance, which include
   a nucleus paired with itself, are skipped. *)
let repulsion nuclei =
  let charge_of element = Charge.to_float (Element.to_charge element) in
  let total = ref 0. in
  Array.iter (fun (e1, coord1) ->
    (* Contribution of all the partners of the current nucleus. *)
    let partial = ref 0. in
    Array.iter (fun (e2, coord2) ->
      let r = Coordinate.(norm (coord1 |- coord2)) in
      if r > 0. then
        partial := !partial +. 0.5 *. charge_of e2 *. charge_of e1 /. r
    ) nuclei;
    total := !total +. !partial
  ) nuclei;
  !total
2023-04-24 19:01:42 +02:00
(* Total charge of [nuclei]: the integer charges of the elements are summed
   and the sum is wrapped back into a [Charge.t]. *)
let charge nuclei =
  let total = ref 0 in
  Array.iter (fun (element, _) ->
    total := !total + Charge.to_int (Element.to_charge element)
  ) nuclei;
  Charge.of_int !total
2023-04-24 19:01:42 +02:00
(* Sum of [Element.small_core] over the nuclei of [a]. *)
let small_core a =
  a
  |> Array.map (fun (element, _) -> Element.small_core element)
  |> Array.fold_left ( + ) 0
2023-04-24 19:01:42 +02:00
(* Sum of [Element.large_core] over the nuclei of [a]. *)
let large_core a =
  a
  |> Array.map (fun (element, _) -> Element.large_core element)
  |> Array.fold_left ( + ) 0
2023-04-24 14:13:36 +02:00
2020-12-29 18:06:54 +01:00
#+end_src
2023-04-24 19:01:42 +02:00
** TREXIO
*** Read
#+begin_src ocaml :tangle (eval mli)
(** Reads the nuclei (number, charges and coordinates) from a TREXIO file. *)
val of_trexio : Trexio.trexio_file -> t
#+end_src
#+begin_src ocaml :tangle (eval ml) :exports none
(* Builds the nuclei from the TREXIO file [f]. The element of nucleus [i]
   is derived from its charge; its position is taken from entries
   3i, 3i+1 and 3i+2 of the coordinate array read from the file. *)
let of_trexio f =
  let num = Trexio.read_nucleus_num f in
  let charges = Array.map Charge.of_float (Trexio.read_nucleus_charge f) in
  let flat_coord = Trexio.read_nucleus_coord f in
  let nucleus i =
    let first = 3 * i in
    let x = flat_coord.(first)
    and y = flat_coord.(first + 1)
    and z = flat_coord.(first + 2) in
    let position = Coordinate.{ x; y; z } in
    (Element.of_charge charges.(i), Coordinate.make position)
  in
  Array.init num nucleus
#+end_src
*** Write
#+begin_src ocaml :tangle (eval mli)
(** Writes the nuclei to a TREXIO file: number, charges, labels,
    coordinates and nuclear repulsion energy. *)
val to_trexio : Trexio.trexio_file -> t -> unit
#+end_src
#+begin_src ocaml :tangle (eval ml) :exports none
(* Writes the nuclei [t] to the TREXIO file [f], in this order: number of
   nuclei, charges, element symbols as labels, coordinates flattened as
   x, y, z per nucleus, and the nuclear repulsion energy. *)
let to_trexio f t =
  let num = Array.length t in
  Trexio.write_nucleus_num f num;
  let charges =
    Array.map (fun (e, _) -> Charge.to_float (Element.to_charge e)) t
  in
  Trexio.write_nucleus_charge f charges;
  let labels = Array.map (fun (e, _) -> Element.to_string e) t in
  Trexio.write_nucleus_label f labels;
  let coord = Array.make (3 * num) 0. in
  Array.iteri (fun i (_, xyz) ->
    List.iteri
      (fun k axis -> coord.(3 * i + k) <- Coordinate.get axis xyz)
      Coordinate.[ X; Y; Z ]
  ) t;
  Trexio.write_nucleus_coord f coord;
  Trexio.write_nucleus_repulsion f (repulsion t)
#+end_src
2020-12-29 18:06:54 +01:00
** Printers
#+begin_src ocaml :tangle (eval mli)
(** Pretty-printer: prints the text produced by [to_string]. *)
val pp : Format.formatter -> t -> unit
#+end_src
#+begin_src ocaml :tangle (eval ml) :exports none
(* Prints the table built by [to_string] on [ppf], inside a box. *)
let pp ppf t =
  let text = to_string t in
  Format.fprintf ppf "@[%s@]" text
#+end_src
** Tests
#+begin_src ocaml :tangle (eval test-ml) :exports none
open Common
open Particles
open Alcotest
(* Directory holding the test data: the "test" directory under the root. *)
let wd = String.concat Filename.dir_sep [ Common.Qcaml.root; "test" ]
(* Reads <molecule>.xyz from the test directory and checks the number of
   nuclei, the repulsion energy (within 1e-4), the total charge and the
   number of small-core electrons against the expected values. *)
let test_xyz molecule length repulsion charge core =
  let path = String.concat Filename.dir_sep [ wd; molecule ^ ".xyz" ] in
  let nuclei = Nuclei.of_xyz_file path in
  check int "length" length (Array.length nuclei);
  check (float 1.e-4) "repulsion" repulsion (Nuclei.repulsion nuclei);
  check int "charge" charge (Charge.to_int (Nuclei.charge nuclei));
  check int "small_core" core (Nuclei.small_core nuclei)
(* Test cases: name, speed and test function for each molecule. *)
let tests =
  let caffeine () = test_xyz "caffeine" 24 917.0684 102 28 in
  let water () = test_xyz "water" 3 9.19497 10 2 in
  [
    "caffeine", `Quick, caffeine;
    "water", `Quick, water;
  ]
#+end_src