diff --git a/bin/qp_test b/bin/qp_test index 288b7291..d3a188fb 100755 --- a/bin/qp_test +++ b/bin/qp_test @@ -46,7 +46,7 @@ def main(arguments): append_bats(dirname, filenames) else: for (dirname, _, filenames) in os.walk(os.getcwd(), followlinks=False): - if "IRPF90_temp" not in dirname: + if "IRPF90_temp" not in dirname and "external" not in dirname: append_bats(dirname, filenames) l_bats = [y for _, y in sorted(l_bats)] @@ -67,6 +67,7 @@ def main(arguments): os.system(test+" python3 bats_to_sh.py "+bats_file+ "| bash") else: +# print(" ".join(["bats", "--verbose-run", "--trace", bats_file])) subprocess.check_call(["bats", "--verbose-run", "--trace", bats_file], env=os.environ) diff --git a/etc/qp.rc b/etc/qp.rc index c56661c7..d339f475 100644 --- a/etc/qp.rc +++ b/etc/qp.rc @@ -110,6 +110,11 @@ function qp() unset COMMAND ;; + "test") + shift + qp_test $@ + ;; + *) which "qp_$1" &> /dev/null if [[ $? -eq 0 ]] ; then @@ -183,7 +188,7 @@ _qp_Complete() ;; esac;; set_file) - COMPREPLY=( $(compgen -W "$(for i in * ; do [[ -f ${i}/ezfio/.version ]] && echo $i ; done)" -- ${cur} ) ) + COMPREPLY=( $(compgen -W "$(for i in $(find . -name ezfio | sed 's/ezfio$/.version/') ; do [[ -f $i ]] && echo ${i%/.version} ; done)" -- ${cur} ) ) return 0 ;; plugins) @@ -215,10 +220,15 @@ _qp_Complete() return 0 ;; esac;; + test) + COMPREPLY=( $(compgen -W "-v -a " -- $cur ) ) + return 0 + ;; *) COMPREPLY=( $(compgen -W 'plugins set_file \ unset_file man \ create_ezfio \ + test \ convert_output_to_ezfio \ -h update' -- $cur ) ) diff --git a/src/utils_trust_region/TANGLE_org_mode.sh b/src/ccsd/org/TANGLE_org_mode.sh similarity index 100% rename from src/utils_trust_region/TANGLE_org_mode.sh rename to src/ccsd/org/TANGLE_org_mode.sh diff --git a/src/mo_localization/84.mo_localization.bats b/src/mo_localization/84.mo_localization.bats new file mode 100644 index 00000000..b34c0bd5 --- /dev/null +++ b/src/mo_localization/84.mo_localization.bats @@ -0,0 +1,97 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + +zero () { + if [ -z "$1" ]; then echo 0.0; else echo $1; fi +} + +function run() { + thresh1=1e-10 + thresh2=1e-12 + thresh3=1e-4 + test_exe scf || skip + qp set_file $1 + qp edit --check + qp reset -d + qp set_frozen_core + qp set localization localization_method boys + file="$(echo $1 | sed 's/.ezfio//g')" + energy="$(cat $1/hartree_fock/energy)" + fb_err1="$(qp run debug_gradient_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + fb_err2="$(qp run debug_hessian_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + qp run localization > $file.loc.out + fb_energy="$(qp run print_energy | grep -A 1 'Nuclear repulsion energy' | tail -n 1 )" + fb_c="$(cat $file.loc.out | grep 'Criterion:Core' | tail -n 1 | awk '{print $3}')i" + fb_i="$(cat $file.loc.out | grep 'Criterion:Inactive' | tail -n 1 | awk '{print $3}')" + fb_a="$(cat $file.loc.out | grep 'Criterion:Active' | tail -n 1 | awk '{print $3}')" + fb_v="$(cat $file.loc.out | grep 'Criterion:Virtual' | tail -n 1 | awk '{print $3}')" + qp reset -a + qp run scf + qp set_frozen_core + qp set localization localization_method pipek + pm_err1="$(qp run debug_gradient_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + pm_err2="$(qp run debug_hessian_loc | grep 'Max error' | tail -n 1 | awk '{print $3}')" + qp run localization > $file.loc.out + pm_c="$(cat $file.loc.out | grep 'Criterion:Core' | tail -n 1 | awk '{print $3}')i" + pm_i="$(cat $file.loc.out | grep 'Criterion:Inactive' | tail -n 1 | awk '{print $3}')" + pm_a="$(cat $file.loc.out | grep 'Criterion:Active' | tail -n 1 | awk '{print $3}')" + pm_v="$(cat $file.loc.out | grep 'Criterion:Virtual' | tail -n 1 | awk '{print $3}')" + pm_energy="$(qp run print_energy | grep -A 1 'Nuclear repulsion energy' | tail -n 1 )" + qp set localization localization_method boys + qp reset -a + qp run scf + qp set_frozen_core + eq $energy $fb_energy $thresh1 + eq $fb_err1 0.0 $thresh2 + eq $fb_err2 0.0 $thresh2 + eq $energy $pm_energy $thresh1 + eq $pm_err1 0.0 $thresh2 + eq $pm_err2 0.0 $thresh2 + fb_c=$(zero $fb_c) + fb_i=$(zero $fb_i) + fb_a=$(zero $fb_a) + fb_v=$(zero $fb_v) + pm_c=$(zero $pm_c) + pm_i=$(zero $pm_i) + pm_a=$(zero $pm_a) + pm_v=$(zero $pm_v) + eq $fb_c $2 $thresh3 + eq $fb_i $3 $thresh3 + eq $fb_a $4 $thresh3 + eq $fb_v $5 $thresh3 + eq $pm_c $6 $thresh3 + eq $pm_i $7 $thresh3 + eq $pm_a $8 $thresh3 + eq $pm_v $9 $thresh3 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -32.1357551678876 -47.0041982094667 0.0 -223.470015856259 -1.99990778964451 -2.51376723927071 0.0 -12.8490602539275 +} + +@test "clo" { +run clo.ezfio -44.1624001765291 -32.4386660941387 0.0 -103.666309287187 -5.99985418946811 -5.46871580225222 0.0 -20.2480064922275 +} + +@test "clf" { +run clf.ezfio -47.5143398826967 -35.7206886315104 0.0 -107.043029033468 -5.99994222062230 -6.63916513458470 0.0 -19.7035159913484 +} + +@test "h2o2" { +run h2o2.ezfio -7.76848143170524 -30.9694344369829 0.0 -175.898343829453 -1.99990497554575 -5.62980322957485 0.0 -33.5699813186666 +} + +@test "h2o" { +run h2o.ezfio 0.0 -2.52317434969591 0.0 -45.3136377925359 0.0 -3.01248365356981 0.0 -22.4470831240924 +} + +@test "h3coh" { +run h3coh.ezfio -3.66763692804590 -24.0463089480870 0.0 -111.485948435075 -1.99714061342078 -4.89242181322988 0.0 -23.6405412057679 +} + +@test "n2h4" { +run n2h4.ezfio -7.46608163002070 -35.7632174051822 0.0 -305.913449004632 -1.99989326143356 -4.62496615892268 0.0 -51.5171904685553 +} + diff --git a/src/mo_localization/EZFIO.cfg b/src/mo_localization/EZFIO.cfg new file mode 100644 index 00000000..d1b844a5 --- /dev/null +++ b/src/mo_localization/EZFIO.cfg @@ -0,0 +1,54 @@ +[localization_method] +type: character*(32) +doc: Method for the orbital localization. boys: Foster-Boys, pipek: Pipek-Mezey. +interface: ezfio,provider,ocaml +default: boys + +[localization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital localization. +interface: ezfio,provider,ocaml +default: 1000 + +[localization_use_hessian] +type: logical +doc: If true, it uses the trust region algorithm with the gradient and the diagonal of the hessian. Else it computes the rotation between each pair of MOs that should be applied to maximize/minimize the localization criterion. The last option is not easy to converge. +interface: ezfio,provider,ocaml +default: true + +[auto_mo_class] +type: logical +doc: If true, set automatically the classes. +interface: ezfio,provider,ocaml +default: true + +[thresh_loc_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the localization exits when the largest element in the gradient is smaller than thresh_localization_max_elem_grad. +interface: ezfio,provider,ocaml +default: 1.e-6 + +[kick_in_mos] +type: logical +doc: If True, it applies a rotation of an angle angle_pre_rot between the MOs of a same mo_class before the localization. +interface: ezfio,provider,ocaml +default: true + +[angle_pre_rot] +type: double precision +doc: To define the angle for the rotation of the MOs before the localization (in rad). +interface: ezfio,provider,ocaml +default: 0.1 + +[sort_mos_by_e] +type: logical +doc: If True, the MOs are sorted using the diagonal elements of the Fock matrix. +interface: ezfio,provider,ocaml +default: false + +[debug_hf] +type: logical +doc: If True, prints the HF energy before/after the different steps of the localization. Only for debugging. +interface: ezfio,provider,ocaml +default: false + diff --git a/src/mo_localization/NEED b/src/mo_localization/NEED new file mode 100644 index 00000000..b438f39d --- /dev/null +++ b/src/mo_localization/NEED @@ -0,0 +1,3 @@ +hartree_fock +utils_trust_region +determinants diff --git a/src/mo_localization/README.md b/src/mo_localization/README.md new file mode 100644 index 00000000..c28a5ee1 --- /dev/null +++ b/src/mo_localization/README.md @@ -0,0 +1,113 @@ +# Orbital localisation +To localize the MOs: +``` +qp run localization +``` +By default, the different otbital classes are automatically set by splitting +the orbitales in the following classes: +- Core -> Core +- Active, doubly occupied -> Inactive +- Active, singly occupied -> Active +- Active, empty -> Virtual +- Deleted -> Deleted +The orbitals will be localized among each class, excpect the deleted ones. +If you want to choose another splitting, you can set +``` +qp set mo_localization auto_mo_class false +``` +and define the classes with +``` +qp set_mo_class -c [] -a [] -v [] -i [] -d [] +``` +for more information +``` +qp set_mo_class -q +``` +We don't care about the name of the +mo classes. The algorithm just localizes all the MOs of +a given class between them, for all the classes, except the deleted MOs. +If you are using the last option don't forget to reset the initial mo classes +after the localization. + +Before the localization, a kick is done for each mo class +(except the deleted ones) to break the MOs. This is done by +doing a given rotation between the MOs. +This feature can be removed by setting: +``` +qp set localization kick_in_mos false +``` +and the default angle for the rotation can be changed with: +``` +qp set localization angle_pre_rot 1e-3 # or something else +``` + +After the localization, the MOs of each class (except the deleted ones) +can be sorted between them using the diagonal elements of +the fock matrix with: +``` +qp set localization sort_mos_by_e true +``` + +You can check the Hartree-Fock energy before/during/after the localization +by putting (only for debugging): +``` +qp set localization debug_hf true +``` + +## Foster-Boys & Pipek-Mezey +Foster-Boys: +``` +qp set localization localization_method boys +``` + +Pipek-Mezey: +``` +qp set localization localization_method pipek +``` + +# Break the spatial symmetry of the MOs +This program work exactly as the localization. +To break the spatial symmetry of the MOs: +``` +qp run break_spatial_sym +``` +The default angle for the rotations is too big for this kind of +application, a value between 1e-3 and 1e-6 should break the spatial +symmetry with just a small change in the energy: +``` +qp set localization angle_pre_rot 1e-3 +``` + +# With or without hessian + trust region +With hessian + trust region +``` +qp set localization localisation_use_hessian true +``` +It uses the trust region algorithm with the diagonal of the hessian of the +localization criterion with respect to the MO rotations. + +Without the hessian and the trust region +``` +qp set localization localisation_use_hessian false +``` +By doing so it does not require to store the hessian but the +convergence is not easy, in particular for virtual MOs. +It seems that it not possible to converge with Pipek-Mezey +localization with this approach. + +# Parameters +Some other parameters are available for the localization (qp edit for more details). + +# Tests +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_localization/break_spatial_sym.irp.f b/src/mo_localization/break_spatial_sym.irp.f new file mode 100644 index 00000000..2048aca6 --- /dev/null +++ b/src/mo_localization/break_spatial_sym.irp.f @@ -0,0 +1,27 @@ +! ! A small program to break the spatial symmetry of the MOs. + +! ! You have to defined your MO classes or set security_mo_class to false +! ! with: +! ! qp set orbital_optimization security_mo_class false + +! ! The default angle for the rotations is too big for this kind of +! ! application, a value between 1e-3 and 1e-6 should break the spatial +! ! symmetry with just a small change in the energy. + + +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end diff --git a/src/mo_localization/debug_gradient_loc.irp.f b/src/mo_localization/debug_gradient_loc.irp.f new file mode 100644 index 00000000..d935e782 --- /dev/null +++ b/src/mo_localization/debug_gradient_loc.irp.f @@ -0,0 +1,65 @@ +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end diff --git a/src/mo_localization/debug_hessian_loc.irp.f b/src/mo_localization/debug_hessian_loc.irp.f new file mode 100644 index 00000000..3ee4f0fa --- /dev/null +++ b/src/mo_localization/debug_hessian_loc.irp.f @@ -0,0 +1,65 @@ +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:), H2(:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(H(n),H2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i)) > threshold) then + print*,H(i) + nb_error = nb_error + 1 + if (dabs(H(i)) > max_elem) then + max_elem = H(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end diff --git a/src/mo_localization/kick_the_mos.irp.f b/src/mo_localization/kick_the_mos.irp.f new file mode 100644 index 00000000..b6c77c9e --- /dev/null +++ b/src/mo_localization/kick_the_mos.irp.f @@ -0,0 +1,16 @@ +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end diff --git a/src/mo_localization/localization.irp.f b/src/mo_localization/localization.irp.f new file mode 100644 index 00000000..7ccb2f5a --- /dev/null +++ b/src/mo_localization/localization.irp.f @@ -0,0 +1,520 @@ +program localization + + implicit none + + call set_classes_loc + call run_localization + call unset_classes_loc + +end + + + + +! Variables: +! | pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +! | R(mo_num,mo_num) | double precision | Rotation matrix | +! | tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +! | prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +! | spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +! | criterion | double precision | Localization criterion | +! | prev_criterion | double precision | Previous criterion | +! | criterion_model | double precision | Estimated next criterion | +! | rho | double precision | Ratio to measure the agreement between the model | +! | | | and the reality | +! | delta | double precision | Radisu of the trust region | +! | norm_grad | double precision | Norm of the gradient | +! | info | integer | for dsyev from Lapack | +! | max_elem | double precision | maximal element in the gradient | +! | v_grad(:) | double precision | Gradient | +! | H(:,:) | double precision | Hessian (diagonal) | +! | e_val(:) | double precision | Eigenvalues of the hessian | +! | W(:,:) | double precision | Eigenvectors of the hessian | +! | tmp_x(:) | double precision | Step in 1D (in a subaspace) | +! | tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +! | tmp_list(:) | double precision | List of MOs in a mo_class | +! | i,j,k | integer | Indexes in the full MO space | +! | tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +! | l | integer | Index for the mo_class | +! | key(:) | integer | Key to sort the eigenvalues of the hessian | +! | nb_iter | integer | Number of iterations | +! | must_exit | logical | To exit the trust region loop | +! | cancel_step | logical | To cancel a step | +! | not_*converged | logical | To localize the different mo classes | +! | t* | double precision | To measure the time | +! | n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +! | tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +! | | | Number of dimension in the subspace | + +! Variables in qp_edit for the localization: +! | localization_method | +! | localization_max_nb_iter | +! | default_mo_class | +! | thresh_loc_max_elem_grad | +! | kick_in_mos | +! | angle_pre_rot | + +! + all the variables for the trust region + +! Cf. qp_edit orbital optimization + + +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:), tmp_m_x(:,:), tmp_x(:),W(:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + +! Loc + +! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + W(i) = dble(key(i)) + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,1, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + ! Seems unecessary + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end diff --git a/src/mo_localization/localization_sub.irp.f b/src/mo_localization/localization_sub.irp.f new file mode 100644 index 00000000..f5afed07 --- /dev/null +++ b/src/mo_localization/localization_sub.irp.f @@ -0,0 +1,2008 @@ +! Gathering +! Gradient/hessian/criterion for the localization: +! They are chosen in function of the localization method + +! Gradient: + +! qp_edit : +! | localization_method | method for the localization | + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + + +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end + + + +! Hessian: + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + + +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Criterion: + +! Output: +! | criterion | double precision | Criterion for the orbital localization | + + +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + + + +! Subroutine to update the datas needed for the localization + +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end + + + +! Angles: + +! Output: +! | tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +! | max_elem | double precision | Maximal angle | + + + +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end + +! Gradient +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'---End gradient_FB---' + +end subroutine + +! Gradient (OMP) + +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'---End gradient_FB_omp---' + +end subroutine + +! Hessian + +! Output: +! | H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! Internal: +! | beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +! | i,j,k | integer | indexes in the full space | +! | tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +! | t* | double precision | to compute the time | + + +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'---End hessian_FB---' + +end subroutine + +! Hessian (OMP) + +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do i = 1, tmp_n + H(i) = 0d0 + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'---End hessian_FB_omp---' + +end subroutine + +! Gradient v1 + +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek + +! Gradient + +! The gradient is + +! \begin{align*} +! \left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +! \end{align*} +! with +! \begin{align*} +! \gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +! \end{align*} + +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + +! Input: +! | tmp_n | integer | Number of parameters in the MO subspace | +! | tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +! | tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +! Output: +! | v_grad(tmp_n) | double precision | Gradient in the subspace | +! | max_elem | double precision | Maximal element in the gradient | +! | norm_grad | double precision | Norm of the gradient | + +! Internal: +! | m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +! | tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +! | tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +! | | | product and compute tmp_int | +! | CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +! | tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the mo_class | +! | tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +! | | | depending of the nuclei | +! | tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +! | | | values depending of the nuclei | +! | a | | index to loop over the nuclei | +! | b | | index to loop over the AOs which belongs to the nuclei a | +! | mu | | index to refer to an AO which belongs to the nuclei a | +! | rho | | index to loop over all the AOs | + + +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'---End gradient_PM---' + +end + +! Hessian v1 + +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + +end + +! Hessian + +! The hessian is +! \begin{align*} +! \left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +! \end{align*} +! \begin{align*} +! \beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +! \end{align*} + +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} +! $\sum_{\rho}$ -> sum over all the AOs +! $\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +! $c^t$ -> expansion coefficient of orbital |t> + + +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'---End hessian_PM---' + +end + +! Criterion PM (old) + +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end + +! Criterion PM + +! The criterion is computed as +! \begin{align*} +! \mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +! \end{align*} +! with +! \begin{align*} +! = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +! \end{align*} + + +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + +end + +! Criterion PM v3 + +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + +end + +! Criterion FB (old) + +! The criterion is just computed as + +! \begin{align*} +! C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +! \end{align*} + +! The minus sign is here in order to minimize this criterion + +! Output: +! | criterion | double precision | criterion for the Foster-Boys localization | + + +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +! Criterion FB + +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine + +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end + +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end + +! Spatial extent + +! The spatial extent of an orbital $i$ is computed as +! \begin{align*} +! \sum_{\lambda=x,y,z}\sqrt{ - ^2} +! \end{align*} + +! From that we can also compute the average and the standard deviation + + +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end + +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end + +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end + +! Utils + + +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end + +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine + +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end + +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +subroutine set_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + call apply_hole(psi_det(1,1,1), 1, i, res, ok1, N_int) + call apply_hole(psi_det(1,1,1), 2, i, res, ok2, N_int) + if (ok1 .and. ok2) then + mo_class(i) = 'Inactive' + else if (.not. ok1 .and. .not. ok2) then + mo_class(i) = 'Virtual' + else + mo_class(i) = 'Active' + endif + enddo + touch mo_class + endif + +end + +subroutine unset_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + mo_class(i) = 'Active' + enddo + touch mo_class + endif + +end diff --git a/src/mo_localization/org/TANGLE_org_mode.sh b/src/mo_localization/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_localization/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_localization/org/break_spatial_sym.org b/src/mo_localization/org/break_spatial_sym.org new file mode 100644 index 00000000..d82f1c60 --- /dev/null +++ b/src/mo_localization/org/break_spatial_sym.org @@ -0,0 +1,28 @@ +! A small program to break the spatial symmetry of the MOs. + +! You have to defined your MO classes or set security_mo_class to false +! with: +! qp set orbital_optimization security_mo_class false + +! The default angle for the rotations is too big for this kind of +! application, a value between 1e-3 and 1e-6 should break the spatial +! symmetry with just a small change in the energy. + +#+BEGIN_SRC f90 :comments org :tangle break_spatial_sym.irp.f +program break_spatial_sym + + !BEGIN_DOC + ! Break the symmetry of the MOs with a rotation + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end +#+END_SRC diff --git a/src/mo_localization/org/debug_gradient_loc.org b/src/mo_localization/org/debug_gradient_loc.org new file mode 100644 index 00000000..6d147dd0 --- /dev/null +++ b/src/mo_localization/org/debug_gradient_loc.org @@ -0,0 +1,67 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_gradient_loc.irp.f +program debug_gradient_loc + + !BEGIN_DOC + ! Check if the gradient is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: v_grad(:), v_grad2(:) + double precision :: norm, max_elem, threshold, max_error + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(v_grad(n),v_grad2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call gradient_FB(n,list_size,list,v_grad,max_elem,norm) + call gradient_FB_omp(n,list_size,list,v_grad2,max_elem,norm) + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey' + call gradient_PM(n,list_size,list,v_grad,max_elem,norm) + call gradient_PM(n,list_size,list,v_grad2,max_elem,norm) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,v_grad(i) + enddo + + v_grad = v_grad - v_grad2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(v_grad(i)) > threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + if (dabs(v_grad(i)) > max_elem) then + max_elem = v_grad(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(v_grad,v_grad2) + +end +#+END_SRC diff --git a/src/mo_localization/org/debug_hessian_loc.org b/src/mo_localization/org/debug_hessian_loc.org new file mode 100644 index 00000000..e47cf38d --- /dev/null +++ b/src/mo_localization/org/debug_hessian_loc.org @@ -0,0 +1,67 @@ +#+BEGIN_SRC f90 :comments org :tangle debug_hessian_loc.irp.f +program debug_hessian_loc + + !BEGIN_DOC + ! Check if the hessian is correct + !END_DOC + + implicit none + + integer :: list_size, n + integer, allocatable :: list(:) + double precision, allocatable :: H(:), H2(:) + double precision :: threshold, max_error, max_elem + integer :: i, nb_error + + threshold = 1d-12 + + list_size = dim_list_act_orb + + allocate(list(list_size)) + + list = list_act + + n = list_size*(list_size-1)/2 + + allocate(H(n),H2(n)) + + if (localization_method == 'boys') then + print*,'Foster-Boys' + call hessian_FB(n,list_size,list,H) + call hessian_FB_omp(n,list_size,list,H2) + elseif(localization_method == 'pipek') then + print*,'Pipek-Mezey' + call hessian_PM(n,list_size,list,H) + call hessian_PM(n,list_size,list,H2) + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + + do i = 1, n + print*,i,H(i) + enddo + + H = H - H2 + + nb_error = 0 + max_elem = 0d0 + + do i = 1, n + if (dabs(H(i)) > threshold) then + print*,H(i) + nb_error = nb_error + 1 + if (dabs(H(i)) > max_elem) then + max_elem = H(i) + endif + endif + enddo + + print*,'Threshold error', threshold + print*, 'Nb error', nb_error + print*,'Max error', max_elem + + deallocate(H,H2) + +end +#+END_SRC diff --git a/src/mo_localization/org/kick_the_mos.org b/src/mo_localization/org/kick_the_mos.org new file mode 100644 index 00000000..c0c6c02d --- /dev/null +++ b/src/mo_localization/org/kick_the_mos.org @@ -0,0 +1,18 @@ +#+BEGIN_SRC f90 :comments org :tangle kick_the_mos.irp.f +program kick_the_mos + + !BEGIN_DOC + ! To do a small rotation of the MOs + !END_DOC + + implicit none + + kick_in_mos = .True. + TOUCH kick_in_mos + + call set_classes_loc + call apply_pre_rotation + call unset_classes_loc + +end +#+END_SRC diff --git a/src/mo_localization/org/localization.org b/src/mo_localization/org/localization.org new file mode 100644 index 00000000..aaf9f18d --- /dev/null +++ b/src/mo_localization/org/localization.org @@ -0,0 +1,2899 @@ +* Orbital localization + +Molecular orbitals localization + +** Doc + +The program localizes the orbitals in function of their mo_class: +- core MOs +- inactive MOs +- active MOs +- virtual MOs +- deleted MOs -> no orbital localization + +Core MOs are localized with core MOs, inactives MOs are localized with +inactives MOs and so on. But deleted orbitals are not localized. + +WARNING: +- The user MUST SPECIFY THE MO CLASSES, otherwise if default mo class + is false the localization will be done for all the orbitals between + them, so the occupied and virtual MOs will be combined together + which is clearly not what we want to do. If default lpmo class is true + the localization will be done for the core, occupied and virtual + orbitals, but pay attention the mo_class are not deleted after... +- The mo class is not important (except "deleted") because it is not + link to the kind of MOs for CASSCF or CIPSI. It is just a way to + separate the MOs in order to localize them separetely, for example + to separate the core MOs, the occupied MOs and the virtuals MOs. +- The user MUST CHANGE THE MO CLASSES AFTER THE LOCALIZATION in order + to have the right mo class for his next calculation... + +For more information on the mo_class: +lpqp set_mo_class -h + +*** Foster-Boys localization +Foster-Boys localization: +- cite Foster +Boys, S. F., 1960, Rev. Mod. Phys. 32, 296. +DOI:https://doi.org/10.1103/RevModPhys.32.300 +Boys, S. F., 1966, in Quantum Theory of Atoms, Molecules, +and the Solid State, edited by P.-O. Löwdin (Academic +Press, New York), p. 253. +Daniel A. Kleier, Thomas A. Halgren, John H. Hall Jr., and William +N. Lipscomb, J. Chem. Phys. 61, 3905 (1974) +doi: 10.1063/1.1681683 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Comput. Chem. 2013, 34, +1456– 1462. DOI: 10.1002/jcc.23281 +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Theory +Comput. 2012, 8, 9, 3137–3146 +DOI: https://doi.org/10.1021/ct300473g +Høyvik, I.-M., Jansik, B., Jørgensen, P., J. Chem. Phys. 137, 224114 +(2012) +DOI: https://doi.org/10.1063/1.4769866 +Nicola Marzari, Arash A. Mostofi, Jonathan R. Yates, Ivo Souza, and David Vanderbilt +Rev. Mod. Phys. 84, 1419 +https://doi.org/10.1103/RevModPhys.84.1419 + +The Foster-Boys localization is a method to generate localized MOs +(LMOs) by minimizing the Foster-Boys criterion: +$$ C_{FB} = \sum_{i=1}^N \left[ < \phi_i | r^2 | \phi_i > - < \phi_i | r | +\phi_i >^2 \right] $$. +In fact it is equivalent to maximise +$$ C_2 = \sum_{i>j, \ i=1}^N \left[ < \phi_i | r | \phi_i > - < +\phi_j | r | \phi_j > \left]^2$$ +or +$$ C_3 = \sum_{i=1}^N \left[ < \phi_i | r | \phi_i > \right]^2.$$ + +Noting +$$A_{ii} = < \phi_i | r^2 | \phi_i > $$ +$$B_{ii} = < \phi_i | r | \phi_i > $$ + +$$ \beta = (B_{pp} - B_{qq})^2 - 4 B_{pq}^2 $$ +$$ \gamma = 4 B_{pq} (B_{pp} - B_{qq}) $$ + +\begin{align*} +C_{FB}(\theta) &= \sum_{i=1}^N \left[ A_{ii} - B_{ii}^2 \right] \\ +&- \left[ A_{pp} - B_{pp}^2 + A_{qq} - B_{qq}^2 \right] \\ +&+ \left[ A_{pp} + A_{qq} - B_{pp}^2 - B_{qq}^2 ++ \frac{1}{4} [(1-\cos(4\theta) \beta + \sin(4\theta) \gamma] \right] \\ +&= C_1(\theta=0) + \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] +\end{align*} + +The derivatives are: +\begin{align*} +\frac{\partial C_{FB}(\theta)}{\partial \theta} = \beta \sin(4\theta) + \gamma \cos(4 \theta) +\end{align*} + +\begin{align*} +\frac{\partial^2 C_{FB}(\theta)}{\partial \theta^2} = 4 \beta \cos(4\theta) - 4 \gamma \sin(4 \theta) +\end{align*} + +Similarly: +\begin{align*} +C_3(\theta) &= \sum_{i=1}^N [B_{ii}^2] \\ +&- B_{pp}^2 - B_{qq}^2 \\ +&+ B_{pp}^2 + B_{qq}^2 - \frac{1}{4} [(1-\cos(4\theta) \beta + \sin(4\theta) \gamma] \\ +&= C_3(\theta=0) - \frac{1}{4} [(1-\cos(4\theta)) \beta + \sin(4\theta) \gamma] +\end{align*} + +The derivatives are: +\begin{align*} +\frac{\partial C_3(\theta)}{\partial \theta} = - \beta \sin(4\theta) - \gamma \cos(4 \theta) +\end{align*} + +\begin{align*} +\frac{\partial^2 C_3(\theta)}{\partial \theta^2} = - 4 \beta \cos(4\theta) + 4 \gamma \sin(4 \theta) +\end{align*} + +And since we compute the derivatives around $\theta = 0$ (around the +actual position) we have: +\begin{align*} +\left. \frac{\partial{C_{FB}(\theta)}}{\partial \theta}\right|_{\theta=0} = \gamma +\end{align*} + +\begin{align*} +\left. \frac{\partial^2 C_{FB}(\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta +\end{align*} + +Locality of the orbitals: +- cite Hoyvik +As the Foster-Boys method tries to minimize the sum of the second +moment MO spread, the locality of each MO can be expressed as the +second moment of the MO spread. For the MO i, the locality criterion is +\begin{align*} +\sigma_i &= \sqrt{ - ^2} \\ +&= \sqrt{ - ^2 + - ^2 + - ^2} +\end{align*} + + +*** Pipek-Mezey localization +-cite pipek mezey 1989 +J. Pipek, P. G. Mezey, J. Chem. Phys. 90, 4916 (1989) +DOI: 10.1063/1.456588 + +Foster-Boys localization does not preserve the $\sigma - \pi$ separation of the +MOs, it leads to "banana" orbitals. The Pipek-Mezey localization +normally preserves this separation. + +The optimum functional $\mathcal{P}$ is obtained for the maximum of +$D^{-1}$ +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} + +As for the Foster Boys localization, the change in the functional for +the rotation of two MOs can be obtained using very similar terms +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} +The matrix element of the operator $P_A$ are obtained using +\begin{align*} +<\rho | \tilde{\mu}> = \delta_{\rho \mu} +\end{align*} +which leads to +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +So similarly the first and second derivatives are + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} + +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} + +** Localization procedure + +Localization procedure: + +To do the localization we compute the gradient and the +diagonal hessian of the Foster-Boys criterion with respect to the MO +rotations and we minimize it with the Newton method. + +In order to avoid the problem of starting on a saddle point, the +localization procedure starts by giving a little kick in the MOs, by +putting "kick in mos" true, in order to break the symmetry and escape +from a possible saddle point. + +In order to speed up the iteration we compute the gradient, the +diagonal hessian and the step in temporary matrices of the size +(number MOs in mo class by number MOs in mo class) + +** Remarks + +Variables: + +The indexes i and j refere to the positions of the elements in +the "full space", i.e., the arrays containing elements for all the MOs, +but the indexes tmp_i and tmp_j to the positions of the elements in +the "reduced space/subspace", i.e., the arrays containing elements for +a restricted number of MOs. +Example: +The gradient for the localization of the core MOs can be expressed +as a vector of length mo_num*(mo_num-1)/2 with only +n_core_orb*(n_core_orb-1)/2 non zero elements, so it is more relevant +to use a vector of size n_act_orb*(n_core_orb-1)/2. +So here the gradient is a vector of size +tmp_list_size*(tmp_list_size)/2 where tmp_list_size is the number of +MOs is the corresponding mo class. +The same thing happened for the hessian, the matrix containing the +step and the rotation matrix, which are tmp_list_size by tmp_list_size +matrices. + +Ex gradient for 4 core orbitales: +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +0 \\ +\vdots \\ +0 \\ +\end{pmatrix} +\end{align*} + +\begin{align*} +\begin{pmatrix} +0 & -a & -b & -d & \hdots & 0 \\ +a & 0 & -c & -e & \hdots & 0 \\ +b & c & 0 & -f & \hdots & 0 \\ +d & e & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The same thing can be done if indexes of the orbitales are not +consecutives since it's done with lists of MOs: + +\begin{align*} +\begin{pmatrix} +0 & -a & 0 & -b & -d & \hdots & 0 \\ +a & 0 & 0 & -c & -e & \hdots & 0 \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +b & c & 0 & 0 & -f & \hdots & 0 \\ +d & e & 0 & f & 0 & \hdots & 0 \\ +\vdots & \vdots & \vdots & \vdots & \vdots & \ddots & \vdots \\ +0 & 0 & 0 & 0 & 0 & \hdots & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +0 & -a & -b & -d \\ +a & 0 & -c & -e \\ +b & c & 0 & -f \\ +d & e & f & 0 \\ +\end{pmatrix} +\Rightarrow +\begin{pmatrix} +a \\ +b \\ +c \\ +e \\ +f \\ +\end{pmatrix} +\end{align*} + +The dipoles are updated using the "ao to mo" subroutine without the +"restore symmetry" which is actually in N^4 but can be rewrite in N^2 +log(N^2). +The bottleneck of the program is normally N^3 with the matrix +multiplications/diagonalizations. The use of the full hessian can be +an improvement but it will scale in N^4... + +** Program + +#+BEGIN_SRC f90 org :tangle localization.irp.f +program localization + + implicit none + + call set_classes_loc + call run_localization + call unset_classes_loc + +end +#+END_SRC + + +Variables: +| pre_rot(mo_num, mo_num) | double precision | Matrix for the pre rotation | +| R(mo_num,mo_num) | double precision | Rotation matrix | +| tmp_R(:,:) | double precision | Rottation matrix in a subsapce | +| prev_mos(ao_num, mo_num) | double precision | Previous mo_coef | +| spatial_extent(mo_num) | double precision | Spatial extent of the orbitals | +| criterion | double precision | Localization criterion | +| prev_criterion | double precision | Previous criterion | +| criterion_model | double precision | Estimated next criterion | +| rho | double precision | Ratio to measure the agreement between the model | +| | | and the reality | +| delta | double precision | Radisu of the trust region | +| norm_grad | double precision | Norm of the gradient | +| info | integer | for dsyev from Lapack | +| max_elem | double precision | maximal element in the gradient | +| v_grad(:) | double precision | Gradient | +| H(:,:) | double precision | Hessian (diagonal) | +| e_val(:) | double precision | Eigenvalues of the hessian | +| W(:,:) | double precision | Eigenvectors of the hessian | +| tmp_x(:) | double precision | Step in 1D (in a subaspace) | +| tmp_m_x(:,:) | double precision | Step in 2D (in a subaspace) | +| tmp_list(:) | double precision | List of MOs in a mo_class | +| i,j,k | integer | Indexes in the full MO space | +| tmp_i, tmp_j, tmp_k | integer | Indexes in a subspace | +| l | integer | Index for the mo_class | +| key(:) | integer | Key to sort the eigenvalues of the hessian | +| nb_iter | integer | Number of iterations | +| must_exit | logical | To exit the trust region loop | +| cancel_step | logical | To cancel a step | +| not_*converged | logical | To localize the different mo classes | +| t* | double precision | To measure the time | +| n | integer | mo_num*(mo_num-1)/2, number of orbital parameters | +| tmp_n | integer | dim_subspace*(dim_subspace-1)/2 | +| | | Number of dimension in the subspace | + +Variables in qp_edit for the localization: +| localization_method | +| localization_max_nb_iter | +| default_mo_class | +| thresh_loc_max_elem_grad | +| kick_in_mos | +| angle_pre_rot | + ++ all the variables for the trust region + +Cf. qp_edit orbital optimization + +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f +subroutine run_localization + + include 'pi.h' + + BEGIN_DOC + ! Orbital localization + END_DOC + + implicit none + + ! Variables + double precision, allocatable :: pre_rot(:,:), R(:,:) + double precision, allocatable :: prev_mos(:,:), spatial_extent(:), tmp_R(:,:) + double precision :: criterion, norm_grad + integer :: i,j,k,l,p, tmp_i, tmp_j, tmp_k + integer :: info + integer :: n, tmp_n, tmp_list_size + double precision, allocatable :: v_grad(:), H(:), tmp_m_x(:,:), tmp_x(:),W(:),e_val(:) + double precision :: max_elem, t1, t2, t3, t4, t5, t6 + integer, allocatable :: tmp_list(:), key(:) + double precision :: prev_criterion, rho, delta, criterion_model + integer :: nb_iter, nb_sub_iter + logical :: not_converged, not_core_converged + logical :: not_act_converged, not_inact_converged, not_virt_converged + logical :: use_trust_region, must_exit, cancel_step,enforce_step_cancellation + + n = mo_num*(mo_num-1)/2 + + ! Allocation + allocate(spatial_extent(mo_num)) + allocate(pre_rot(mo_num, mo_num), R(mo_num, mo_num)) + allocate(prev_mos(ao_num, mo_num)) + + ! Locality before the localization + call compute_spatial_extent(spatial_extent) + + ! Choice of the method + print*,'' + print*,'Localization method:',localization_method + if (localization_method == 'boys') then + print*,'Foster-Boys localization' + elseif (localization_method == 'pipek') then + print*,'Pipek-Mezey localization' + else + print*,'Unknown localization_method, please select boys or pipek' + call abort + endif + print*,'' + + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### Before the pre rotation' + + ! Debug + if (debug_hf) then + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + if (tmp_list_size >= 2) then + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, mo_class(tmp_list(1)) + endif + + deallocate(tmp_list) + + enddo + + ! Debug + !print*,'HF', HF_energy + +#+END_SRC + +** Loc +#+BEGIN_SRC f90 :comments org :tangle localization.irp.f + ! Pre rotation, to give a little kick in the MOs + call apply_pre_rotation() + + ! Criterion after the pre rotation + ! Localization criterion (FB, PM, ...) for each mo_class + print*,'### After the pre rotation' + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + call criterion_localization(tmp_list_size, tmp_list,criterion) + print*,'Criterion:', criterion, trim(mo_class(tmp_list(1))) + + deallocate(tmp_list) + endif + + enddo + + ! Debug + !print*,'HF', HF_energy + + print*,'' + print*,'========================' + print*,' Orbital localization' + print*,'========================' + print*,'' + + !Initialization + not_converged = .TRUE. + + ! To do the localization only if there is at least 2 MOs + if (dim_list_core_orb >= 2) then + not_core_converged = .TRUE. + else + not_core_converged = .FALSE. + endif + + if (dim_list_act_orb >= 2) then + not_act_converged = .TRUE. + else + not_act_converged = .FALSE. + endif + + if (dim_list_inact_orb >= 2) then + not_inact_converged = .TRUE. + else + not_inact_converged = .FALSE. + endif + + if (dim_list_virt_orb >= 2) then + not_virt_converged = .TRUE. + else + not_virt_converged = .FALSE. + endif + + ! Loop over the mo_classes + do l = 1, 4 + + if (l==1) then ! core + not_converged = not_core_converged + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + not_converged = not_act_converged + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + not_converged = not_inact_converged + tmp_list_size = dim_list_inact_orb + else ! virt + not_converged = not_virt_converged + tmp_list_size = dim_list_virt_orb + endif + + ! Next iteration if converged = true + if (.not. not_converged) then + cycle + endif + + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + + ! Display + if (not_converged) then + print*,'' + print*,'###', trim(mo_class(tmp_list(1))), 'MOs ###' + print*,'' + endif + + ! Size for the 2D -> 1D transformation + tmp_n = tmp_list_size * (tmp_list_size - 1)/2 + + ! Without hessian + trust region + if (.not. localization_use_hessian) then + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size), tmp_x(tmp_n)) + + ! Criterion + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Init + nb_iter = 0 + delta = 1d0 + + !Loop + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Angles of rotation + call theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + tmp_m_x = - tmp_m_x * delta + + ! Rotation submatrix + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + ! To ensure that the rotation matrix is unitary + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + delta = delta * 0.5d0 + cycle + else + delta = min(delta * 2d0, 1d0) + endif + + ! Full rotation matrix and application of the rotation + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + call apply_mo_rotation(R, prev_mos) + + ! Update the needed data + call update_data_localization() + + ! New criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + print*,'Max elem :', max_elem + print*,'Delta :', delta + + nb_iter = nb_iter + 1 + + ! Exit + if (nb_iter >= localization_max_nb_iter .or. dabs(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + enddo + + ! Save the changes + call update_data_localization() + call save_mos() + TOUCH mo_coef + + ! Deallocate + deallocate(v_grad, tmp_m_x, tmp_list) + deallocate(tmp_R, tmp_x) + + ! Trust region + else + + ! Allocation of temporary arrays + allocate(v_grad(tmp_n), H(tmp_n), tmp_m_x(tmp_list_size, tmp_list_size)) + allocate(tmp_R(tmp_list_size, tmp_list_size)) + allocate(tmp_x(tmp_n), W(tmp_n), e_val(tmp_n), key(tmp_n)) + + ! ### Initialization ### + delta = 0d0 ! can be deleted (normally) + nb_iter = 0 ! Must start at 0 !!! + rho = 0.5d0 ! Must be 0.5 + + ! Compute the criterion before the loop + call criterion_localization(tmp_list_size, tmp_list, prev_criterion) + + ! Loop until the convergence + do while (not_converged) + + print*,'' + print*,'***********************' + print*,'Iteration', nb_iter + print*,'***********************' + print*,'' + + ! Gradient + call gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + ! Diagonal hessian + call hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + ! Diagonalization of the diagonal hessian by hands + !call diagonalization_hessian(tmp_n,H,e_val,w) + do i = 1, tmp_n + e_val(i) = H(i) + enddo + + ! Key list for dsort + do i = 1, tmp_n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, tmp_n) + + ! Eigenvectors + W = 0d0 + do i = 1, tmp_n + W(i) = dble(key(i)) + enddo + + ! To enter in the loop just after + cancel_step = .True. + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'-----------------------------' + print*, mo_class(tmp_list(1)) + print*,'Iteration:', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,1, H, W, e_val, v_grad, prev_criterion, & + rho, nb_iter, delta, criterion_model, tmp_x, must_exit) + + ! Internal loop exit condition + if (must_exit) then + print*,'trust_region_step_w_expected_e sent: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, tmp_list_size, tmp_x, tmp_m_x) + + ! Rotation submatrix (square matrix tmp_list_size by tmp_list_size) + call rotation_matrix(tmp_m_x, tmp_list_size, tmp_R, tmp_list_size, tmp_list_size, & + info, enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(tmp_list_size, tmp_list, tmp_R, R) + + ! Rotation of the MOs + call apply_mo_rotation(R, prev_mos) + + ! Update the things related to mo_coef + call update_data_localization() + + ! Update the criterion + call criterion_localization(tmp_list_size, tmp_list, criterion) + print*,'Criterion:', trim(mo_class(tmp_list(1))), nb_iter, criterion + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, & + criterion_model, rho, cancel_step) + + ! Cancellation of the step, previous MOs + if (cancel_step) then + mo_coef = prev_mos + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + !call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exti = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem) < thresh_loc_max_elem_grad) then + not_converged = .False. + endif + if (nb_iter > localization_max_nb_iter) then + not_converged = .False. + endif + enddo + + ! Deallocation of temporary arrays + deallocate(v_grad, H, tmp_m_x, tmp_R, tmp_list, tmp_x, W, e_val, key) + + ! Save the MOs + call save_mos() + TOUCH mo_coef + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + endif + enddo + + ! Seems unecessary + TOUCH mo_coef + + ! To sort the MOs using the diagonal elements of the Fock matrix + if (sort_mos_by_e) then + call run_sort_by_fock_energies() + endif + + ! Debug + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + + ! Locality after the localization + call compute_spatial_extent(spatial_extent) + +end +#+END_SRC + +** Gathering +Gradient/hessian/criterion for the localization: +They are chosen in function of the localization method + +Gradient: + +qp_edit : +| localization_method | method for the localization | + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_localization(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the gradient of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + + if (localization_method == 'boys') then + call gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + !call gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + elseif (localization_method== 'pipek') then + call gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + else + print*,'Unkown method:'//localization_method + call abort + endif + +end +#+END_SRC + +Hessian: + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_localization(tmp_n, tmp_list_size, tmp_list, H) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + + if (localization_method == 'boys') then + call hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + !call hessian_FB(tmp_n, tmp_list_size, tmp_list, H) ! non OMP for debugging + elseif (localization_method == 'pipek') then + call hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Criterion: + +Output: +| criterion | double precision | Criterion for the orbital localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_localization(tmp_list_size, tmp_list,criterion) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the localization criterion of the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + + if (localization_method == 'boys') then + call criterion_FB(tmp_list_size, tmp_list, criterion) + elseif (localization_method == 'pipek') then + !call criterion_PM(tmp_list_size, tmp_list,criterion) + call criterion_PM_v3(tmp_list_size, tmp_list, criterion) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +Subroutine to update the datas needed for the localization +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine update_data_localization() + + include 'pi.h' + + implicit none + + if (localization_method == 'boys') then + ! Update the dipoles + call ao_to_mo_no_sym(ao_dipole_x, ao_num, mo_dipole_x, mo_num) + call ao_to_mo_no_sym(ao_dipole_y, ao_num, mo_dipole_y, mo_num) + call ao_to_mo_no_sym(ao_dipole_z, ao_num, mo_dipole_z, mo_num) + elseif (localization_method == 'pipek') then + ! Nothing required + else + print*,'Unkown method: '//localization_method + call abort + endif +end +#+END_SRC + +Angles: + +Output: +| tmp_m_x(tmp_list_size, tmp_list_size) | double precision | Angles for the rotations in the subspace | +| max_elem | double precision | Maximal angle | + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine theta_localization(tmp_list, tmp_list_size, tmp_m_x, max_elem) + + include 'pi.h' + + implicit none + + BEGIN_DOC + ! Compute the rotation angles between the MOs for the chosen localization method + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: tmp_m_x(tmp_list_size,tmp_list_size), max_elem + + if (localization_method == 'boys') then + call theta_FB(tmp_list, tmp_list_size, tmp_m_x, max_elem) + elseif (localization_method== 'pipek') then + call theta_PM(tmp_list, tmp_list_size, tmp_m_x, max_elem) + else + print*,'Unkown method: '//localization_method + call abort + endif + +end +#+END_SRC + +** Foster-Boys +*** Gradient +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_n,tmp_n) | double precision | Gradient in the matrix form | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB:', t3 + + print*,'---End gradient_FB---' + +end subroutine +#+END_SRC + +*** Gradient (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_FB_omp(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + use omp_lib + + implicit none + + BEGIN_DOC + ! Compute the gradient for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k + double precision :: t1, t2, t3 + + print*,'' + print*,'---gradient_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,m_grad,v_grad,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + m_grad(tmp_i,tmp_j) = 4d0 * mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + +4d0 * mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + +4d0 * mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + !$OMP END DO + + ! 2D -> 1D + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_FB_omp:', t3 + + print*,'---End gradient_FB_omp---' + +end subroutine +#+END_SRC + +*** Hessian + +Output: +| H(tmp_n,tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +Internal: +| beta(tmp_n,tmp_n) | double precision | beta in the documentation below to compute the hesian | +| i,j,k | integer | indexes in the full space | +| tmp_i,tmp_j,tmp_k | integer | indexes in the subspace | +| t* | double precision | to compute the time | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB---' + + call wall_time(t1) + + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Calculation + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + + ! Diagonal of the hessian + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB:', t3 + + print*,'---End hessian_FB---' + +end subroutine +#+END_SRC + +*** Hessian (OMP) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_FB_omp(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute the diagonal hessian for the Foster-Boys localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:) + integer :: i,j,tmp_k,tmp_i,tmp_j + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_FB_omp---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE(i,j,tmp_i,tmp_j,tmp_k) & + !$OMP SHARED(tmp_n,tmp_list_size,beta,H,mo_dipole_x,mo_dipole_y,mo_dipole_z,tmp_list) & + !$OMP DEFAULT(NONE) + + + ! Calculation + !$OMP DO + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 - 4d0 * mo_dipole_x(i,j)**2 & + +(mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 - 4d0 * mo_dipole_y(i,j)**2 & + +(mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 - 4d0 * mo_dipole_z(i,j)**2 + enddo + enddo + !$OMP END DO + + ! Initialization + !$OMP DO + do i = 1, tmp_n + H(i) = 0d0 + enddo + !$OMP END DO + + ! Diagonalm of the hessian + !$OMP DO + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + !$OMP END DO + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) + + ! Deallocation + deallocate(beta) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_FB_omp:', t3 + + print*,'---End hessian_FB_omp---' + +end subroutine +#+END_SRC + +** Pipek-Mezey +*** Gradient v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine grad_pipek(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size)) + + ! Initialization + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Gradient + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int) + +end subroutine grad_pipek +#+END_SRC + +*** Gradient + +The gradient is + +\begin{align*} +\left. \frac{\partial \mathcal{P} (\theta)}{\partial \theta} \right|_{\theta=0}= \gamma^{PM} +\end{align*} +with +\begin{align*} +\gamma_{st}^{PM} = \sum_{A=1}^N \left[ - \right] +\end{align*} + +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +Input: +| tmp_n | integer | Number of parameters in the MO subspace | +| tmp_list_size | integer | Number of MOs in the mo_class we want to localize | +| tmp_list(tmp_list_size) | integer | MOs in the mo_class | + +Output: +| v_grad(tmp_n) | double precision | Gradient in the subspace | +| max_elem | double precision | Maximal element in the gradient | +| norm_grad | double precision | Norm of the gradient | + +Internal: +| m_grad(tmp_list_size,tmp_list_size) | double precision | Gradient in a 2D array | +| tmp_int(tmp_list_size,tmp_list_size) | | Temporary array to store the integrals | +| tmp_accu(tmp_list_size,tmp_list_size) | | Temporary array to store a matrix | +| | | product and compute tmp_int | +| CS(tmp_list_size,ao_num) | | Array to store the result of mo_coef * ao_overlap | +| tmp_mo_coef(ao_num,tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the mo_class | +| tmp_mo_coef2(nucl_n_aos(a),tmp_list_size) | | Array to store just the useful MO coefficients | +| | | depending of the nuclei | +| tmp_CS(tmp_list_size,nucl_n_aos(a)) | | Array to store just the useful mo_coef * ao_overlap | +| | | values depending of the nuclei | +| a | | index to loop over the nuclei | +| b | | index to loop over the AOs which belongs to the nuclei a | +| mu | | index to refer to an AO which belongs to the nuclei a | +| rho | | index to loop over all the AOs | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine gradient_PM(tmp_n, tmp_list_size, tmp_list, v_grad, max_elem, norm_grad) + + implicit none + + BEGIN_DOC + ! Compute gradient for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: v_grad(tmp_n), max_elem, norm_grad + double precision, allocatable :: m_grad(:,:), tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + double precision :: t1,t2,t3 + + print*,'' + print*,'---gradient_PM---' + + call wall_time(t1) + + ! Allocation + allocate(m_grad(tmp_list_size, tmp_list_size), tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + m_grad = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + m_grad(tmp_i,tmp_j) = m_grad(tmp_i,tmp_j) + 4d0 * tmp_int(tmp_i,tmp_j) * (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j)) + + enddo + enddo + + enddo + + ! 2D -> 1D + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + v_grad(tmp_k) = m_grad(tmp_i,tmp_j) + enddo + + ! Maximum element in the gradient + max_elem = 0d0 + do tmp_k = 1, tmp_n + if (ABS(v_grad(tmp_k)) > max_elem) then + max_elem = ABS(v_grad(tmp_k)) + endif + enddo + + ! Norm of the gradient + norm_grad = 0d0 + do tmp_k = 1, tmp_n + norm_grad = norm_grad + v_grad(tmp_k)**2 + enddo + norm_grad = dsqrt(norm_grad) + + print*, 'Maximal element in the gradient:', max_elem + print*, 'Norm of the gradient:', norm_grad + + ! Deallocation + deallocate(m_grad,tmp_int,CS,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in gradient_PM:', t3 + + print*,'---End gradient_PM---' + +end +#+END_SRC + +*** Hessian v1 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hess_pipek(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size)) + + beta = 0d0 + + do a = 1, nucl_num + tmp_int = 0d0 + + do tmp_j = 1, tmp_list_size + j = tmp_list(tmp_j) + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do rho = 1, ao_num + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + +end +#+END_SRC + +*** Hessian + +The hessian is +\begin{align*} +\left. \frac{\partial^2 \mathcal{P} (\theta)}{\partial \theta^2}\right|_{\theta=0} = 4 \beta^{PM} +\end{align*} +\begin{align*} +\beta_{st}^{PM} = \sum_{A=1}^N \left( ^2 - \frac{1}{4} \left[ - \right]^2 \right) +\end{align*} + +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} +$\sum_{\rho}$ -> sum over all the AOs +$\sum_{\mu \in A}$ -> sum over the AOs which belongs to atom A +$c^t$ -> expansion coefficient of orbital |t> + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine hessian_PM(tmp_n, tmp_list_size, tmp_list, H) + + implicit none + + BEGIN_DOC + ! Compute diagonal hessian for the Pipek-Mezey localization + END_DOC + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: H(tmp_n) + double precision, allocatable :: beta(:,:),tmp_int(:,:),CS(:,:),tmp_mo_coef(:,:),tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,tmp_k,tmp_i, tmp_j, a,b,rho,mu + double precision :: max_elem, t1,t2,t3 + + print*,'' + print*,'---hessian_PM---' + + call wall_time(t1) + + ! Allocation + allocate(beta(tmp_list_size,tmp_list_size),tmp_int(tmp_list_size,tmp_list_size),tmp_accu(tmp_list_size,tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + beta = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Calculation + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + beta(tmp_i,tmp_j) = beta(tmp_i, tmp_j) + (tmp_int(tmp_i,tmp_i) - tmp_int(tmp_j,tmp_j))**2 - 4d0 * tmp_int(tmp_i,tmp_j)**2 + + enddo + enddo + + enddo + + H = 0d0 + do tmp_k = 1, tmp_n + call vec_to_mat_index(tmp_k,tmp_i,tmp_j) + H(tmp_k) = 4d0 * beta(tmp_i, tmp_j) + enddo + + ! Deallocation + deallocate(beta,tmp_int) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in hessian_PM:', t3 + + print*,'---End hessian_PM---' + +end + +#+END_SRC + +** Criterion +*** Criterion PM (old) +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_crit_pipek(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + ! Allocation + allocate(tmp_int(mo_num, mo_num)) + + criterion = 0d0 + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do i = 1, mo_num + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(i,i) = tmp_int(i,i) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,i) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,i)) + + enddo + enddo + enddo + + do i = 1, mo_num + criterion = criterion + tmp_int(i,i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int) + +end +#+END_SRC + +*** Criterion PM + +The criterion is computed as +\begin{align*} +\mathcal{P} = \sum_{i=1}^n \sum_{A=1}^N \left[ \right]^2 +\end{align*} +with +\begin{align*} + = \frac{1}{2} \sum_{\rho} \sum_{\mu \in A} \left[ c_{\rho}^{s*} S_{\rho \nu} c_{\mu}^{t} +c_{\mu}^{s*} S_{\mu \rho} c_{\rho}^t \right] +\end{align*} + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:),CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho + + print*,'' + print*,'---criterion_PM---' + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),CS(mo_num,ao_num)) + + ! Initialization + criterion = 0d0 + + call dgemm('T','N',mo_num,ao_num,ao_num,1d0,mo_coef,size(mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + tmp_int = 0d0 + + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) + + tmp_int(tmp_i,tmp_i) = tmp_int(tmp_i,tmp_i) + 0.5d0 * (CS(i,mu) * mo_coef(mu,i) + mo_coef(mu,i) * CS(i,mu)) + + ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS) + + print*,'---End criterion_PM---' + +end +#+END_SRC + +*** Criterion PM v3 +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_PM_v3(tmp_list_size,tmp_list,criterion) + + implicit none + + BEGIN_DOC + ! Compute the Pipek-Mezey localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + double precision, allocatable :: tmp_int(:,:), CS(:,:), tmp_mo_coef(:,:), tmp_mo_coef2(:,:),tmp_accu(:,:),tmp_CS(:,:) + integer :: i,j,k,tmp_i,tmp_j,tmp_k, a, b, mu ,rho,nu,c + double precision :: t1,t2,t3 + + print*,'' + print*,'---criterion_PM_v3---' + + call wall_time(t1) + + ! Allocation + allocate(tmp_int(tmp_list_size, tmp_list_size),tmp_accu(tmp_list_size, tmp_list_size)) + allocate(CS(tmp_list_size,ao_num),tmp_mo_coef(ao_num,tmp_list_size)) + + criterion = 0d0 + + ! submatrix of the mo_coef + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + do j = 1, ao_num + + tmp_mo_coef(j,tmp_i) = mo_coef(j,i) + + enddo + enddo + + ! ao_overlap(ao_num,ao_num) + ! mo_coef(ao_num,mo_num) + call dgemm('T','N',tmp_list_size,ao_num,ao_num,1d0,tmp_mo_coef,size(tmp_mo_coef,1),ao_overlap,size(ao_overlap,1),0d0,CS,size(CS,1)) + + do a = 1, nucl_num ! loop over the nuclei + + do j = 1, tmp_list_size + do i = 1, tmp_list_size + tmp_int(i,j) = 0d0 + enddo + enddo + + !do tmp_j = 1, tmp_list_size + ! do tmp_i = 1, tmp_list_size + ! do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + ! mu = nucl_aos(a,b) + + ! tmp_int(tmp_i,tmp_j) = tmp_int(tmp_i,tmp_j) + 0.5d0 * (CS(tmp_i,mu) * tmp_mo_coef(mu,tmp_j) + tmp_mo_coef(mu,tmp_i) * CS(tmp_j,mu)) + + ! ! (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + ! !+ mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + ! enddo + ! enddo + !enddo + + allocate(tmp_mo_coef2(nucl_n_aos(a),tmp_list_size),tmp_CS(tmp_list_size,nucl_n_aos(a))) + + do tmp_i = 1, tmp_list_size + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + + tmp_mo_coef2(b,tmp_i) = tmp_mo_coef(mu,tmp_i) + + enddo + enddo + + do b = 1, nucl_n_aos(a) + mu = nucl_aos(a,b) + do tmp_i = 1, tmp_list_size + + tmp_CS(tmp_i,b) = CS(tmp_i,mu) + + enddo + enddo + + call dgemm('N','N',tmp_list_size,tmp_list_size,nucl_n_aos(a),1d0,tmp_CS,size(tmp_CS,1),tmp_mo_coef2,size(tmp_mo_coef2,1),0d0,tmp_accu,size(tmp_accu,1)) + + ! Integrals + do tmp_j = 1, tmp_list_size + do tmp_i = 1, tmp_list_size + + tmp_int(tmp_i,tmp_j) = 0.5d0 * (tmp_accu(tmp_i,tmp_j) + tmp_accu(tmp_j,tmp_i)) + + enddo + enddo + + deallocate(tmp_mo_coef2,tmp_CS) + + ! Criterion + do tmp_i = 1, tmp_list_size + criterion = criterion + tmp_int(tmp_i,tmp_i)**2 + enddo + + enddo + + criterion = - criterion + + deallocate(tmp_int,CS,tmp_accu,tmp_mo_coef) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in criterion_PM_v3:', t3 + + print*,'---End criterion_PM_v3---' + +end +#+END_SRC + +*** Criterion FB (old) + +The criterion is just computed as + +\begin{align*} +C = - \sum_i^{mo_{num}} (^2 + ^2 + ^2) +\end{align*} + +The minus sign is here in order to minimize this criterion + +Output: +| criterion | double precision | criterion for the Foster-Boys localization | + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB_old(criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + double precision, intent(out) :: criterion + integer :: i + + ! Criterion (= \sum_i ^2 ) + criterion = 0d0 + do i = 1, mo_num + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +*** Criterion FB +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine criterion_FB(tmp_list_size, tmp_list, criterion) + + implicit none + + BEGIN_DOC + ! Compute the Foster-Boys localization criterion + END_DOC + + integer, intent(in) :: tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(out) :: criterion + integer :: i, tmp_i + + ! Criterion (= - \sum_i ^2 ) + criterion = 0d0 + do tmp_i = 1, tmp_list_size + i = tmp_list(tmp_i) + criterion = criterion + mo_dipole_x(i,i)**2 + mo_dipole_y(i,i)**2 + mo_dipole_z(i,i)**2 + enddo + criterion = - criterion + +end subroutine +#+END_SRC + +** Theta + +In: +| n | integer | number of MOs in the considered MO class | +| l | integer | list of MOs of the considered class | + +Out: +| m_x(n,n) | double precision | Matrix containing the rotation angle between all the different | +| | | pairs of MOs to apply the rotations (need a minus sign) | +| max_elem | double precision | Maximal angle in absolute value | + +$$\cos(4 \theta) = \frac{-A{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\sin(4 \theta) = \frac{B{ij}}{\sqrt{(A_{ij}^2 + B_{ij}^2)} $$ +$$\tan(4 \theta) = \frac{\sin(4 \theta)}{\cos(4 \theta)}$$ +where $\theta$ is in fact $\theta_{ij}$ + +For Foster-Boys localization: +$$A_{ij} = ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = ( - )$$ + + +For Pipek-Mezey localization: +$$A_{ij} = \sum_A ^2 - \frac{1}{4} ( - )^2$$ +$$B_{ij} = \sum_A ( - )$$ +with +$$ = \frac{1}{2} \sum_\rho \sum_{\mu \in A} ( c_\rho^{i*} S_{\rho +\mu} c_\mu^j + c_\mu^{i*} S_{\mu \rho} c_\rho^j)$$ +$i,j$ MOs +$\mu, \rho$ AOs +$A$ nucleus +$S$ overlap matrix +$c$ MO coefficient +$r$ position operator + +#+begin_src f90 :tangle localization_sub.irp.f +subroutine theta_FB(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: i,j, tmp_i, tmp_j + double precision, allocatable :: cos4theta(:,:), sin4theta(:,:) + double precision, allocatable :: A(:,:), B(:,:), beta(:,:), gamma(:,:) + integer :: idx_i,idx_j + + allocate(cos4theta(n, n), sin4theta(n, n)) + allocate(A(n,n), B(n,n), beta(n,n), gamma(n,n)) + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + A(tmp_i,tmp_j) = mo_dipole_x(i,j)**2 - 0.25d0 * (mo_dipole_x(i,i) - mo_dipole_x(j,j))**2 & + + mo_dipole_y(i,j)**2 - 0.25d0 * (mo_dipole_y(i,i) - mo_dipole_y(j,j))**2 & + + mo_dipole_z(i,j)**2 - 0.25d0 * (mo_dipole_z(i,i) - mo_dipole_z(j,j))**2 + enddo + A(j,j) = 0d0 + enddo + + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + B(tmp_i,tmp_j) = mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j)) + enddo + enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! beta(tmp_i,tmp_j) = (mo_dipole_x(i,i) - mo_dipole_x(j,j)) - 4d0 * mo_dipole_x(i,j)**2 & + ! + (mo_dipole_y(i,i) - mo_dipole_y(j,j)) - 4d0 * mo_dipole_y(i,j)**2 & + ! + (mo_dipole_z(i,i) - mo_dipole_z(j,j)) - 4d0 * mo_dipole_z(i,j)**2 + ! enddo + !enddo + + !do tmp_j = 1, n + ! j = l(tmp_j) + ! do tmp_i = 1, n + ! i = l(tmp_i) + ! gamma(tmp_i,tmp_j) = 4d0 * ( mo_dipole_x(i,j) * (mo_dipole_x(i,i) - mo_dipole_x(j,j)) & + ! + mo_dipole_y(i,j) * (mo_dipole_y(i,i) - mo_dipole_y(j,j)) & + ! + mo_dipole_z(i,j) * (mo_dipole_z(i,i) - mo_dipole_z(j,j))) + ! enddo + !enddo + + ! + !do j = 1, n + ! do i = 1, n + ! cos4theta(i,j) = - A(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + !do j = 1, n + ! do i = 1, n + ! sin4theta(i,j) = B(i,j) / dsqrt(A(i,j)**2 + B(i,j)**2) + ! enddo + !enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(B(i,j), -A(i,j)) + !m_x(i,j) = 0.25d0 * atan2(sin4theta(i,j), cos4theta(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + !idx_i = i + !idx_j = j + endif + enddo + enddo + + ! Debug + !print*,'' + !print*,'sin/B' + !do i = 1, n + ! write(*,'(100F10.4)') sin4theta(i,:) + ! !B(i,:) + !enddo + !print*,'cos/A' + !do i = 1, n + ! write(*,'(100F10.4)') cos4theta(i,:) + ! !A(i,:) + !enddo + !print*,'X' + !!m_x = 0d0 + !!m_x(idx_i,idx_j) = max_elem + !!m_x(idx_j,idx_i) = -max_elem + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(cos4theta, sin4theta) + deallocate(A,B,beta,gamma) + +end +#+end_src + +#+begin_src f90 :comments org :tangle localization_sub.irp.f +subroutine theta_PM(l, n, m_x, max_elem) + + include 'pi.h' + + BEGIN_DOC + ! Compute the angles to minimize the Foster-Boys criterion by using pairwise rotations of the MOs + ! Warning: you must give - the angles to build the rotation matrix... + END_DOC + + implicit none + + integer, intent(in) :: n, l(n) + double precision, intent(out) :: m_x(n,n), max_elem + + integer :: a,b,i,j,tmp_i,tmp_j,rho,mu,nu,idx_i,idx_j + double precision, allocatable :: Aij(:,:), Bij(:,:), Pa(:,:) + + allocate(Aij(n,n), Bij(n,n), Pa(n,n)) + + do a = 1, nucl_num ! loop over the nuclei + Pa = 0d0 ! Initialization for each nuclei + + ! Loop over the MOs of the a given mo_class to compute + do tmp_j = 1, n + j = l(tmp_j) + do tmp_i = 1, n + i = l(tmp_i) + do rho = 1, ao_num ! loop over all the AOs + do b = 1, nucl_n_aos(a) ! loop over the number of AOs which belongs to the nuclei a + mu = nucl_aos(a,b) ! AO centered on atom a + + Pa(tmp_i,tmp_j) = Pa(tmp_i,tmp_j) + 0.5d0 * (mo_coef(rho,i) * ao_overlap(rho,mu) * mo_coef(mu,j) & + + mo_coef(mu,i) * ao_overlap(mu,rho) * mo_coef(rho,j)) + + enddo + enddo + enddo + enddo + + ! A + do j = 1, n + do i = 1, n + Aij(i,j) = Aij(i,j) + Pa(i,j)**2 - 0.25d0 * (Pa(i,i) - Pa(j,j))**2 + enddo + enddo + + ! B + do j = 1, n + do i = 1, n + Bij(i,j) = Bij(i,j) + Pa(i,j) * (Pa(i,i) - Pa(j,j)) + enddo + enddo + + enddo + + ! Theta + do j = 1, n + do i = 1, n + m_x(i,j) = 0.25d0 * atan2(Bij(i,j), -Aij(i,j)) + enddo + enddo + + ! Enforce a perfect antisymmetry + do j = 1, n-1 + do i = j+1, n + m_x(j,i) = - m_x(i,j) + enddo + enddo + do i = 1, n + m_x(i,i) = 0d0 + enddo + + ! Max + max_elem = 0d0 + do j = 1, n-1 + do i = j+1, n + if (dabs(m_x(i,j)) > dabs(max_elem)) then + max_elem = m_x(i,j) + idx_i = i + idx_j = j + endif + enddo + enddo + + ! Debug + !do i = 1, n + ! write(*,'(100F10.4)') m_x(i,:) + !enddo + !print*,'Max',idx_i,idx_j,max_elem + + max_elem = dabs(max_elem) + + deallocate(Aij,Bij,Pa) + +end +#+end_src + +** Spatial extent + +The spatial extent of an orbital $i$ is computed as +\begin{align*} +\sum_{\lambda=x,y,z}\sqrt{ - ^2} +\end{align*} + +From that we can also compute the average and the standard deviation + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_spatial_extent(spatial_extent) + + implicit none + + BEGIN_DOC + ! Compute the spatial extent of the MOs + END_DOC + + double precision, intent(out) :: spatial_extent(mo_num) + double precision :: average_core, average_act, average_inact, average_virt + double precision :: std_var_core, std_var_act, std_var_inact, std_var_virt + integer :: i,j,k,l + + spatial_extent = 0d0 + + do i = 1, mo_num + spatial_extent(i) = mo_spread_x(i,i) - mo_dipole_x(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_y(i,i) - mo_dipole_y(i,i)**2 + enddo + do i = 1, mo_num + spatial_extent(i) = spatial_extent(i) + mo_spread_z(i,i) - mo_dipole_z(i,i)**2 + enddo + + do i = 1, mo_num + spatial_extent(i) = dsqrt(spatial_extent(i)) + enddo + + average_core = 0d0 + std_var_core = 0d0 + if (dim_list_core_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core) + call compute_std_var_sp_ext(spatial_extent, list_core, dim_list_core_orb, average_core, std_var_core) + endif + + average_act = 0d0 + std_var_act = 0d0 + if (dim_list_act_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act) + call compute_std_var_sp_ext(spatial_extent, list_act, dim_list_act_orb, average_act, std_var_act) + endif + + average_inact = 0d0 + std_var_inact = 0d0 + if (dim_list_inact_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact) + call compute_std_var_sp_ext(spatial_extent, list_inact, dim_list_inact_orb, average_inact, std_var_inact) + endif + + average_virt = 0d0 + std_var_virt = 0d0 + if (dim_list_virt_orb >= 2) then + call compute_average_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt) + call compute_std_var_sp_ext(spatial_extent, list_virt, dim_list_virt_orb, average_virt, std_var_virt) + endif + + print*,'' + print*,'=============================' + print*,' Spatial extent of the MOs' + print*,'=============================' + print*,'' + + print*, 'elec_num:', elec_num + print*, 'elec_alpha_num:', elec_alpha_num + print*, 'elec_beta_num:', elec_beta_num + print*, 'core:', dim_list_core_orb + print*, 'act:', dim_list_act_orb + print*, 'inact:', dim_list_inact_orb + print*, 'virt:', dim_list_virt_orb + print*, 'mo_num:', mo_num + print*,'' + + print*,'-- Core MOs --' + print*,'Average:', average_core + print*,'Std var:', std_var_core + print*,'' + + print*,'-- Active MOs --' + print*,'Average:', average_act + print*,'Std var:', std_var_act + print*,'' + + print*,'-- Inactive MOs --' + print*,'Average:', average_inact + print*,'Std var:', std_var_inact + print*,'' + + print*,'-- Virtual MOs --' + print*,'Average:', average_virt + print*,'Std var:', std_var_virt + print*,'' + + print*,'Spatial extent:' + do i = 1, mo_num + print*, i, spatial_extent(i) + enddo + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_average_sp_ext(spatial_extent, list, list_size, average) + + implicit none + + BEGIN_DOC + ! Compute the average spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(out) :: average + integer :: i, tmp_i + + average = 0d0 + do tmp_i = 1, list_size + i = list(tmp_i) + average = average + spatial_extent(i) + enddo + + average = average / DBLE(list_size) + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine compute_std_var_sp_ext(spatial_extent, list, list_size, average, std_var) + + implicit none + + BEGIN_DOC + ! Compute the standard deviation of the spatial extent of the MOs + END_DOC + + integer, intent(in) :: list_size, list(list_size) + double precision, intent(in) :: spatial_extent(mo_num) + double precision, intent(in) :: average + double precision, intent(out) :: std_var + integer :: i, tmp_i + + std_var = 0d0 + + do tmp_i = 1, list_size + i = list(tmp_i) + std_var = std_var + (spatial_extent(i) - average)**2 + enddo + + std_var = dsqrt(1d0/DBLE(list_size) * std_var) + +end +#+END_SRC + +** Utils + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine apply_pre_rotation() + + implicit none + + BEGIN_DOC + ! Apply a rotation between the MOs + END_DOC + + double precision, allocatable :: pre_rot(:,:), prev_mos(:,:), R(:,:) + double precision :: t1,t2,t3 + integer :: i,j,tmp_i,tmp_j + integer :: info + logical :: enforce_step_cancellation + + print*,'---apply_pre_rotation---' + call wall_time(t1) + + allocate(pre_rot(mo_num,mo_num), prev_mos(ao_num,mo_num), R(mo_num,mo_num)) + + ! Initialization of the matrix + pre_rot = 0d0 + + if (kick_in_mos) then + ! Pre rotation for core MOs + if (dim_list_core_orb >= 2) then + do tmp_j = 1, dim_list_core_orb + j = list_core(tmp_j) + do tmp_i = 1, dim_list_core_orb + i = list_core(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for active MOs + if (dim_list_act_orb >= 2) then + do tmp_j = 1, dim_list_act_orb + j = list_act(tmp_j) + do tmp_i = 1, dim_list_act_orb + i = list_act(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for inactive MOs + if (dim_list_inact_orb >= 2) then + do tmp_j = 1, dim_list_inact_orb + j = list_inact(tmp_j) + do tmp_i = 1, dim_list_inact_orb + i = list_inact(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Pre rotation for virtual MOs + if (dim_list_virt_orb >= 2) then + do tmp_j = 1, dim_list_virt_orb + j = list_virt(tmp_j) + do tmp_i = 1, dim_list_virt_orb + i = list_virt(tmp_i) + if (i > j) then + pre_rot(i,j) = angle_pre_rot + elseif (i < j) then + pre_rot(i,j) = - angle_pre_rot + else + pre_rot(i,j) = 0d0 + endif + enddo + enddo + endif + + ! Nothing for deleted ones + + ! Compute pre rotation matrix from pre_rot + call rotation_matrix(pre_rot,mo_num,R,mo_num,mo_num,info,enforce_step_cancellation) + + if (enforce_step_cancellation) then + print*, 'Cancellation of the pre rotation, too big error in the rotation matrix' + print*, 'Reduce the angle for the pre rotation, abort' + call abort + endif + + ! New Mos (we don't car eabout the previous MOs prev_mos) + call apply_mo_rotation(R,prev_mos) + + ! Update the things related to mo_coef + TOUCH mo_coef + call save_mos + endif + + deallocate(pre_rot, prev_mos, R) + + call wall_time(t2) + t3 = t2-t1 + print*,'Time in apply_pre_rotation:', t3 + print*,'---End apply_pre_rotation---' + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine x_tmp_orb_loc_v2(tmp_n, tmp_list_size, tmp_list, v_grad, H,tmp_x, tmp_m_x) + + implicit none + + integer, intent(in) :: tmp_n, tmp_list_size, tmp_list(tmp_list_size) + double precision, intent(in) :: v_grad(tmp_n) + double precision, intent(in) :: H(tmp_n, tmp_n) + double precision, intent(out) :: tmp_m_x(tmp_list_size, tmp_list_size), tmp_x(tmp_list_size) + !double precision, allocatable :: x(:) + double precision :: lambda , accu, max_elem + integer :: i,j,tmp_i,tmp_j,tmp_k + + ! Allocation + !allocate(x(tmp_n)) + + ! Level shifted hessian + lambda = 0d0 + do tmp_k = 1, tmp_n + if (H(tmp_k,tmp_k) < lambda) then + lambda = H(tmp_k,tmp_k) + endif + enddo + + ! min element in the hessian + if (lambda < 0d0) then + lambda = -lambda + 1d-6 + endif + + print*, 'lambda', lambda + + ! Good + do tmp_k = 1, tmp_n + if (ABS(H(tmp_k,tmp_k)) > 1d-6) then + tmp_x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * v_grad(tmp_k)!(-v_grad(tmp_k)) + !x(tmp_k) = - 1d0/(ABS(H(tmp_k,tmp_k))+lambda) * (-v_grad(tmp_k)) + endif + enddo + + ! 1D tmp -> 2D tmp + tmp_m_x = 0d0 + do tmp_j = 1, tmp_list_size - 1 + do tmp_i = tmp_j + 1, tmp_list_size + call mat_to_vec_index(tmp_i,tmp_j,tmp_k) + tmp_m_x(tmp_i, tmp_j) = tmp_x(tmp_k)!x(tmp_k) + enddo + enddo + + ! Antisym + do tmp_i = 1, tmp_list_size - 1 + do tmp_j = tmp_i + 1, tmp_list_size + tmp_m_x(tmp_i,tmp_j) = - tmp_m_x(tmp_j,tmp_i) + enddo + enddo + + ! Deallocation + !deallocate(x) + +end subroutine +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine ao_to_mo_no_sym(A_ao,LDA_ao,A_mo,LDA_mo) + implicit none + BEGIN_DOC + ! Transform A from the |AO| basis to the |MO| basis + ! + ! $C^\dagger.A_{ao}.C$ + END_DOC + integer, intent(in) :: LDA_ao,LDA_mo + double precision, intent(in) :: A_ao(LDA_ao,ao_num) + double precision, intent(out) :: A_mo(LDA_mo,mo_num) + double precision, allocatable :: T(:,:) + + allocate ( T(ao_num,mo_num) ) + !DIR$ ATTRIBUTES ALIGN : $IRP_ALIGN :: T + + call dgemm('N','N', ao_num, mo_num, ao_num, & + 1.d0, A_ao,LDA_ao, & + mo_coef, size(mo_coef,1), & + 0.d0, T, size(T,1)) + + call dgemm('T','N', mo_num, mo_num, ao_num, & + 1.d0, mo_coef,size(mo_coef,1), & + T, ao_num, & + 0.d0, A_mo, size(A_mo,1)) + + deallocate(T) +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +subroutine run_sort_by_fock_energies() + + implicit none + + BEGIN_DOC + ! Saves the current MOs ordered by diagonal element of the Fock operator. + END_DOC + + integer :: i,j,k,l,tmp_i,tmp_k,tmp_list_size + integer, allocatable :: iorder(:), tmp_list(:) + double precision, allocatable :: fock_energies_tmp(:), tmp_mo_coef(:,:) + + ! Test + do l = 1, 4 + if (l==1) then ! core + tmp_list_size = dim_list_core_orb + elseif (l==2) then ! act + tmp_list_size = dim_list_act_orb + elseif (l==3) then ! inact + tmp_list_size = dim_list_inact_orb + else ! virt + tmp_list_size = dim_list_virt_orb + endif + + if (tmp_list_size >= 2) then + ! Allocation tmp array + allocate(tmp_list(tmp_list_size)) + + ! To give the list of MOs in a mo_class + if (l==1) then ! core + tmp_list = list_core + elseif (l==2) then + tmp_list = list_act + elseif (l==3) then + tmp_list = list_inact + else + tmp_list = list_virt + endif + print*,'MO class: ',trim(mo_class(tmp_list(1))) + + allocate(iorder(tmp_list_size), fock_energies_tmp(tmp_list_size), tmp_mo_coef(ao_num,tmp_list_size)) + !print*,'MOs before sorting them by f_p^p energies:' + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + fock_energies_tmp(i) = Fock_matrix_diag_mo(tmp_i) + iorder(i) = i + !print*, tmp_i, fock_energies_tmp(i) + enddo + + call dsort(fock_energies_tmp, iorder, tmp_list_size) + + print*,'MOs after sorting them by f_p^p energies:' + do i = 1, tmp_list_size + k = iorder(i) + tmp_k = tmp_list(k) + print*, tmp_k, fock_energies_tmp(k) + do j = 1, ao_num + tmp_mo_coef(j,k) = mo_coef(j,tmp_k) + enddo + enddo + + ! Update the MOs after sorting them by energies + do i = 1, tmp_list_size + tmp_i = tmp_list(i) + do j = 1, ao_num + mo_coef(j,tmp_i) = tmp_mo_coef(j,i) + enddo + enddo + + if (debug_hf) then + touch mo_coef + print*,'HF energy:', HF_energy + endif + print*,'' + + deallocate(iorder, fock_energies_tmp, tmp_list, tmp_mo_coef) + endif + + enddo + + touch mo_coef + call save_mos + +end + +#+END_SRC + + +#+BEGIN_SRC f90 :comments org :tangle localization_sub.irp.f +function is_core(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a core orbital + END_DOC + + integer, intent(in) :: i + logical :: is_core + + integer :: j + + ! Init + is_core = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_core = .True. + exit + endif + enddo + +end + +function is_del(i) + + implicit none + + BEGIN_DOC + ! True if the orbital i is a deleted orbital + END_DOC + + integer, intent(in) :: i + logical :: is_del + + integer :: j + + ! Init + is_del = .False. + + ! Search + do j = 1, dim_list_core_orb + if (list_core(j) == i) then + is_del = .True. + exit + endif + enddo + +end + +subroutine set_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + call apply_hole(psi_det(1,1,1), 1, i, res, ok1, N_int) + call apply_hole(psi_det(1,1,1), 2, i, res, ok2, N_int) + if (ok1 .and. ok2) then + mo_class(i) = 'Inactive' + else if (.not. ok1 .and. .not. ok2) then + mo_class(i) = 'Virtual' + else + mo_class(i) = 'Active' + endif + enddo + touch mo_class + endif + +end + +subroutine unset_classes_loc() + + implicit none + + integer :: i + logical :: ok1, ok2 + logical :: is_core, is_del + integer(bit_kind) :: res(N_int,2) + + if (auto_mo_class) then + do i = 1, mo_num + if (is_core(i)) cycle + if (is_del(i)) cycle + mo_class(i) = 'Active' + enddo + touch mo_class + endif + +end +#+END_SRC diff --git a/src/mo_optimization/83.mo_optimization.bats b/src/mo_optimization/83.mo_optimization.bats new file mode 100644 index 00000000..5bc3d313 --- /dev/null +++ b/src/mo_optimization/83.mo_optimization.bats @@ -0,0 +1,62 @@ +#!/usr/bin/env bats + +source $QP_ROOT/tests/bats/common.bats.sh +source $QP_ROOT/quantum_package.rc + + +function run() { + thresh=2e-3 + test_exe scf || skip + qp set_file $1 + qp edit --check + qp reset -a + qp run scf + qp set_frozen_core + qp set determinants n_states 2 + qp set determinants read_wf true + qp set mo_two_e_ints io_mo_two_e_integrals None + file="$(echo $1 | sed 's/.ezfio//g')" + qp run cis + qp run debug_gradient_list_opt > $file.debug_g.out + err3="$(grep 'Max error:' $file.debug_g.out | awk '{print $3}')" + qp run debug_hessian_list_opt > $file.debug_h1.out + err1="$(grep 'Max error:' $file.debug_h1.out | awk '{print $3}')" + qp run orb_opt > $file.opt1.out + energy1="$(grep 'State average energy:' $file.opt1.out | tail -n 1 | awk '{print $4}')" + qp set orbital_optimization optimization_method diag + qp reset -d + qp run scf + qp run cis + qp run debug_hessian_list_opt > $file.debug_h2.out + err2="$(grep 'Max error_H:' $file.debug_h2.out | awk '{print $3}')" + qp run orb_opt > $file.opt2.out + energy2="$(grep 'State average energy:' $file.opt2.out | tail -n 1 | awk '{print $4}')" + qp set orbital_optimization optimization_method full + qp reset -d + qp run scf + eq $energy1 $2 $thresh + eq $energy2 $3 $thresh + eq $err1 0.0 1e-12 + eq $err2 0.0 1e-12 + eq $err3 0.0 1e-12 +} + +@test "b2_stretched" { +run b2_stretched.ezfio -48.9852901484277 -48.9852937541510 +} + +@test "h2o" { +run h2o.ezfio -75.9025622449206 -75.8691844585879 +} + +@test "h2s" { +run h2s.ezfio -398.576255809878 -398.574145943928 +} + +@test "hbo" { +run hbo.ezfio -99.9234823022109 -99.9234763597840 +} + +@test "hco" { +run hco.ezfio -113.204915552241 -113.204905207050 +} diff --git a/src/mo_optimization/EZFIO.cfg b/src/mo_optimization/EZFIO.cfg new file mode 100644 index 00000000..e6aa2d67 --- /dev/null +++ b/src/mo_optimization/EZFIO.cfg @@ -0,0 +1,23 @@ +[optimization_method] +type: character*(32) +doc: Define the kind of hessian for the orbital optimization full : full hessian, diag : diagonal hessian, none : no hessian +interface: ezfio,provider,ocaml +default: full + +[n_det_max_opt] +type: integer +doc: Maximal number of the determinants in the wf for the orbital optimization (to stop the optimization if n_det > n_det_max_opt) +interface: ezfio,provider,ocaml +default: 200000 + +[optimization_max_nb_iter] +type: integer +doc: Maximal number of iterations for the orbital optimization +interface: ezfio,provider,ocaml +default: 20 + +[thresh_opt_max_elem_grad] +type: double precision +doc: Threshold for the convergence, the optimization exits when the biggest element in the gradient is smaller than thresh_optimization_max_elem_grad +interface: ezfio,provider,ocaml +default: 1.e-5 diff --git a/src/mo_optimization/NEED b/src/mo_optimization/NEED new file mode 100644 index 00000000..91f41ee3 --- /dev/null +++ b/src/mo_optimization/NEED @@ -0,0 +1,7 @@ +two_body_rdm +hartree_fock +cipsi +davidson_undressed +selectors_full +generators_full +utils_trust_region diff --git a/src/mo_optimization/README.md b/src/mo_optimization/README.md new file mode 100644 index 00000000..94f29aee --- /dev/null +++ b/src/mo_optimization/README.md @@ -0,0 +1,74 @@ +# Orbital optimization + +## Methods +Different methods are available: +- full hessian +``` +qp set orbital_optimization optimization_method full +``` +- diagonal hessian +``` +qp set orbital_optimization optimization_method diag +``` +- identity matrix +``` +qp set orbital_optimization optimization_method none +``` + +After the optimization the ezfio contains the optimized orbitals + +## For a fixed number of determinants +To optimize the MOs for the actual determinants: +``` +qp run orb_opt +``` + +## For a complete optimization, i.e, with a larger and larger wave function +To optimize the MOs with a larger and larger wave function: +``` +qp run optimization +``` + +The results are stored in the EZFIO in "mo_optimization/result_opt", +with the following format: +(1) (2) (3) (4) +1: Number of determinants in the wf, +2: Cispi energy before the optimization, +3: Cipsi energy after the optimization, +4: Energy difference between (2) and (3). + +The optimization process if the following: +- we do a first cipsi step to obtain a small number of determinants in the wf +- we run an orbital optimization for this wf +- we do a new cipsi step to double the number of determinants in the wf +- we run an orbital optimization for this wf +- ... +- we do that until the energy difference between (2) and (3) is + smaller than the targeted accuracy for the cispi (targeted_accuracy_cipsi in qp edit) + or the wf is larger than a given size (n_det_max_opt in qp_edit) +- after that you can reset your determinants (qp reset -d) and run a clean Cispi calculation + +### End of the optimization +You can choos the number of determinants after what the +optimization will stop: +``` +qp set orbital_optimization n_det_max_opt 1e5 # or any number +``` +## Weight of the states +You can change the weights of the differents states directly in qp edit. +It will affect ths weights used in the orbital optimization. + +# Tests +To run the tests: +``` +qp test +``` + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` + diff --git a/src/mo_optimization/class.irp.f b/src/mo_optimization/class.irp.f new file mode 100644 index 00000000..b4a68ac2 --- /dev/null +++ b/src/mo_optimization/class.irp.f @@ -0,0 +1,12 @@ + BEGIN_PROVIDER [ logical, do_only_1h1p ] +&BEGIN_PROVIDER [ logical, do_only_cas ] +&BEGIN_PROVIDER [ logical, do_ddci ] + implicit none + BEGIN_DOC + ! In the FCI case, all those are always false + END_DOC + do_only_1h1p = .False. + do_only_cas = .False. + do_ddci = .False. +END_PROVIDER + diff --git a/src/mo_optimization/constants.h b/src/mo_optimization/constants.h new file mode 100644 index 00000000..1cd00bda --- /dev/null +++ b/src/mo_optimization/constants.h @@ -0,0 +1 @@ + logical, parameter :: debug=.False. diff --git a/src/mo_optimization/debug_gradient_list_opt.irp.f b/src/mo_optimization/debug_gradient_list_opt.irp.f new file mode 100644 index 00000000..867e0105 --- /dev/null +++ b/src/mo_optimization/debug_gradient_list_opt.irp.f @@ -0,0 +1,78 @@ +! Debug the gradient + +! *Program to check the gradient* + +! The program compares the result of the first and last code for the +! gradient. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,i,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program diff --git a/src/mo_optimization/debug_gradient_opt.irp.f b/src/mo_optimization/debug_gradient_opt.irp.f new file mode 100644 index 00000000..8aeec18f --- /dev/null +++ b/src/mo_optimization/debug_gradient_opt.irp.f @@ -0,0 +1,76 @@ +! Debug the gradient + +! *Program to check the gradient* + +! The program compares the result of the first and last code for the +! gradient. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program diff --git a/src/mo_optimization/debug_hessian_list_opt.irp.f b/src/mo_optimization/debug_hessian_list_opt.irp.f new file mode 100644 index 00000000..d1aa79c4 --- /dev/null +++ b/src/mo_optimization/debug_hessian_list_opt.irp.f @@ -0,0 +1,147 @@ +! Debug the hessian + +! *Program to check the hessian matrix* + +! The program compares the result of the first and last code for the +! hessian. First of all the 4D hessian and after the 2D hessian. + +! Provided: +! | mo_num | integer | number of MOs | +! | optimization_method | string | Method for the orbital optimization: | +! | | | - 'full' -> full hessian | +! | | | - 'diag' -> diagonal hessian | +! | dim_list_act_orb | integer | number of active MOs | +! | list_act(dim_list_act_orb) | integer | list of the actives MOs | +! | | | | + +! Internal: +! | m | integer | number of MOs in the list | +! | | | (active MOs) | +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + ! Deallocation + deallocate(H, H2, h_f, h_f2) + + else + + print*, 'Use the diagonal hessian matrix' + allocate(H(n,1),H2(n,1)) + call diag_hessian_list_opt(n,m,list_act,H) + call first_diag_hessian_list_opt(n,m,list_act,H2) + + H = H - H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do i = 1, n + if (ABS(H(i,1)) > threshold) then + print*, H(i,1) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,1)) > ABS(max_error_H)) then + max_error_H = H(i,1) + endif + + endif + enddo + + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check of the full hessian' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + print*,'' + else + print*,'Check of the diagonal hessian' + endif + + print*,'Nb error_H:', nb_error_H + print*,'Max error_H:', max_error_H + +end program diff --git a/src/mo_optimization/debug_hessian_opt.irp.f b/src/mo_optimization/debug_hessian_opt.irp.f new file mode 100644 index 00000000..6d22cc01 --- /dev/null +++ b/src/mo_optimization/debug_hessian_opt.irp.f @@ -0,0 +1,171 @@ +! Debug the hessian + +! *Program to check the hessian matrix* + +! The program compares the result of the first and last code for the +! hessian. First of all the 4D hessian and after the 2D hessian. + +! Provided: +! | mo_num | integer | number of MOs | + +! Internal: +! | n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + elseif (optimization_method == 'diag') then + + print*, 'Use the diagonal hessian matrix' + call diag_hessian_opt(n,H,h_f) + call first_diag_hessian_opt(n,H2,h_f2) + + h_f = h_f - h_f2 + max_error = 0d0 + nb_error = 0 + threshold = 1d-12 + + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + + if (ABS(h_f(i,j,k,l)) > threshold) then + + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + + endif + + enddo + enddo + enddo + enddo + + h=H-H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + else + print*,'Unknown optimization_method, please select full, diag' + call abort + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check the full hessian' + else + print*,'Check the diagonal hessian' + endif + + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + print*,'' + print*,'Nb error_H :', nb_error_H + print*,'Max error_H :', max_error_H + + ! Deallocation + deallocate(H,H2,h_f,h_f2) + +end program diff --git a/src/mo_optimization/diagonal_hessian_list_opt.irp.f b/src/mo_optimization/diagonal_hessian_list_opt.irp.f new file mode 100644 index 00000000..fe54fa7a --- /dev/null +++ b/src/mo_optimization/diagonal_hessian_list_opt.irp.f @@ -0,0 +1,1556 @@ +! Diagonal hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! Here for the diagonal of the hessian it's a little more complicated +! than for the hessian. It's not just compute the diagonal terms of the +! hessian because of the permutations. + +! The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +! with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +! a diagonal term, if : +! p = r and q = s, => (p,q,p,q) +! or +! q = r and p = s, => (p,q,q,p) + +! For that reason, we will use 2D temporary arrays to store the +! elements. One for the terms (p,q,p,q) and an other for the terms of +! kind (p,q,q,p). We will also use a 1D temporary array to store the +! terms of the kind (p,p,p,p) due to the kronoecker delta. + +! *Compute the diagonal hessian of energy with respects to orbital +! rotations* +! By diagonal hessian we mean, diagonal elements of the hessian + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | time to compute the hessian | +! | t4,t5,t6 | double precision | time to compute the differ each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +! | tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +! | tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +! | tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +! | tmp_accu_1(mo_num) | double precision | temporary array (private) | +! | tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +! | tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +! | tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +! | tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | + + +subroutine diag_hessian_list_opt(n, m, list, H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n, m, list(m) + + ! out + double precision, intent(out) :: H(n)!, h_tmpr(m,m,m,m) + + ! internal + !double precision, allocatable :: !hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: tmp_p,tmp_q,tmp_r,tmp_s,tmp_pq,tmp_rs + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- Diagonal_hessian_list_opt---' + + ! Allocation of shared arrays + !allocate(hessian(m,m,m,m))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(m),tmp_h_pqpq(m,m),tmp_h_pqqp(m,m)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,m)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,m)) + allocate(tmp_accu_1_shared(m),tmp_accu_shared(m,m)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n,m, mo_one_e_integrals, one_e_dm_mo, list, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_accu(m,m)) + +! Initialization of the arrays + +!!$OMP DO +!do tmp_s = 1,m +! do tmp_r = 1, m +! do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqpq(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqqp(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) +! = +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) + & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! \begin{align*} +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3(mo_num, mo_num, m),tmp_2rdm_3(mo_num, mo_num, m)) + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 1',t6 +!$OMP END MASTER + +! Line 2, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & +! + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) +& + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & +! + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) +& + tmp_bi_int_3(u,v,tmp_q) * tmp_2rdm_3(u,v,tmp_q) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! \begin{align*} +! \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & +! + get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +! Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +! Using u,v as one variable a matrix multiplication appears. +! $$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,tmp_q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,tmp_p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu(tmp_p,tmp_q) + tmp_accu(tmp_q,tmp_p) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & +! + get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +call wall_time(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3,tmp_2rdm_3) + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 1',t6 +!$OMP END MASTER + +! Line 3, term 2 + +! \begin{align*} +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & +! .or. ((p==s) .and. (q==r))) then + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & +! - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & +! - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) +! = +! - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) +! = +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!---------- +! Part 1.1 +!---------- +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + +allocate(tmp_bi_int_3(m, mo_num, m), tmp_2rdm_3(m, mo_num, m)) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3(tmp_q,u,tmp_p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_2rdm_3(tmp_q,u,tmp_p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do u = 1, mo_num + do tmp_q = 1, m + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(tmp_q,u,tmp_p) * tmp_2rdm_3(tmp_q,u,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3, tmp_2rdm_3) + + + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!-------- +! Part 1.2 +!-------- +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +allocate(tmp_bi_int_3(mo_num, m, m),tmp_2rdm_3(mo_num, m, m)) + + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do u = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_bi_int_3(t,tmp_q,tmp_p) = 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p= 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_2rdm_3(t,tmp_q,tmp_p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do tmp_p = 1, m + do t = 1, mo_num + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(t,tmp_q,tmp_p) * tmp_2rdm_3(t,tmp_q,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3,tmp_2rdm_3) + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & +! - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) +! = +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +! Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!---------- +! Part 2.1 +!---------- +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,tmp_q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_p = 1, m + do tmp_q = 1, m + + tmp_h_pqqp(tmp_q,tmp_p) = tmp_h_pqqp(tmp_q,tmp_p) - tmp_accu(tmp_q,tmp_p) - tmp_accu(tmp_p,tmp_q) + + enddo +enddo +!$OMP END DO + + + +! Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!-------- +! Part 2.2 +!-------- +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,tmp_q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + +!$OMP DO +do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) - tmp_accu(tmp_p,tmp_q) - tmp_accu(tmp_q,tmp_p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t2 = t2 - t1 +print*, 'Time to compute the hessian :', t2 +!$OMP END MASTER + +! Deallocation of private arrays +! In the OMP section ! + +deallocate(tmp_accu) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!!$OMP DO +!do tmp_p = 1, m +! hessian(tmp_p,tmp_p,tmp_p,tmp_p) = hessian(tmp_p,tmp_p,tmp_p,tmp_p) + tmp_h_pppp(tmp_p) +!enddo +!!$OMP END DO + +!!$OMP DO +!do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_p,tmp_q) = hessian(tmp_p,tmp_q,tmp_p,tmp_q) + tmp_h_pqpq(tmp_p,tmp_q) +! enddo +!enddo +!!$OMP END DO +! +!!$OMP DO +!do tmp_q = 1, m +! do tmp_p = 1, m +! hessian(tmp_p,tmp_q,tmp_q,tmp_p) = hessian(tmp_p,tmp_q,tmp_q,tmp_p) + tmp_h_pqqp(tmp_p,tmp_q) +! enddo +!enddo +!!$OMP END DO + +!!$OMP DO +!do tmp_s = 1, m +! do tmp_r = 1, m +! do tmp_q = 1, m +! do tmp_p = 1, m + +! h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & +! - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +!if (debug) then +! print*,'2D diag Hessian matrix' +! do tmp_pq = 1, n +! write(*,'(100(F10.5))') H(tmp_pq,:) +! enddo +!endif + +! Deallocation of shared arrays, end + + +!deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---End diagonal_hessian_list_opt---' + +end subroutine diff --git a/src/mo_optimization/diagonal_hessian_opt.irp.f b/src/mo_optimization/diagonal_hessian_opt.irp.f new file mode 100644 index 00000000..7688ec37 --- /dev/null +++ b/src/mo_optimization/diagonal_hessian_opt.irp.f @@ -0,0 +1,1511 @@ +! Diagonal hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! Here for the diagonal of the hessian it's a little more complicated +! than for the hessian. It's not just compute the diagonal terms of the +! hessian because of the permutations. + +! The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +! with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +! a diagonal term, if : +! p = r and q = s, => (p,q,p,q) +! or +! q = r and p = s, => (p,q,q,p) + +! For that reason, we will use 2D temporary arrays to store the +! elements. One for the terms (p,q,p,q) and an other for the terms of +! kind (p,q,q,p). We will also use a 1D temporary array to store the +! terms of the kind (p,p,p,p) due to the kronoecker delta. + +! *Compute the diagonal hessian of energy with respects to orbital +! rotations* +! By diagonal hessian we mean, diagonal elements of the hessian + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | time to compute the hessian | +! | t4,t5,t6 | double precision | time to compute the differ each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +! | tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +! | tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +! | tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +! | tmp_accu_1(mo_num) | double precision | temporary array (private) | +! | tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +! | tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +! | tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +! | tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | + + +subroutine diag_hessian_opt(n,H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n)!,n), h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + !double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: istate + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- diagonal hessian---' + print*,'Use the diagonal hessian' + + ! Allocation of shared arrays + !allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(mo_num),tmp_h_pqpq(mo_num,mo_num),tmp_h_pqqp(mo_num,mo_num)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_accu_1_shared(mo_num),tmp_accu_shared(mo_num,mo_num)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n, mo_one_e_integrals, one_e_dm_mo, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num),tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num)) + +! Initialization of the arrays + +!!$OMP DO +!do s = 1,mo_num +! do r = 1, mo_num +! do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,r,s) = 0d0 +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +!$OMP DO +do p = 1, mo_num + tmp_h_pppp(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqpq(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqqp(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & +! + mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) +! = +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * (& +! mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) +! = +! 0.5d0 * ( & +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) +! = +! mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! \begin{align*} +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) +! = +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & +! - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) +! = +! - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do p =1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and q=r* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & +! + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + & + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 1',t6 +!$OMP END MASTER + +! Line 2, term 2 + +! \begin{align*} +! \frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif +! endif +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,p,p,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & +! + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) +! = +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) +& + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & +! + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) +! = +! get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +! Just re-order the index and use 3D temporary arrays for optimal memory +! accesses. + + +!$OMP DO +do p = 1,mo_num + tmp_accu_1_shared(p) = 0d0 +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) +& + tmp_bi_int_3(u,v,q) * tmp_2rdm_3(u,v,q) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! \begin{align*} +! \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & +! + get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +! Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +! Using u,v as one variable a matrix multiplication appears. +! $$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu(p,q) + tmp_accu(q,p) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,q,p) + +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +! = +! get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & +! + get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) +! = +! 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +call wall_time(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 1',t6 +!$OMP END MASTER + +! Line 3, term 2 + +! \begin{align*} +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} + +! Without optimization : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! ! Permutations +! if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & +! .or. ((p==s) .and. (q==r))) then + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! endif + +! enddo +! enddo +! enddo +! enddo + +! With optimization : + +! *Part 1 : p=r and q=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & +! - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & +! - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) +! = +! - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) +! = +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!---------- +! Part 1.1 +!---------- +! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,u,p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_2rdm_3(q,u,p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(q,u,p) * tmp_2rdm_3(q,u,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo +enddo +!$OMP END DO + + + +! Just re-order the indexes and use 3D temporary arrays for optimal +! memory accesses. + + +!-------- +! Part 1.2 +!-------- +! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +!$OMP DO +do u = 1, mo_num + + do p = 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3(t,q,p) = 2d0*get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do p= 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3(t,q,p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + do t = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(t,q,p) * tmp_2rdm_3(t,q,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo +enddo +!$OMP END DO + + + +! *Part 2 : q=r and p=s* + +! hessian(p,q,r,s) -> hessian(p,q,p,q) + +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & +! - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) +! = +! - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & +! - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) +! = +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +! Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!---------- +! Part 2.1 +!---------- +! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & +! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + +!$OMP DO +do q = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do p = 1, mo_num + do q = 1, mo_num + + tmp_h_pqqp(q,p) = tmp_h_pqqp(q,p) - tmp_accu(q,p) - tmp_accu(p,q) + + enddo +enddo +!$OMP END DO + + + +! Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +! so and using t,u as one variable, a matrix multiplication appears : +! $$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + + +!-------- +! Part 2.2 +!-------- +! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & +! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +!$OMP DO +do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP DO +do q = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo +enddo +!$OMP END DO + +call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) - tmp_accu(p,q) - tmp_accu(q,p) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6= t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t2 = t2 - t1 +print*, 'Time to compute the hessian :', t2 +!$OMP END MASTER + +! Deallocation of private arrays +! In the OMP section ! + +deallocate(tmp_2rdm_3,tmp_bi_int_3) +deallocate(tmp_accu) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!!$OMP DO +!do p = 1, mo_num +! hessian(p,p,p,p) = hessian(p,p,p,p) + tmp_h_pppp(p) +!enddo +!!$OMP END DO + +!!$OMP DO +!do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,p,q) = hessian(p,q,p,q) + tmp_h_pqpq(p,q) +! enddo +!enddo +!!$OMP END DO +! +!!$OMP DO +!do q = 1, mo_num +! do p = 1, mo_num +! hessian(p,q,q,p) = hessian(p,q,q,p) + tmp_h_pqqp(p,q) +! enddo +!enddo +!!$OMP END DO + +!!$OMP DO +!do s = 1, mo_num +! do r = 1, mo_num +! do q = 1, mo_num +! do p = 1, mo_num + +! h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + +! enddo +! enddo +! enddo +!enddo +!!$OMP END DO + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +!if (debug) then +! print*,'2D diag Hessian matrix' +! do pq = 1, n +! write(*,'(100(F10.5))') H(pq,:) +! enddo +!endif + +! Deallocation of shared arrays, end + + +!deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---diagonal_hessian' + +end subroutine diff --git a/src/mo_optimization/diagonalization_hessian.irp.f b/src/mo_optimization/diagonalization_hessian.irp.f new file mode 100644 index 00000000..e25879d9 --- /dev/null +++ b/src/mo_optimization/diagonalization_hessian.irp.f @@ -0,0 +1,136 @@ +! Diagonalization of the hessian + +! Just a matrix diagonalization using Lapack + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | +! | H(n,n) | double precision | hessian | + +! Output: +! | e_val(n) | double precision | eigenvalues of the hessian | +! | w(n,n) | double precision | eigenvectors of the hessian | + +! Internal: +! | nb_negative_nv | integer | number of negative eigenvalues | +! | lwork | integer | for Lapack | +! | work(lwork,n) | double precision | temporary array for Lapack | +! | info | integer | if 0 -> ok, else problem in the diagonalization | +! | i,j | integer | dummy indexes | + + +subroutine diagonalization_hessian(n,H,e_val,w) + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: H(n,n) + + ! out + double precision, intent(out) :: e_val(n), w(n,n) + + ! internal + double precision, allocatable :: work(:,:) + integer, allocatable :: key(:) + integer :: info,lwork + integer :: i,j + integer :: nb_negative_vp + double precision :: t1,t2,t3,max_elem + + print*,'' + print*,'---Diagonalization_hessian---' + + call wall_time(t1) + + if (optimization_method == 'full') then + ! Allocation + ! For Lapack + lwork=3*n-1 + + allocate(work(lwork,n)) + + ! Calculation + + ! Copy the hessian matrix, the eigenvectors will be store in W + W=H + + ! Diagonalization of the hessian + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info) + + if (info /= 0) then + print*, 'Error diagonalization : diagonalization_hessian' + print*, 'info = ', info + call ABORT + endif + + if (debug) then + print *, 'vp Hess:' + write(*,'(100(F10.5))') real(e_val(:)) + endif + + ! Number of negative eigenvalues + max_elem = 0d0 + nb_negative_vp = 0 + do i = 1, n + if (e_val(i) < 0d0) then + nb_negative_vp = nb_negative_vp + 1 + if (e_val(i) < max_elem) then + max_elem = e_val(i) + endif + !print*,'e_val < 0 :', e_val(i) + endif + enddo + print*,'Number of negative eigenvalues:', nb_negative_vp + print*,'Lowest eigenvalue:',max_elem + + !nb_negative_vp = 0 + !do i = 1, n + ! if (e_val(i) < -thresh_eig) then + ! nb_negative_vp = nb_negative_vp + 1 + ! endif + !enddo + !print*,'Number of negative eigenvalues <', -thresh_eig,':', nb_negative_vp + + ! Deallocation + deallocate(work) + + elseif (optimization_method == 'diag') then + ! Diagonalization of the diagonal hessian by hands + allocate(key(n)) + + do i = 1, n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, n) + + ! Eigenvectors + W = 0d0 + do i = 1, n + j = key(i) + W(j,i) = 1d0 + enddo + + deallocate(key) + else + print*,'Diagonalization_hessian, abort' + call abort + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in diagonalization_hessian:', t3 + + print*,'---End diagonalization_hessian---' + +end subroutine diff --git a/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f b/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f new file mode 100644 index 00000000..58536993 --- /dev/null +++ b/src/mo_optimization/first_diagonal_hessian_list_opt.irp.f @@ -0,0 +1,372 @@ +subroutine first_diag_hessian_list_opt(tmp_n,m,list,H)!, h_tmpr) + + include 'constants.h' + + implicit none + + !=========================================================================== + ! Compute the diagonal hessian of energy with respects to orbital rotations + !=========================================================================== + + !=========== + ! Variables + !=========== + + ! in + integer, intent(in) :: tmp_n, m, list(m) + ! tmp_n : integer, tmp_n = m*(m-1)/2 + + ! out + double precision, intent(out) :: H(tmp_n)!, h_tmpr(m,m,m,m) + ! H : n by n double precision matrix containing the 2D hessian + + ! internal + double precision, allocatable :: hessian(:,:,:,:), tmp(:,:),h_tmpr(:,:,:,:) + integer :: p,q, tmp_p,tmp_q + integer :: r,s,t,u,v,tmp_r,tmp_s,tmp_t,tmp_u,tmp_v + integer :: pq,rs,tmp_pq,tmp_rs + double precision :: t1,t2,t3 + ! hessian : mo_num 4D double precision matrix containing the hessian before the permutations + ! h_tmpr : mo_num 4D double precision matrix containing the hessian after the permutations + ! p,q,r,s : integer, indexes of the 4D hessian matrix + ! t,u,v : integer, indexes to compute hessian elements + ! pq,rs : integer, indexes for the conversion from 4D to 2D hessian matrix + ! t1,t2,t3 : double precision, t3 = t2 - t1, time to compute the hessian + + ! Function + double precision :: get_two_e_integral + ! get_two_e_integral : double precision function, two e integrals + + ! Provided : + ! mo_one_e_integrals : mono e- integrals + ! get_two_e_integral : two e- integrals + ! one_e_dm_mo_alpha, one_e_dm_mo_beta : one body density matrix + ! two_e_dm_mo : two body density matrix + + print*,'---first_diag_hess_list---' + + !============ + ! Allocation + !============ + + allocate(hessian(m,m,m,m),tmp(tmp_n,tmp_n),h_tmpr(mo_num,mo_num,mo_num,mo_num)) + + !============= + ! Calculation + !============= + + ! From Anderson et. al. (2014) + ! The Journal of Chemical Physics 141, 244104 (2014); doi: 10.1063/1.4904384 + + ! LaTeX formula : + + !\begin{align*} + !H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + !&= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + !+ \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_u^r)] + !-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + !&+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} +v_{uv}^{st} \Gamma_{pt}^{uv}) + !+ \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + !&+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{ps}^{uv}) \\ + !&- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) + !\end{align*} + + !================ + ! Initialization + !================ + hessian = 0d0 + + CALL wall_time(t1) + + !======================== + ! First line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================= + ! First line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! First line, third term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Second line, first term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================== + ! Second line, second term + !========================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! Third line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Third line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo + enddo + + CALL wall_time(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + + !============== + ! Permutations + !============== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + + h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p vector, transformation +! In addition there is a permutation in the gradient formula : +! \begin{equation} +! P_{pq} = 1 - (p <-> q) +! \end{equation} + +! We need a vector to use the gradient. Here the gradient is a +! antisymetric matrix so we can transform it in a vector of length +! mo_num*(mo_num-1)/2. + +! Here we do these two things at the same time. + + +do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) +enddo + +! Debug, diplay the vector containing the gradient elements +if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) +endif + +! Norm of the gradient +! The norm can be useful. + +norm = dnrm2(n,v_grad,1) +print*, 'Gradient norm : ', norm + +! Maximum element in the gradient +! The maximum element in the gradient is very important for the +! convergence criterion of the Newton method. + + +! Max element of the gradient +max_elem = 0d0 +do i = 1, n + if (DABS(v_grad(i)) > DABS(max_elem)) then + max_elem = v_grad(i) + endif +enddo + +print*,'Max element in the gradient :', max_elem + +! Debug, display the matrix containting the gradient elements +if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,m + do p=1,m + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, m + write(*,'(100(F10.5))') A(i,1:m) + enddo +endif + +! Deallocation of shared arrays and end + +deallocate(grad,A, tmp_mo_one_e_integrals,tmp_one_e_dm_mo) + +print*,'---End gradient---' + +end subroutine diff --git a/src/mo_optimization/gradient_opt.irp.f b/src/mo_optimization/gradient_opt.irp.f new file mode 100644 index 00000000..25be6b5a --- /dev/null +++ b/src/mo_optimization/gradient_opt.irp.f @@ -0,0 +1,346 @@ +! Gradient + +! The gradient of the CI energy with respects to the orbital rotation +! is: +! (C-c C-x C-l) +! $$ +! G(p,q) = \mathcal{P}_{pq} \left[ \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +! \sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +! \right] +! $$ + + +! $$ +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! $$ + +! $$ +! G(p,q) = \left[ +! \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +! \sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +! \right] - +! \left[ +! \sum_r (h_q^r \gamma_r^p - h_r^p \gamma_q^r) + +! \sum_{rst}(v_{qt}^{rs} \Gamma_{rs}^{pt} - v_{rs}^{pt} +! \Gamma_{qt}^{rs}) +! \right] +! $$ + +! Where p,q,r,s,t are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The gradient is a mo_num by mo_num matrix, p,q,r,s,t take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute $$G(p,q)$$ for all the pairs (p,q). + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo +! E. Scuseria + +! *Compute the gradient of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono_electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix | +! | two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | v_grad(n) | double precision | the gradient | +! | max_elem | double precision | maximum element of the gradient | + +! Internal: +! | grad(mo_num,mo_num) | double precison | gradient before the tranformation in a vector | +! | A((mo_num,mo_num) | doubre precision | gradient after the permutations | +! | norm | double precision | norm of the gradient | +! | p, q | integer | indexes of the element in the matrix grad | +! | i | integer | index for the tranformation in a vector | +! | r, s, t | integer | indexes dor the sums | +! | t1, t2, t3 | double precision | t3 = t2 - t1, time to compute the gradient | +! | t4, t5, t6 | double precission | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bi-electronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the two e- density matrix | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bi-electronic integrals | +! | dnrm2 | double precision | (Lapack) norm | + + +subroutine gradient_opt(n,v_grad,max_elem) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: v_grad(n), max_elem + + ! internal + double precision, allocatable :: grad(:,:),A(:,:) + double precision :: norm + integer :: i,p,q,r,s,t + double precision :: t1,t2,t3,t4,t5,t6 + + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:) + + ! Functions + double precision :: get_two_e_integral, dnrm2 + + + print*,'' + print*,'---gradient---' + + ! Allocation of shared arrays + allocate(grad(mo_num,mo_num),A(mo_num,mo_num)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s,t, & + !$OMP tmp_accu, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(grad, one_e_dm_mo, mo_num,mo_one_e_integrals, & + !$OMP mo_integrals_map,t4,t5,t6) & + !$OMP DEFAULT(SHARED) + + ! Allocation of private arrays + allocate(tmp_accu(mo_num,mo_num)) + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num)) + +! Initialization + +!$OMP DO +do q = 1, mo_num + do p = 1,mo_num + grad(p,q) = 0d0 + enddo +enddo +!$OMP END DO + +! Term 1 + +! Without optimization the term 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! grad(p,q) = grad(p,q) & +! + mo_one_e_integrals(p,r) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(r,q) * one_e_dm_mo(p,r) +! enddo +! enddo +! enddo + +! Since the matrix multiplication A.B is defined like : +! \begin{equation} +! c_{ij} = \sum_k a_{ik}.b_{kj} +! \end{equation} +! The previous equation can be rewritten as a matrix multplication + + +!**************** +! Opt first term +!**************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('N','N',mo_num,mo_num,mo_num,1d0,mo_one_e_integrals,& +mo_num,one_e_dm_mo,mo_num,0d0,tmp_accu,mo_num) + +!$OMP DO +do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + (tmp_accu(p,q) - tmp_accu(q,p)) + + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'Gradient, first term (s) :', t6 +!$OMP END MASTER + +! Term 2 + +! Without optimization the second term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num +! do t= 1, mo_num + +! grad(p,q) = grad(p,q) & +! + get_two_e_integral(p,t,r,s,mo_integrals_map) * two_e_dm_mo(r,s,q,t) & +! - get_two_e_integral(r,s,q,t,mo_integrals_map) * two_e_dm_mo(p,t,r,s) +! enddo +! enddo +! enddo +! enddo +! enddo + +! Using the bielectronic integral properties : +! get_two_e_integral(p,t,r,s,mo_integrals_map) = get_two_e_integral(r,s,p,t,mo_integrals_map) + +! Using the two body matrix properties : +! two_e_dm_mo(p,t,r,s) = two_e_dm_mo(r,s,p,t) + +! t is one the right, we can put it on the external loop and create 3 +! indexes temporary array +! r,s can be seen as one index + +! By doing so, a matrix multiplication appears + + +!***************** +! Opt second term +!***************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_bi_int_3(r,s,p) = get_two_e_integral(r,s,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_2rdm_3(r,s,q) = two_e_dm_mo(r,s,q,t) + + enddo + enddo + enddo + + call dgemm('T','N',mo_num,mo_num,mo_num*mo_num,1d0,tmp_bi_int_3,& + mo_num*mo_num,tmp_2rdm_3,mo_num*mo_num,0d0,tmp_accu,mo_num) + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6 = t5-t4 +print*,'Gradient second term (s) : ', t6 +!$OMP END MASTER + +! Deallocation of private arrays + +deallocate(tmp_bi_int_3,tmp_2rdm_3,tmp_accu) + +!$OMP END PARALLEL + +call omp_set_max_active_levels(4) + +! Permutation, 2D matrix -> vector, transformation +! In addition there is a permutation in the gradient formula : +! \begin{equation} +! P_{pq} = 1 - (p <-> q) +! \end{equation} + +! We need a vector to use the gradient. Here the gradient is a +! antisymetric matrix so we can transform it in a vector of length +! mo_num*(mo_num-1)/2. + +! Here we do these two things at the same time. + + +do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) +enddo + +! Debug, diplay the vector containing the gradient elements +if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) +endif + +! Norm of the gradient +! The norm can be useful. + +norm = dnrm2(n,v_grad,1) +print*, 'Gradient norm : ', norm + +! Maximum element in the gradient +! The maximum element in the gradient is very important for the +! convergence criterion of the Newton method. + + +! Max element of the gradient +max_elem = 0d0 +do i = 1, n + if (ABS(v_grad(i)) > ABS(max_elem)) then + max_elem = v_grad(i) + endif +enddo + +print*,'Max element in the gradient :', max_elem + +! Debug, display the matrix containting the gradient elements +if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,mo_num + do p=1,mo_num + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, mo_num + write(*,'(100(F10.5))') A(i,1:mo_num) + enddo +endif + +! Deallocation of shared arrays and end + +deallocate(grad,A) + +print*,'---End gradient---' + +end subroutine diff --git a/src/mo_optimization/hessian_list_opt.irp.f b/src/mo_optimization/hessian_list_opt.irp.f new file mode 100644 index 00000000..31af769a --- /dev/null +++ b/src/mo_optimization/hessian_list_opt.irp.f @@ -0,0 +1,1129 @@ +! Hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute all the pairs (pq,rs) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! *Compute the hessian of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +! | t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +! | ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | +! | tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bielectronic integrals | + + +subroutine hessian_list_opt(n,m,list,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,m,list(m) + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(m,m,m,m) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q,tmp_p,tmp_q,tmp_r,tmp_s + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:),ind_3_3(:,:,:) + double precision, allocatable :: tmp_bi_int_3_3(:,:,:), tmp_2rdm_3_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:),tmp_one_e_dm_mo(:,:),tmp_mo_one_e_integrals(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(m,m,m,m),tmp_one_e_dm_mo(mo_num,m),tmp_mo_one_e_integrals(mo_num,m)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s,p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3, tmp_bi_int_3_3,tmp_2rdm_3_3, ind_3_3 ) & + !$OMP SHARED(m,list,hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map, & + !$OMP t1,t2,t3,t4,t5,t6,& + !$OMP tmp_mo_one_e_integrals,tmp_one_e_dm_mo)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(m,m), tmp_accu_sym(mo_num,mo_num)) + +! Initialization of the arrays + +!$OMP MASTER +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP MASTER +do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_sym(tmp_p,tmp_q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP DO +do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + enddo + enddo + enddo +enddo +!$OMP ENDDO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! Without optimization the term 1 of the line 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + tmp_mo_one_e_integrals(u,tmp_p) = mo_one_e_integrals(u,p) + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_s = 1, m + s = list(tmp_s) + do u = 1, mo_num + tmp_one_e_dm_mo(u,tmp_s) = one_e_dm_mo(u,s) + enddo +enddo +!$OMP END DO + + +call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + +!$OMP DO +do tmp_s = 1, m + do tmp_p = 1, m + + tmp_accu_sym(tmp_p,tmp_s) = 0.5d0 * (tmp_accu(tmp_p,tmp_s) + tmp_accu(tmp_s,tmp_p)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do tmp_s = 1, m + do tmp_p = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + +!$OMP DO +do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo +enddo +!OMP END DO + +!$OMP DO +do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo +enddo +!OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! Without optimization the third term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + +! enddo +! enddo +! enddo +! enddo + +! We can just re-order the indexes + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do tmp_s = 1, m + s = list(tmp_s) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! Without optimization the fourth term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using bielectronic integral properties : +! get_two_e_integral(s,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,s,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +! There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +! terms like : hessian(p,r,r,s) + + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3, mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_p = 1, m + do tmp_s = 1, m + + tmp_accu_sym(tmp_s,tmp_p) = 0.5d0 * (tmp_accu(tmp_p,tmp_s)+tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_p = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6=t5-t4 +print*,'l2 1', t6 +!$OMP END MASTER + +! Line 2, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(q,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,q,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +! There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +! terms like : hessian(s,q,r,s) + + +!****************************** +! Opt Second line, second term +!****************************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + + + +!$OMP DO +do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3 , mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(u,v,p,r,mo_integrals_map) = +! get_two_e_integral(p,r,u,v,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +! With v on the external loop, using temporary arrays for each v and by +! taking p,r and q,s as one dimension a matrix multplication +! appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +! Part 1 + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!-------- +! part 1 +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) +!-------- + +allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_bi_int_3_3(tmp_p,tmp_r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_q,tmp_s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do tmp_s = 1, m + + call dgemm('N','N',m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_s),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_p,tmp_r,tmp_q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + + + +! With v on the external loop, using temporary arrays for each v and by +! taking q,s and p,r as one dimension a matrix multplication +! appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +! Part 2 + +!-------- +! part 2 +! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +!-------- + +allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3_3(tmp_q,tmp_s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_p,tmp_r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do tmp_r = 1, m + + call dgemm('N','N', m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_r),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_q,tmp_s,tmp_p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5 - t4 +print*,'l3 1', t6 +!$OMP END MASTER + +! Line 3, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 1 + +!-------- +! Part 1 +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) +!-------- + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 2 + +!-------- +! Part 2 +!- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 3 + +!-------- +! Part 3 +!- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 4 + +!-------- +! Part 4 +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +!-------- + +allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + +!$OMP DO +do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t3 = t2 -t1 +print*,'Time to compute the hessian : ', t3 +!$OMP END MASTER + +! Deallocation of private arrays +! In the omp section ! + +deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, m + do r = 1, m + do q = 1, m + do p = 1, m + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'Time for permutations :',t6 +!$OMP END MASTER + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo +endif + +! Deallocation of shared arrays, end + +deallocate(hessian,tmp_one_e_dm_mo,tmp_mo_one_e_integrals)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine diff --git a/src/mo_optimization/hessian_opt.irp.f b/src/mo_optimization/hessian_opt.irp.f new file mode 100644 index 00000000..0b4312c6 --- /dev/null +++ b/src/mo_optimization/hessian_opt.irp.f @@ -0,0 +1,1043 @@ +! Hessian + +! The hessian of the CI energy with respects to the orbital rotation is : +! (C-c C-x C-l) + +! \begin{align*} +! H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ +! &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +! + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] +! -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ +! &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +! + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ +! &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +! - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +! \end{align*} +! With pq a permutation operator : + +! \begin{align*} +! \mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +! \end{align*} +! \begin{align*} +! \mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +! &= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +! \end{align*} + +! Where p,q,r,s,t,u,v are general spatial orbitals +! mo_num : the number of molecular orbitals +! $$h$$ : One electron integrals +! $$\gamma$$ : One body density matrix (state average in our case) +! $$v$$ : Two electron integrals +! $$\Gamma$$ : Two body density matrice (state average in our case) + +! The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +! values between 1 and mo_num (1 and mo_num include). + +! To do that we compute all the pairs (pq,rs) + +! Source : +! Seniority-based coupled cluster theory +! J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +! Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +! *Compute the hessian of energy with respects to orbital rotations* + +! Provided: +! | mo_num | integer | number of MOs | +! | mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +! | one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +! | two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +! Input: +! | n | integer | mo_num*(mo_num-1)/2 | + +! Output: +! | H(n,n) | double precision | Hessian matrix | +! | h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +! | | | in n by n matrix | + +! Internal: +! | hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +! | | | the permutations | +! | p, q, r, s | integer | indexes of the hessian elements | +! | t, u, v | integer | indexes for the sums | +! | pq, rs | integer | indexes for the transformation of the hessian | +! | | | (4D -> 2D) | +! | t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +! | t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +! | tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +! | tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +! | ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +! | tmp_accu(mo_num,mo_num) | double precision | temporary array | +! | tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +! Function: +! | get_two_e_integral | double precision | bielectronic integrals | + + +subroutine hessian_opt(n,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:), tmp_accu_shared(:,:),tmp_accu_sym_shared(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_accu_shared(mo_num,mo_num),tmp_accu_sym_shared(mo_num,mo_num)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3) & + !$OMP SHARED(hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map,tmp_accu_sym_shared, tmp_accu_shared, & + !$OMP t1,t2,t3,t4,t5,t6)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num), tmp_accu_sym(mo_num,mo_num)) + +! Initialization of the arrays + +!$OMP MASTER +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP MASTER +do q = 1, mo_num + do p = 1, mo_num + tmp_accu_sym(p,q) = 0d0 + enddo +enddo +!$OMP END MASTER + +!$OMP DO +do s=1,mo_num + do r=1,mo_num + do q=1,mo_num + do p=1,mo_num + hessian(p,q,r,s) = 0d0 + enddo + enddo + enddo +enddo +!$OMP ENDDO + +!$OMP MASTER +CALL wall_TIME(t1) +!$OMP END MASTER + +! Line 1, term 1 + +! Without optimization the term 1 of the line 1 is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & +! + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + +!$OMP DO +do s = 1, mo_num + do p = 1, mo_num + + tmp_accu_sym_shared(p,s) = 0.5d0 * (tmp_accu_shared(p,s) + tmp_accu_shared(s,p)) + + enddo +enddo +!$OMP END DO + +!$OMP DO +do s = 1, mo_num + do p = 1, mo_num + do r = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym_shared(p,s) + + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 1',t6 +!$OMP END MASTER + +! Line 1, term 2 +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & +! + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! We can write the formula as matrix multiplication. +! $$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + +!$OMP DO +do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym_shared(q,r) = 0.5d0 * (tmp_accu_shared(q,r) + tmp_accu_shared(r,q)) + + enddo +enddo +!OMP END DO + +!$OMP DO +do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym_shared(q,r) + + enddo + enddo +enddo +!OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 2',t6 +!$OMP END MASTER + +! Line 1, term 3 + +! Without optimization the third term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & +! - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + +! enddo +! enddo +! enddo +! enddo + +! We can just re-order the indexes + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l1 3',t6 +!$OMP END MASTER + +! Line 2, term 1 + +! Without optimization the fourth term is : + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (q==r) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & +! + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using bielectronic integral properties : +! get_two_e_integral(s,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,s,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +! There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +! terms like : hessian(p,r,r,s) + + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3, mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do p = 1, mo_num + do s = 1, mo_num + + tmp_accu_sym(s,p) = 0.5d0 * (tmp_accu(p,s)+tmp_accu(s,p)) + + enddo + enddo + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym(p,s) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6=t5-t4 +print*,'l2 1', t6 +!$OMP END MASTER + +! Line 2, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! if (p==s) then +! do t = 1, mo_num +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & +! get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & +! + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + +! enddo +! enddo +! enddo +! endif + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(q,t,u,v,mo_integrals_map) = +! get_two_e_integral(u,v,q,t,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +! With t on the external loop, using temporary arrays for each t and by +! taking u,v as one variable a matrix multplication appears. +! $$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +! There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +! terms like : hessian(s,q,r,s) + + +!****************************** +! Opt Second line, second term +!****************************** + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3 , mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym(q,r) = 0.5d0 * (tmp_accu(q,r) + tmp_accu(r,q)) + + enddo + enddo + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym(q,r) + + enddo + enddo + enddo + !$OMP END CRITICAL + +enddo +!$OMP END DO + +!$OMP MASTER +CALL wall_TIME(t5) +t6=t5-t4 +print*,'l2 2',t6 +!$OMP END MASTER + +! Line 3, term 1 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do u = 1, mo_num +! do v = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & +! + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! Using the two electron density matrix properties : +! get_two_e_integral(u,v,p,r,mo_integrals_map) = +! get_two_e_integral(p,r,u,v,mo_integrals_map) + +! Using the two electron density matrix properties : +! two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +! With v on the external loop, using temporary arrays for each v and by +! taking p,r and q,s as one dimension a matrix multplication +! appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +! Part 1 + +!$OMP MASTER +call wall_TIME(t4) +!$OMP END MASTER + +!-------- +! part 1 +! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) +!-------- + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + tmp_bi_int_3(p,r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do s = 1, mo_num + do q = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,q,s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do s = 1, mo_num + + call dgemm('N','N',mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,s),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(p,r,q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With v on the external loop, using temporary arrays for each v and by +! taking q,s and p,r as one dimension a matrix multplication +! appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +! Part 2 + +!-------- +! part 2 +! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) +!-------- + +!$OMP DO +do v = 1, mo_num + + do u = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do p = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,p,r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do r = 1, mo_num + call dgemm('N','N', mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,r),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(q,s,p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5 - t4 +print*,'l3 1', t6 +!$OMP END MASTER + +! Line 3, term 2 + +! do p = 1, mo_num +! do q = 1, mo_num +! do r = 1, mo_num +! do s = 1, mo_num + +! do t = 1, mo_num +! do u = 1, mo_num + +! hessian(p,q,r,s) = hessian(p,q,r,s) & +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & +! - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & +! - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + +! enddo +! enddo + +! enddo +! enddo +! enddo +! enddo + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 1 + +!-------- +! Part 1 +! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) +!-------- + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 2 + +!-------- +! Part 2 +!- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 3 + +!-------- +! Part 3 +!- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + + + +! With q on the external loop, using temporary arrays for each p and q, +! and taking u,v as one variable, a matrix multiplication appears: +! $$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +! Part 4 + +!-------- +! Part 4 +! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) +!-------- + +!$OMP DO +do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'l3 2',t6 +!$OMP END MASTER + +!$OMP MASTER +CALL wall_TIME(t2) +t3 = t2 -t1 +print*,'Time to compute the hessian : ', t3 +!$OMP END MASTER + +! Deallocation of private arrays +! In the omp section ! + +deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) + +! Permutations +! As we mentioned before there are two permutation operator in the +! formula : +! Hessian(p,q,r,s) = P_pq P_rs [...] +! => Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + +!$OMP MASTER +CALL wall_TIME(t4) +!$OMP END MASTER + +!$OMP DO +do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo +enddo +!$OMP END DO + +!$OMP MASTER +call wall_TIME(t5) +t6 = t5-t4 +print*,'Time for permutations :',t6 +!$OMP END MASTER + +! 4D -> 2D matrix +! We need a 2D matrix for the Newton method's. Since the Hessian is +! "antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +! We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +! with p 2D :',t6 +!$OMP END MASTER + +!$OMP END PARALLEL +call omp_set_max_active_levels(4) + +! Display +if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo +endif + +! Deallocation of shared arrays, end + +deallocate(hessian)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine diff --git a/src/mo_optimization/my_providers.irp.f b/src/mo_optimization/my_providers.irp.f new file mode 100644 index 00000000..7469ffd5 --- /dev/null +++ b/src/mo_optimization/my_providers.irp.f @@ -0,0 +1,141 @@ +! Dimensions of MOs + + +BEGIN_PROVIDER [ integer, n_mo_dim ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of MOs we can build, + ! with i>j + END_DOC + + n_mo_dim = mo_num*(mo_num-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_core ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of core MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_act ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of active MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_inact ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of inactive MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 + +END_PROVIDER + +BEGIN_PROVIDER [ integer, n_mo_dim_virt ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of virtual MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 + +END_PROVIDER + +! Energies/criterions + +BEGIN_PROVIDER [ double precision, my_st_av_energy ] + implicit none + BEGIN_DOC + ! State average CI energy + END_DOC + + !call update_st_av_ci_energy(my_st_av_energy) + call state_average_energy(my_st_av_energy) + +END_PROVIDER + +! With all the MOs + +BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] +&BEGIN_PROVIDER [ double precision, my_CC1_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map + + call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(mo_num, mo_num, mo_num, mo_num)) + + call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) + +END_PROVIDER + +! With the list of active MOs +! Can be generalized to any mo_class by changing the list/dimension + +BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC2_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals + + call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) + +END_PROVIDER + +BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) + + call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) + +END_PROVIDER diff --git a/src/mo_optimization/optimization.irp.f b/src/mo_optimization/optimization.irp.f new file mode 100644 index 00000000..9892b3e3 --- /dev/null +++ b/src/mo_optimization/optimization.irp.f @@ -0,0 +1,86 @@ +program optimization + + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + call run_optimization + +end + +subroutine run_optimization + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end diff --git a/src/mo_optimization/orb_opt.irp.f b/src/mo_optimization/orb_opt.irp.f new file mode 100644 index 00000000..71ff9262 --- /dev/null +++ b/src/mo_optimization/orb_opt.irp.f @@ -0,0 +1,22 @@ +! Orbital optimization program + +! This is an optimization program for molecular orbitals. It produces +! orbital rotations in order to lower the energy of a truncated wave +! function. +! This program just optimize the orbitals for a fixed number of +! determinants. This optimization process must be repeated for different +! number of determinants. + + + + +! Main program : orb_opt_trust + + +program orb_opt + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + io_mo_two_e_integrals = 'None' + TOUCH io_mo_two_e_integrals + call run_orb_opt_trust_v2 +end diff --git a/src/mo_optimization/org/TANGLE_org_mode.sh b/src/mo_optimization/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/mo_optimization/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/mo_optimization/org/TODO.org b/src/mo_optimization/org/TODO.org new file mode 100644 index 00000000..960b9ba6 --- /dev/null +++ b/src/mo_optimization/org/TODO.org @@ -0,0 +1,17 @@ +TODO: +** TODO Keep under surveillance the performance of rotation matrix +- is the fix ok ? +** DONE Provider state_average_weight +** DONE Diagonal hessian for orbital optimization with a list of MOs +** DONE Something to force the step cancellation if R.R^T > treshold +** TODO Iterative method to compute the rotation matrix +- doesn't work actually +** DONE Test trust region with polynomial functions +** DONE Optimization/Localization program using the template +** DONE Correction OMP hessian shared/private arrays +** DONE State average energy +** DONE Correction of Rho +** TODO Check the PROVIDE/FREE/TOUCH +** TODO research of lambda without the power 2 +** DONE Clean the OMP sections + diff --git a/src/mo_optimization/org/debug_gradient_list_opt.org b/src/mo_optimization/org/debug_gradient_list_opt.org new file mode 100644 index 00000000..3c6f98c0 --- /dev/null +++ b/src/mo_optimization/org/debug_gradient_list_opt.org @@ -0,0 +1,79 @@ +* Debug the gradient + +*Program to check the gradient* + +The program compares the result of the first and last code for the +gradient. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,i,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_gradient_opt.org b/src/mo_optimization/org/debug_gradient_opt.org new file mode 100644 index 00000000..101e1e8c --- /dev/null +++ b/src/mo_optimization/org/debug_gradient_opt.org @@ -0,0 +1,77 @@ +* Debug the gradient + +*Program to check the gradient* + +The program compares the result of the first and last code for the +gradient. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,v_grad(i) + nb_error = nb_error + 1 + + if (ABS(v_grad(i)) > max_error) then + max_error = v_grad(i) + endif + + endif + enddo + + print*,'' + print*,'Check the gradient' + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + + ! Deallocation + deallocate(v_grad,v_grad2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_hessian_list_opt.org b/src/mo_optimization/org/debug_hessian_list_opt.org new file mode 100644 index 00000000..76e8b337 --- /dev/null +++ b/src/mo_optimization/org/debug_hessian_list_opt.org @@ -0,0 +1,148 @@ +* Debug the hessian + +*Program to check the hessian matrix* + +The program compares the result of the first and last code for the +hessian. First of all the 4D hessian and after the 2D hessian. + +Provided: +| mo_num | integer | number of MOs | +| optimization_method | string | Method for the orbital optimization: | +| | | - 'full' -> full hessian | +| | | - 'diag' -> diagonal hessian | +| dim_list_act_orb | integer | number of active MOs | +| list_act(dim_list_act_orb) | integer | list of the actives MOs | +| | | | + +Internal: +| m | integer | number of MOs in the list | +| | | (active MOs) | +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + ! Deallocation + deallocate(H, H2, h_f, h_f2) + + else + + print*, 'Use the diagonal hessian matrix' + allocate(H(n,1),H2(n,1)) + call diag_hessian_list_opt(n,m,list_act,H) + call first_diag_hessian_list_opt(n,m,list_act,H2) + + H = H - H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do i = 1, n + if (ABS(H(i,1)) > threshold) then + print*, H(i,1) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,1)) > ABS(max_error_H)) then + max_error_H = H(i,1) + endif + + endif + enddo + + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check of the full hessian' + print*,'Threshold:', threshold + print*,'Nb error:', nb_error + print*,'Max error:', max_error + print*,'' + else + print*,'Check of the diagonal hessian' + endif + + print*,'Nb error_H:', nb_error_H + print*,'Max error_H:', max_error_H + +end program +#+END_SRC diff --git a/src/mo_optimization/org/debug_hessian_opt.org b/src/mo_optimization/org/debug_hessian_opt.org new file mode 100644 index 00000000..40f84c82 --- /dev/null +++ b/src/mo_optimization/org/debug_hessian_opt.org @@ -0,0 +1,172 @@ +* Debug the hessian + +*Program to check the hessian matrix* + +The program compares the result of the first and last code for the +hessian. First of all the 4D hessian and after the 2D hessian. + +Provided: +| mo_num | integer | number of MOs | + +Internal: +| n | integer | number of orbitals pairs (p,q) p threshold) then + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + endif + enddo + enddo + enddo + enddo + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + elseif (optimization_method == 'diag') then + + print*, 'Use the diagonal hessian matrix' + call diag_hessian_opt(n,H,h_f) + call first_diag_hessian_opt(n,H2,h_f2) + + h_f = h_f - h_f2 + max_error = 0d0 + nb_error = 0 + threshold = 1d-12 + + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + + if (ABS(h_f(i,j,k,l)) > threshold) then + + print*,h_f(i,j,k,l) + nb_error = nb_error + 1 + + if (ABS(h_f(i,j,k,l)) > ABS(max_error)) then + max_error = h_f(i,j,k,l) + endif + + endif + + enddo + enddo + enddo + enddo + + h=H-H2 + + max_error_H = 0d0 + nb_error_H = 0 + + do j = 1, n + do i = 1, n + if (ABS(H(i,j)) > threshold) then + print*, H(i,j) + nb_error_H = nb_error_H + 1 + + if (ABS(H(i,j)) > ABS(max_error_H)) then + max_error_H = H(i,j) + endif + + endif + enddo + enddo + + else + print*,'Unknown optimization_method, please select full, diag' + call abort + endif + + print*,'' + if (optimization_method == 'full') then + print*,'Check the full hessian' + else + print*,'Check the diagonal hessian' + endif + + print*,'Threshold :', threshold + print*,'Nb error :', nb_error + print*,'Max error :', max_error + print*,'' + print*,'Nb error_H :', nb_error_H + print*,'Max error_H :', max_error_H + + ! Deallocation + deallocate(H,H2,h_f,h_f2) + +end program +#+END_SRC diff --git a/src/mo_optimization/org/diagonal_hessian_list_opt.org b/src/mo_optimization/org/diagonal_hessian_list_opt.org new file mode 100644 index 00000000..a12ca981 --- /dev/null +++ b/src/mo_optimization/org/diagonal_hessian_list_opt.org @@ -0,0 +1,1561 @@ +* Diagonal hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +Here for the diagonal of the hessian it's a little more complicated +than for the hessian. It's not just compute the diagonal terms of the +hessian because of the permutations. + +The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +a diagonal term, if : +p = r and q = s, => (p,q,p,q) +or +q = r and p = s, => (p,q,q,p) + +For that reason, we will use 2D temporary arrays to store the +elements. One for the terms (p,q,p,q) and an other for the terms of +kind (p,q,q,p). We will also use a 1D temporary array to store the +terms of the kind (p,p,p,p) due to the kronoecker delta. + +*Compute the diagonal hessian of energy with respects to orbital +rotations* +By diagonal hessian we mean, diagonal elements of the hessian + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | time to compute the hessian | +| t4,t5,t6 | double precision | time to compute the differ each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +| tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +| tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +| tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +| tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +| tmp_accu_1(mo_num) | double precision | temporary array (private) | +| tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +| tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +| tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f +subroutine diag_hessian_list_opt(n, m, list, H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n, m, list(m) + + ! out + double precision, intent(out) :: H(n)!, h_tmpr(m,m,m,m) + + ! internal + !double precision, allocatable :: !hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: tmp_p,tmp_q,tmp_r,tmp_s,tmp_pq,tmp_rs + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- Diagonal_hessian_list_opt---' + + ! Allocation of shared arrays + !allocate(hessian(m,m,m,m))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(m),tmp_h_pqpq(m,m),tmp_h_pqqp(m,m)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,m)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,m)) + allocate(tmp_accu_1_shared(m),tmp_accu_shared(m,m)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n,m, mo_one_e_integrals, one_e_dm_mo, list, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_accu(m,m)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !!$OMP DO + !do tmp_s = 1,m + ! do tmp_r = 1, m + ! do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqpq(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_h_pqqp(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + enddo + enddo + enddo +enddo + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) + = + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) + & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +\begin{align*} +-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 1 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3(mo_num, mo_num, m),tmp_2rdm_3(mo_num, mo_num, m)) + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) + & + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & ++ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_p) = tmp_accu_1_shared(tmp_p) +& + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + tmp_h_pppp(tmp_p) = tmp_h_pppp(tmp_p) + tmp_accu_1_shared(tmp_p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & ++ get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP DO + do tmp_p = 1, m + tmp_accu_1_shared(tmp_p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(tmp_q) = tmp_accu_1_shared(tmp_q) +& + tmp_bi_int_3(u,v,tmp_q) * tmp_2rdm_3(u,v,tmp_q) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) + tmp_accu_1_shared(tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +\begin{align*} +\sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +With optimization + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & ++ get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) + = + 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +Using u,v as one variable a matrix multiplication appears. +$$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,tmp_q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,tmp_p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu(tmp_p,tmp_q) + tmp_accu(tmp_q,tmp_p) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & ++ get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) + = + 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + call wall_time(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,tmp_p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do tmp_p = 1, m + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) & + + tmp_bi_int_3(u,v,tmp_p) * tmp_2rdm_3(u,v,tmp_p) + + enddo + enddo + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3,tmp_2rdm_3) + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +\begin{align*} +- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & + - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & + - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) + = + - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + = + - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !---------- + ! Part 1.1 + !---------- + ! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + + allocate(tmp_bi_int_3(m, mo_num, m), tmp_2rdm_3(m, mo_num, m)) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3(tmp_q,u,tmp_p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do tmp_q = 1, m + q = list(tmp_q) + + tmp_2rdm_3(tmp_q,u,tmp_p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_p = 1, m + do u = 1, mo_num + do tmp_q = 1, m + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(tmp_q,u,tmp_p) * tmp_2rdm_3(tmp_q,u,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3, tmp_2rdm_3) +#+END_SRC + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !-------- + ! Part 1.2 + !-------- + ! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + + allocate(tmp_bi_int_3(mo_num, m, m),tmp_2rdm_3(mo_num, m, m)) + + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_shared(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do u = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_bi_int_3(t,tmp_q,tmp_p) = 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p= 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + + tmp_2rdm_3(t,tmp_q,tmp_p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do tmp_q = 1, m + do tmp_p = 1, m + do t = 1, mo_num + + tmp_accu_shared(tmp_p,tmp_q) = tmp_accu_shared(tmp_p,tmp_q) & + - tmp_bi_int_3(t,tmp_q,tmp_p) * tmp_2rdm_3(t,tmp_q,tmp_p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqpq(tmp_p,tmp_q) = tmp_h_pqpq(tmp_p,tmp_q) + tmp_accu_shared(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3,tmp_2rdm_3) +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & + - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) + = + - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !---------- + ! Part 2.1 + !---------- + ! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + ! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,tmp_q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_p = 1, m + do tmp_q = 1, m + + tmp_h_pqqp(tmp_q,tmp_p) = tmp_h_pqqp(tmp_q,tmp_p) - tmp_accu(tmp_q,tmp_p) - tmp_accu(tmp_p,tmp_q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !-------- + ! Part 2.2 + !-------- + ! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + ! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,tmp_p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,tmp_q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP DO + do tmp_q = 1, m + do tmp_p = 1, m + + tmp_h_pqqp(tmp_p,tmp_q) = tmp_h_pqqp(tmp_p,tmp_q) - tmp_accu(tmp_p,tmp_q) - tmp_accu(tmp_q,tmp_p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the OMP section ! +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + deallocate(tmp_accu) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !!$OMP DO + !do tmp_p = 1, m + ! hessian(tmp_p,tmp_p,tmp_p,tmp_p) = hessian(tmp_p,tmp_p,tmp_p,tmp_p) + tmp_h_pppp(tmp_p) + !enddo + !!$OMP END DO + + !!$OMP DO + !do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_p,tmp_q) = hessian(tmp_p,tmp_q,tmp_p,tmp_q) + tmp_h_pqpq(tmp_p,tmp_q) + ! enddo + !enddo + !!$OMP END DO + ! + !!$OMP DO + !do tmp_q = 1, m + ! do tmp_p = 1, m + ! hessian(tmp_p,tmp_q,tmp_q,tmp_p) = hessian(tmp_p,tmp_q,tmp_q,tmp_p) + tmp_h_pqqp(tmp_p,tmp_q) + ! enddo + !enddo + !!$OMP END DO + + !!$OMP DO + !do tmp_s = 1, m + ! do tmp_r = 1, m + ! do tmp_q = 1, m + ! do tmp_p = 1, m + + ! h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + ! - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !!$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + !if (debug) then + ! print*,'2D diag Hessian matrix' + ! do tmp_pq = 1, n + ! write(*,'(100(F10.5))') H(tmp_pq,:) + ! enddo + !endif +#+END_SRC + +** Deallocation of shared arrays, end + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_list_opt.irp.f + !deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---End diagonal_hessian_list_opt---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/diagonal_hessian_opt.org b/src/mo_optimization/org/diagonal_hessian_opt.org new file mode 100644 index 00000000..efd75065 --- /dev/null +++ b/src/mo_optimization/org/diagonal_hessian_opt.org @@ -0,0 +1,1516 @@ +* Diagonal hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +Here for the diagonal of the hessian it's a little more complicated +than for the hessian. It's not just compute the diagonal terms of the +hessian because of the permutations. + +The hessian is (p,q,r,s), so the diagonal terms are (p,q,p,q). But +with the permutations : p <-> q, r <-> s, p <-> q and r <-> s, we have +a diagonal term, if : +p = r and q = s, => (p,q,p,q) +or +q = r and p = s, => (p,q,q,p) + +For that reason, we will use 2D temporary arrays to store the +elements. One for the terms (p,q,p,q) and an other for the terms of +kind (p,q,q,p). We will also use a 1D temporary array to store the +terms of the kind (p,p,p,p) due to the kronoecker delta. + +*Compute the diagonal hessian of energy with respects to orbital +rotations* +By diagonal hessian we mean, diagonal elements of the hessian + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | time to compute the hessian | +| t4,t5,t6 | double precision | time to compute the differ each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (private) | +| tmp_bi_int_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals (shared) | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (private) | +| tmp_2rdm_3_shared(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix (shared) | +| tmp_accu(mo_num,mo_num) | double precision | temporary array (private) | +| tmp_accu_shared(mo_num,mo_num) | double precision | temporary array (shared) | +| tmp_accu_1(mo_num) | double precision | temporary array (private) | +| tmp_accu_1_shared(mo_num) | double precision | temporary array (shared) | +| tmp_h_pppp(mo_num) | double precision | matrix containing the hessien elements hessian(p,p,p,p) | +| tmp_h_pqpq(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,p,q) | +| tmp_h_pqqp(mo_num,mo_num) | double precision | matrix containing the hessien elements hessian(p,q,q,p) | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f +subroutine diag_hessian_opt(n,H)!, h_tmpr) + + use omp_lib + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n)!,n), h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + !double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + integer :: p,q,k + integer :: r,s,t,u,v + integer :: pq,rs + integer :: istate + double precision :: t1,t2,t3,t4,t5,t6 + double precision, allocatable :: tmp_bi_int_3(:,:,:),tmp_bi_int_3_shared(:,:,:) + double precision, allocatable :: tmp_2rdm_3(:,:,:),tmp_2rdm_3_shared(:,:,:) + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_accu_shared(:,:), tmp_accu_1_shared(:) + double precision, allocatable :: tmp_h_pppp(:), tmp_h_pqpq(:,:), tmp_h_pqqp(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'--- diagonal hessian---' + print*,'Use the diagonal hessian' + + ! Allocation of shared arrays + !allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_h_pppp(mo_num),tmp_h_pqpq(mo_num,mo_num),tmp_h_pqqp(mo_num,mo_num)) + allocate(tmp_2rdm_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_bi_int_3_shared(mo_num,mo_num,mo_num)) + allocate(tmp_accu_1_shared(mo_num),tmp_accu_shared(mo_num,mo_num)) + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu,k, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(H, tmp_h_pppp, tmp_h_pqpq, tmp_h_pqqp, & + !$OMP mo_num,n, mo_one_e_integrals, one_e_dm_mo, & + !$OMP tmp_bi_int_3_shared, tmp_2rdm_3_shared,tmp_accu_shared, & + !$OMP tmp_accu_1_shared,two_e_dm_mo,mo_integrals_map,t1,t2,t3,t4,t5,t6) & + !$OMP DEFAULT(NONE) + + ! Allocation of the private arrays + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num),tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !!$OMP DO + !do s = 1,mo_num + ! do r = 1, mo_num + ! do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,r,s) = 0d0 + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO + + !$OMP DO + do p = 1, mo_num + tmp_h_pppp(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqpq(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_h_pqqp(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & ++ mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 + +\begin{align*} +\frac{1}{2} \sum_u \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + enddo + enddo + enddo +enddo + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) & ++ mo_one_e_integrals(p,u) * one_e_dm_mo(p,u)) + = + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + mo_one_e_integrals(u,p) * one_e_dm_mo(u,p) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * (& + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + = + 0.5d0 * ( & + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) & ++ mo_one_e_integrals(q,u) * one_e_dm_mo(q,u)) + = + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) + mo_one_e_integrals(u,q) * one_e_dm_mo(u,q) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +\begin{align*} +-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_rdm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_rdm_mo(p,s) + + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) & + - 2d0 * mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + +hessian(p,q,r,s) -> hessian(p,q,p,q) + + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) += + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) & + - mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) += + - 2d0 mo_one_e_integrals(q,p) * one_e_dm_mo(p,q) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + - 2d0 * mo_one_e_integrals(p,p) * one_e_dm_mo(q,q) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 1 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and q=r* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do p =1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and q=r* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & ++ get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) & ++ get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) + & + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +\begin{align*} +\frac{1}{2} \sum_{tuv} \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s and p=s* + + hessian(p,q,r,s) -> hessian(p,p,p,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(p,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v) & ++ get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t)) + = + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,p,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do p = 1, mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(p) = tmp_accu_1_shared(p) +& + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + + tmp_h_pppp(p) = tmp_h_pppp(p) + tmp_accu_1_shared(p) + + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & ++ get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(q,t,u,v) & ++ get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + = + get_two_e_integral(u,v,q,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t) + +Just re-order the index and use 3D temporary arrays for optimal memory +accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP DO + do p = 1,mo_num + tmp_accu_1_shared(p) = 0d0 + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,q) = two_e_dm_mo(u,v,q,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_accu_1_shared(q) = tmp_accu_1_shared(q) +& + tmp_bi_int_3(u,v,q) * tmp_2rdm_3(u,v,q) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) + tmp_accu_1_shared(p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +\begin{align*} +\sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +With optimization + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) & ++ get_two_e_integral(q,q,u,v,mo_integrals_map) * two_e_dm_mo(p,p,u,v) + = + 2d0 * get_two_e_integral(u,v,p,p,mo_integrals_map) * two_e_dm_mo(u,v,q,q) + +Arrays of the kind (u,v,p,p) can be transform in 4D arrays (u,v,p). +Using u,v as one variable a matrix multiplication appears. +$$c_{p,q} = \sum_{uv} a_{p,uv} b_{uv,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,v,q) = two_e_dm_mo(u,v,q,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_shared(u,v,p) = get_two_e_integral(u,v,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu(p,q) + tmp_accu(q,p) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,q,p) + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & ++ get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + = + get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) & ++ get_two_e_integral(q,p,u,v,mo_integrals_map) * two_e_dm_mo(p,q,u,v) + = + 2d0 * get_two_e_integral(u,v,p,q,mo_integrals_map) * two_e_dm_mo(u,v,q,p) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + call wall_time(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = 2d0 * get_two_e_integral(u,v,q,p,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,q) + + enddo + enddo + enddo + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) & + + tmp_bi_int_3(u,v,p) * tmp_2rdm_3(u,v,p) + + enddo + enddo + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +\begin{align*} +- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} + +Without optimization : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo +enddo + +With optimization : + +*Part 1 : p=r and q=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) & + - get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(p,u,q,t) & + - get_two_e_integral(q,u,t,p,mo_integrals_map) * two_e_dm_mo(p,u,t,q) + = + - 2d0 * get_two_e_integral(q,t,p,u,mo_integrals_map) * two_e_dm_mo(p,t,q,u) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + = + - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) & + - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !---------- + ! Part 1.1 + !---------- + ! - 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) * two_e_dm_mo(q,u,p,t) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,u,p) = 2d0 * get_two_e_integral(q,u,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_2rdm_3(q,u,p) = two_e_dm_mo(q,u,p,t) + + enddo + enddo + enddo + + !$OMP CRITICAL + do p = 1, mo_num + do u = 1, mo_num + do q = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(q,u,p) * tmp_2rdm_3(q,u,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Just re-order the indexes and use 3D temporary arrays for optimal +memory accesses. + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !-------- + ! Part 1.2 + !-------- + ! - 2d0 * get_two_e_integral(t,q,p,u,mo_integrals_map) * two_e_dm_mo(t,p,q,u) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END DO + + !$OMP DO + do u = 1, mo_num + + do p = 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3(t,q,p) = 2d0*get_two_e_integral(t,q,p,u,mo_integrals_map) + + enddo + enddo + enddo + + do p= 1, mo_num + do q = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3(t,q,p) = two_e_dm_mo(t,p,q,u) + + enddo + enddo + enddo + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + do t = 1, mo_num + + tmp_accu_shared(p,q) = tmp_accu_shared(p,q) & + - tmp_bi_int_3(t,q,p) * tmp_2rdm_3(t,q,p) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqpq(p,q) = tmp_h_pqpq(p,q) + tmp_accu_shared(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +*Part 2 : q=r and p=s* + + hessian(p,q,r,s) -> hessian(p,q,p,q) + + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(t,p,p,u,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(q,u,q,t,mo_integrals_map) * two_e_dm_mo(p,u,p,t) & + - get_two_e_integral(q,u,t,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + = + - get_two_e_integral(p,t,p,u,mo_integrals_map) * two_e_dm_mo(q,t,q,u) & + - get_two_e_integral(q,t,q,u,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(t,q,q,u) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(t,p,p,u) + = + - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) & + + - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + +Arrays of the kind (t,p,u,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !---------- + ! Part 2.1 + !---------- + ! - get_two_e_integral(t,p,u,p,mo_integrals_map) * two_e_dm_mo(t,q,u,q) & + ! - get_two_e_integral(t,q,u,q,mo_integrals_map) * two_e_dm_mo(p,t,p,u) + + !$OMP DO + do q = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_2rdm_3_shared(t,u,q) = two_e_dm_mo(t,q,u,q) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,p,u,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3_shared,& + mo_num*mo_num, tmp_2rdm_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do p = 1, mo_num + do q = 1, mo_num + + tmp_h_pqqp(q,p) = tmp_h_pqqp(q,p) - tmp_accu(q,p) - tmp_accu(p,q) + + enddo + enddo + !$OMP END DO +#+END_SRC + +Arrays of the kind (t,u,p,p) can be transformed in 3D arrays. By doing +so and using t,u as one variable, a matrix multiplication appears : +$$c_{p,q} = \sum_{tu} a_{p,tu} b_{tu,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !-------- + ! Part 2.2 + !-------- + ! - get_two_e_integral(t,u,p,p,mo_integrals_map) * two_e_dm_mo(q,u,t,q) & + ! - get_two_e_integral(t,u,q,q,mo_integrals_map) * two_e_dm_mo(p,u,t,p) + + !$OMP DO + do p = 1, mo_num + do u = 1, mo_num + do t = 1, mo_num + + tmp_bi_int_3_shared(t,u,p) = get_two_e_integral(t,u,p,p,mo_integrals_map) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP DO + do q = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_shared(u,t,q) = two_e_dm_mo(q,u,t,q) + + enddo + enddo + enddo + !$OMP END DO + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3_shared,& + mo_num*mo_num, tmp_bi_int_3_shared, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + tmp_h_pqqp(p,q) = tmp_h_pqqp(p,q) - tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6= t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the OMP section ! +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + deallocate(tmp_2rdm_3,tmp_bi_int_3) + deallocate(tmp_accu) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !!$OMP DO + !do p = 1, mo_num + ! hessian(p,p,p,p) = hessian(p,p,p,p) + tmp_h_pppp(p) + !enddo + !!$OMP END DO + + !!$OMP DO + !do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,p,q) = hessian(p,q,p,q) + tmp_h_pqpq(p,q) + ! enddo + !enddo + !!$OMP END DO + ! + !!$OMP DO + !do q = 1, mo_num + ! do p = 1, mo_num + ! hessian(p,q,q,p) = hessian(p,q,q,p) + tmp_h_pqqp(p,q) + ! enddo + !enddo + !!$OMP END DO + + !!$OMP DO + !do s = 1, mo_num + ! do r = 1, mo_num + ! do q = 1, mo_num + ! do p = 1, mo_num + + ! h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + ! enddo + ! enddo + ! enddo + !enddo + !!$OMP END DO +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !!$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + !if (debug) then + ! print*,'2D diag Hessian matrix' + ! do pq = 1, n + ! write(*,'(100(F10.5))') H(pq,:) + ! enddo + !endif +#+END_SRC + +** Deallocation of shared arrays, end + +#+BEGIN_SRC f90 :comments org :tangle diagonal_hessian_opt.irp.f + !deallocate(hessian)!,h_tmpr) + deallocate(tmp_h_pppp,tmp_h_pqpq,tmp_h_pqqp) + deallocate(tmp_accu_1_shared, tmp_accu_shared) + + print*,'---diagonal_hessian' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/diagonalization_hessian.org b/src/mo_optimization/org/diagonalization_hessian.org new file mode 100644 index 00000000..5eed7dd5 --- /dev/null +++ b/src/mo_optimization/org/diagonalization_hessian.org @@ -0,0 +1,138 @@ +* Diagonalization of the hessian + +Just a matrix diagonalization using Lapack + +Input: +| n | integer | mo_num*(mo_num-1)/2 | +| H(n,n) | double precision | hessian | + +Output: +| e_val(n) | double precision | eigenvalues of the hessian | +| w(n,n) | double precision | eigenvectors of the hessian | + +Internal: +| nb_negative_nv | integer | number of negative eigenvalues | +| lwork | integer | for Lapack | +| work(lwork,n) | double precision | temporary array for Lapack | +| info | integer | if 0 -> ok, else problem in the diagonalization | +| i,j | integer | dummy indexes | + +#+BEGIN_SRC f90 :comments org :tangle diagonalization_hessian.irp.f +subroutine diagonalization_hessian(n,H,e_val,w) + + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + double precision, intent(in) :: H(n,n) + + ! out + double precision, intent(out) :: e_val(n), w(n,n) + + ! internal + double precision, allocatable :: work(:,:) + integer, allocatable :: key(:) + integer :: info,lwork + integer :: i,j + integer :: nb_negative_vp + double precision :: t1,t2,t3,max_elem + + print*,'' + print*,'---Diagonalization_hessian---' + + call wall_time(t1) + + if (optimization_method == 'full') then + ! Allocation + ! For Lapack + lwork=3*n-1 + + allocate(work(lwork,n)) + + ! Calculation + + ! Copy the hessian matrix, the eigenvectors will be store in W + W=H + + ! Diagonalization of the hessian + call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info) + + if (info /= 0) then + print*, 'Error diagonalization : diagonalization_hessian' + print*, 'info = ', info + call ABORT + endif + + if (debug) then + print *, 'vp Hess:' + write(*,'(100(F10.5))') real(e_val(:)) + endif + + ! Number of negative eigenvalues + max_elem = 0d0 + nb_negative_vp = 0 + do i = 1, n + if (e_val(i) < 0d0) then + nb_negative_vp = nb_negative_vp + 1 + if (e_val(i) < max_elem) then + max_elem = e_val(i) + endif + !print*,'e_val < 0 :', e_val(i) + endif + enddo + print*,'Number of negative eigenvalues:', nb_negative_vp + print*,'Lowest eigenvalue:',max_elem + + !nb_negative_vp = 0 + !do i = 1, n + ! if (e_val(i) < -thresh_eig) then + ! nb_negative_vp = nb_negative_vp + 1 + ! endif + !enddo + !print*,'Number of negative eigenvalues <', -thresh_eig,':', nb_negative_vp + + ! Deallocation + deallocate(work) + + elseif (optimization_method == 'diag') then + ! Diagonalization of the diagonal hessian by hands + allocate(key(n)) + + do i = 1, n + e_val(i) = H(i,i) + enddo + + ! Key list for dsort + do i = 1, n + key(i) = i + enddo + + ! Sort of the eigenvalues + call dsort(e_val, key, n) + + ! Eigenvectors + W = 0d0 + do i = 1, n + j = key(i) + W(j,i) = 1d0 + enddo + + deallocate(key) + else + print*,'Diagonalization_hessian, abort' + call abort + endif + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in diagonalization_hessian:', t3 + + print*,'---End diagonalization_hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/first_diagonal_hessian_list_opt.org b/src/mo_optimization/org/first_diagonal_hessian_list_opt.org new file mode 100644 index 00000000..391c6033 --- /dev/null +++ b/src/mo_optimization/org/first_diagonal_hessian_list_opt.org @@ -0,0 +1,376 @@ +* First diagonal hessian + +#+BEGIN_SRC f90 :comments :tangle first_diagonal_hessian_list_opt.irp.f +subroutine first_diag_hessian_list_opt(tmp_n,m,list,H)!, h_tmpr) + + include 'constants.h' + + implicit none + + !=========================================================================== + ! Compute the diagonal hessian of energy with respects to orbital rotations + !=========================================================================== + + !=========== + ! Variables + !=========== + + ! in + integer, intent(in) :: tmp_n, m, list(m) + ! tmp_n : integer, tmp_n = m*(m-1)/2 + + ! out + double precision, intent(out) :: H(tmp_n)!, h_tmpr(m,m,m,m) + ! H : n by n double precision matrix containing the 2D hessian + + ! internal + double precision, allocatable :: hessian(:,:,:,:), tmp(:,:),h_tmpr(:,:,:,:) + integer :: p,q, tmp_p,tmp_q + integer :: r,s,t,u,v,tmp_r,tmp_s,tmp_t,tmp_u,tmp_v + integer :: pq,rs,tmp_pq,tmp_rs + double precision :: t1,t2,t3 + ! hessian : mo_num 4D double precision matrix containing the hessian before the permutations + ! h_tmpr : mo_num 4D double precision matrix containing the hessian after the permutations + ! p,q,r,s : integer, indexes of the 4D hessian matrix + ! t,u,v : integer, indexes to compute hessian elements + ! pq,rs : integer, indexes for the conversion from 4D to 2D hessian matrix + ! t1,t2,t3 : double precision, t3 = t2 - t1, time to compute the hessian + + ! Function + double precision :: get_two_e_integral + ! get_two_e_integral : double precision function, two e integrals + + ! Provided : + ! mo_one_e_integrals : mono e- integrals + ! get_two_e_integral : two e- integrals + ! one_e_dm_mo_alpha, one_e_dm_mo_beta : one body density matrix + ! two_e_dm_mo : two body density matrix + + print*,'---first_diag_hess_list---' + + !============ + ! Allocation + !============ + + allocate(hessian(m,m,m,m),tmp(tmp_n,tmp_n),h_tmpr(mo_num,mo_num,mo_num,mo_num)) + + !============= + ! Calculation + !============= + + ! From Anderson et. al. (2014) + ! The Journal of Chemical Physics 141, 244104 (2014); doi: 10.1063/1.4904384 + + ! LaTeX formula : + + !\begin{align*} + !H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + !&= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + !+ \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_u^r)] + !-(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + !&+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} +v_{uv}^{st} \Gamma_{pt}^{uv}) + !+ \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + !&+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{ps}^{uv}) \\ + !&- \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) + !\end{align*} + + !================ + ! Initialization + !================ + hessian = 0d0 + + CALL wall_time(t1) + + !======================== + ! First line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================= + ! First line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * one_e_dm_mo(r,u)) + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! First line, third term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Second line, first term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !========================== + ! Second line, second term + !========================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + endif + + enddo + enddo + enddo + enddo + + !======================== + ! Third line, first term + !======================== + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do u = 1, mo_num + do v = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + endif + + enddo + enddo + enddo + enddo + + !========================= + ! Third line, second term + !========================= + do tmp_p = 1, m + p = list(tmp_p) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_s = 1, m + s = list(tmp_s) + + ! Permutations + if (((p==r) .and. (q==s)) .or. ((q==r) .and. (p==s)) & + .or. ((p==s) .and. (q==r))) then + + do t = 1, mo_num + do u = 1, mo_num + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + endif + + enddo + enddo + enddo + enddo + + CALL wall_time(t2) + t2 = t2 - t1 + print*, 'Time to compute the hessian :', t2 + + !============== + ! Permutations + !============== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + + h_tmpr(tmp_p,tmp_q,tmp_r,tmp_s) = (hessian(tmp_p,tmp_q,tmp_r,tmp_s) - hessian(tmp_q,tmp_p,tmp_r,tmp_s) & + - hessian(tmp_p,tmp_q,tmp_s,tmp_r) + hessian(tmp_q,tmp_p,tmp_s,tmp_r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + + !======================== + ! 4D matrix to 2D matrix + !======================== + + ! Convert the hessian mo_num * mo_num * mo_num * mo_num matrix in a + ! 2D n * n matrix (n = mo_num*(mo_num-1)/2) + ! H(pq,rs) : p vector, transformation +In addition there is a permutation in the gradient formula : +\begin{equation} +P_{pq} = 1 - (p <-> q) +\end{equation} + +We need a vector to use the gradient. Here the gradient is a +antisymetric matrix so we can transform it in a vector of length +mo_num*(mo_num-1)/2. + +Here we do these two things at the same time. + +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) + enddo + + ! Debug, diplay the vector containing the gradient elements + if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) + endif +#+END_SRC + +*** Norm of the gradient +The norm can be useful. +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + norm = dnrm2(n,v_grad,1) + print*, 'Gradient norm : ', norm +#+END_SRC + +*** Maximum element in the gradient +The maximum element in the gradient is very important for the +convergence criterion of the Newton method. + +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + ! Max element of the gradient + max_elem = 0d0 + do i = 1, n + if (DABS(v_grad(i)) > DABS(max_elem)) then + max_elem = v_grad(i) + endif + enddo + + print*,'Max element in the gradient :', max_elem + + ! Debug, display the matrix containting the gradient elements + if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,m + do p=1,m + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, m + write(*,'(100(F10.5))') A(i,1:m) + enddo + endif +#+END_SRC + +*** Deallocation of shared arrays and end +#+BEGIN_SRC f90 :comments org :tangle gradient_list_opt.irp.f + deallocate(grad,A, tmp_mo_one_e_integrals,tmp_one_e_dm_mo) + + print*,'---End gradient---' + + end subroutine + +#+END_SRC + diff --git a/src/mo_optimization/org/gradient_opt.org b/src/mo_optimization/org/gradient_opt.org new file mode 100644 index 00000000..45c761e9 --- /dev/null +++ b/src/mo_optimization/org/gradient_opt.org @@ -0,0 +1,358 @@ +* Gradient + +The gradient of the CI energy with respects to the orbital rotation +is: +(C-c C-x C-l) +$$ +G(p,q) = \mathcal{P}_{pq} \left[ \sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +\sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +\right] +$$ + + +$$ +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +$$ + +$$ +G(p,q) = \left[ +\sum_r (h_p^r \gamma_r^q - h_r^q \gamma_p^r) + +\sum_{rst}(v_{pt}^{rs} \Gamma_{rs}^{qt} - v_{rs}^{qt} \Gamma_{pt}^{rs}) +\right] - +\left[ +\sum_r (h_q^r \gamma_r^p - h_r^p \gamma_q^r) + +\sum_{rst}(v_{qt}^{rs} \Gamma_{rs}^{pt} - v_{rs}^{pt} +\Gamma_{qt}^{rs}) +\right] +$$ + +Where p,q,r,s,t are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The gradient is a mo_num by mo_num matrix, p,q,r,s,t take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute $$G(p,q)$$ for all the pairs (p,q). + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo +E. Scuseria + +*Compute the gradient of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono_electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix | +| two_e_dm_mo(mo_num,mo_num,mo_num,mo_num) | double precision | two e- density matrix | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| v_grad(n) | double precision | the gradient | +| max_elem | double precision | maximum element of the gradient | + +Internal: +| grad(mo_num,mo_num) | double precison | gradient before the tranformation in a vector | +| A((mo_num,mo_num) | doubre precision | gradient after the permutations | +| norm | double precision | norm of the gradient | +| p, q | integer | indexes of the element in the matrix grad | +| i | integer | index for the tranformation in a vector | +| r, s, t | integer | indexes dor the sums | +| t1, t2, t3 | double precision | t3 = t2 - t1, time to compute the gradient | +| t4, t5, t6 | double precission | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bi-electronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the two e- density matrix | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bi-electronic integrals | +| dnrm2 | double precision | (Lapack) norm | + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f +subroutine gradient_opt(n,v_grad,max_elem) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: v_grad(n), max_elem + + ! internal + double precision, allocatable :: grad(:,:),A(:,:) + double precision :: norm + integer :: i,p,q,r,s,t + double precision :: t1,t2,t3,t4,t5,t6 + + double precision, allocatable :: tmp_accu(:,:) + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:) + + ! Functions + double precision :: get_two_e_integral, dnrm2 + + + print*,'' + print*,'---gradient---' + + ! Allocation of shared arrays + allocate(grad(mo_num,mo_num),A(mo_num,mo_num)) + + ! Initialization omp + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s,t, & + !$OMP tmp_accu, tmp_bi_int_3, tmp_2rdm_3) & + !$OMP SHARED(grad, one_e_dm_mo, mo_num,mo_one_e_integrals, & + !$OMP mo_integrals_map,t4,t5,t6) & + !$OMP DEFAULT(SHARED) + + ! Allocation of private arrays + allocate(tmp_accu(mo_num,mo_num)) + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num)) +#+END_SRC + +** Calculation +*** Initialization +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !$OMP DO + do q = 1, mo_num + do p = 1,mo_num + grad(p,q) = 0d0 + enddo + enddo + !$OMP END DO +#+END_SRC + +*** Term 1 + +Without optimization the term 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + grad(p,q) = grad(p,q) & + + mo_one_e_integrals(p,r) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(r,q) * one_e_dm_mo(p,r) + enddo + enddo +enddo + +Since the matrix multiplication A.B is defined like : +\begin{equation} +c_{ij} = \sum_k a_{ik}.b_{kj} +\end{equation} +The previous equation can be rewritten as a matrix multplication + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !**************** + ! Opt first term + !**************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('N','N',mo_num,mo_num,mo_num,1d0,mo_one_e_integrals,& + mo_num,one_e_dm_mo,mo_num,0d0,tmp_accu,mo_num) + + !$OMP DO + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + (tmp_accu(p,q) - tmp_accu(q,p)) + + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'Gradient, first term (s) :', t6 + !$OMP END MASTER +#+END_SRC + +*** Term 2 + +Without optimization the second term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + do t= 1, mo_num + + grad(p,q) = grad(p,q) & + + get_two_e_integral(p,t,r,s,mo_integrals_map) * two_e_dm_mo(r,s,q,t) & + - get_two_e_integral(r,s,q,t,mo_integrals_map) * two_e_dm_mo(p,t,r,s) + enddo + enddo + enddo + enddo +enddo + +Using the bielectronic integral properties : +get_two_e_integral(p,t,r,s,mo_integrals_map) = get_two_e_integral(r,s,p,t,mo_integrals_map) + +Using the two body matrix properties : +two_e_dm_mo(p,t,r,s) = two_e_dm_mo(r,s,p,t) + +t is one the right, we can put it on the external loop and create 3 +indexes temporary array +r,s can be seen as one index + +By doing so, a matrix multiplication appears + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + !***************** + ! Opt second term + !***************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_bi_int_3(r,s,p) = get_two_e_integral(r,s,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do q = 1, mo_num + do s = 1, mo_num + do r = 1, mo_num + + tmp_2rdm_3(r,s,q) = two_e_dm_mo(r,s,q,t) + + enddo + enddo + enddo + + call dgemm('T','N',mo_num,mo_num,mo_num*mo_num,1d0,tmp_bi_int_3,& + mo_num*mo_num,tmp_2rdm_3,mo_num*mo_num,0d0,tmp_accu,mo_num) + + !$OMP CRITICAL + do q = 1, mo_num + do p = 1, mo_num + + grad(p,q) = grad(p,q) + tmp_accu(p,q) - tmp_accu(q,p) + + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6 = t5-t4 + print*,'Gradient second term (s) : ', t6 + !$OMP END MASTER +#+END_SRC + +*** Deallocation of private arrays +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + deallocate(tmp_bi_int_3,tmp_2rdm_3,tmp_accu) + + !$OMP END PARALLEL + + call omp_set_max_active_levels(4) +#+END_SRC + +*** Permutation, 2D matrix -> vector, transformation +In addition there is a permutation in the gradient formula : +\begin{equation} +P_{pq} = 1 - (p <-> q) +\end{equation} + +We need a vector to use the gradient. Here the gradient is a +antisymetric matrix so we can transform it in a vector of length +mo_num*(mo_num-1)/2. + +Here we do these two things at the same time. + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + do i=1,n + call vec_to_mat_index(i,p,q) + v_grad(i)=(grad(p,q) - grad(q,p)) + enddo + + ! Debug, diplay the vector containing the gradient elements + if (debug) then + print*,'Vector containing the gradient :' + write(*,'(100(F10.5))') v_grad(1:n) + endif +#+END_SRC + +*** Norm of the gradient +The norm can be useful. +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + norm = dnrm2(n,v_grad,1) + print*, 'Gradient norm : ', norm +#+END_SRC + +*** Maximum element in the gradient +The maximum element in the gradient is very important for the +convergence criterion of the Newton method. + +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + ! Max element of the gradient + max_elem = 0d0 + do i = 1, n + if (ABS(v_grad(i)) > ABS(max_elem)) then + max_elem = v_grad(i) + endif + enddo + + print*,'Max element in the gradient :', max_elem + + ! Debug, display the matrix containting the gradient elements + if (debug) then + ! Matrix gradient + A = 0d0 + do q=1,mo_num + do p=1,mo_num + A(p,q) = grad(p,q) - grad(q,p) + enddo + enddo + print*,'Matrix containing the gradient :' + do i = 1, mo_num + write(*,'(100(F10.5))') A(i,1:mo_num) + enddo + endif +#+END_SRC + +*** Deallocation of shared arrays and end +#+BEGIN_SRC f90 :comments org :tangle gradient_opt.irp.f + deallocate(grad,A) + + print*,'---End gradient---' + + end subroutine + +#+END_SRC + diff --git a/src/mo_optimization/org/hessian_list_opt.org b/src/mo_optimization/org/hessian_list_opt.org new file mode 100644 index 00000000..3df461cf --- /dev/null +++ b/src/mo_optimization/org/hessian_list_opt.org @@ -0,0 +1,1141 @@ +* Hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute all the pairs (pq,rs) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +*Compute the hessian of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +| t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +| ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | +| tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bielectronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f +subroutine hessian_list_opt(n,m,list,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n,m,list(m) + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(m,m,m,m) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q,tmp_p,tmp_q,tmp_r,tmp_s + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:),ind_3_3(:,:,:) + double precision, allocatable :: tmp_bi_int_3_3(:,:,:), tmp_2rdm_3_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:),tmp_one_e_dm_mo(:,:),tmp_mo_one_e_integrals(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(m,m,m,m),tmp_one_e_dm_mo(mo_num,m),tmp_mo_one_e_integrals(mo_num,m)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP tmp_p,tmp_q,tmp_r,tmp_s,p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3, tmp_bi_int_3_3,tmp_2rdm_3_3, ind_3_3 ) & + !$OMP SHARED(m,list,hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map, & + !$OMP t1,t2,t3,t4,t5,t6,& + !$OMP tmp_mo_one_e_integrals,tmp_one_e_dm_mo)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(m,m), tmp_accu_sym(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP MASTER + do tmp_q = 1, m + do tmp_p = 1, m + tmp_accu_sym(tmp_p,tmp_q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP DO + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = 0d0 + enddo + enddo + enddo + enddo + !$OMP ENDDO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +Without optimization the term 1 of the line 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + tmp_mo_one_e_integrals(u,tmp_p) = mo_one_e_integrals(u,p) + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_s = 1, m + s = list(tmp_s) + do u = 1, mo_num + tmp_one_e_dm_mo(u,tmp_s) = one_e_dm_mo(u,s) + enddo + enddo + !$OMP END DO + + + call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + + !$OMP DO + do tmp_s = 1, m + do tmp_p = 1, m + + tmp_accu_sym(tmp_p,tmp_s) = 0.5d0 * (tmp_accu(tmp_p,tmp_s) + tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do tmp_s = 1, m + do tmp_p = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', m, m, mo_num, 1d0, tmp_mo_one_e_integrals,& + size(tmp_mo_one_e_integrals,1), tmp_one_e_dm_mo, size(tmp_one_e_dm_mo,1),& + 0d0, tmp_accu, size(tmp_accu,1)) + + !$OMP DO + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + !OMP END DO + + !$OMP DO + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +Without optimization the third term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + + enddo + enddo + enddo +enddo + +We can just re-order the indexes + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do tmp_s = 1, m + s = list(tmp_s) + do tmp_r = 1, m + r = list(tmp_r) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER + +#+END_SRC + +** Line 2, term 1 + +Without optimization the fourth term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(s,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,s,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +terms like : hessian(p,r,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do t = 1, mo_num + + do tmp_p = 1, m + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + p = list(tmp_p) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3, mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_p = 1, m + do tmp_s = 1, m + + tmp_accu_sym(tmp_s,tmp_p) = 0.5d0 * (tmp_accu(tmp_p,tmp_s)+tmp_accu(tmp_s,tmp_p)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + do tmp_p = 1, m + + hessian(tmp_p,tmp_r,tmp_r,tmp_s) = hessian(tmp_p,tmp_r,tmp_r,tmp_s) + tmp_accu_sym(tmp_p,tmp_s) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6=t5-t4 + print*,'l2 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(q,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,q,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +terms like : hessian(s,q,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !****************************** + ! Opt Second line, second term + !****************************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + + + !$OMP DO + do t = 1, mo_num + + do tmp_q = 1, m + q = list(tmp_q) + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,v,tmp_q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,v,tmp_r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1.d0, & + tmp_bi_int_3_3 , mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do tmp_r = 1, m + do tmp_q = 1, m + + tmp_accu_sym(tmp_q,tmp_r) = 0.5d0 * (tmp_accu(tmp_q,tmp_r) + tmp_accu(tmp_r,tmp_q)) + + enddo + enddo + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_s = 1, m + + hessian(tmp_s,tmp_q,tmp_r,tmp_s) = hessian(tmp_s,tmp_q,tmp_r,tmp_s) + tmp_accu_sym(tmp_q,tmp_r) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(u,v,p,r,mo_integrals_map) = +get_two_e_integral(p,r,u,v,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +With v on the external loop, using temporary arrays for each v and by +taking p,r and q,s as one dimension a matrix multplication +appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !-------- + ! part 1 + ! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) + !-------- + + allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + + tmp_bi_int_3_3(tmp_p,tmp_r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_q,tmp_s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do tmp_s = 1, m + + call dgemm('N','N',m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_s),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_r = 1, m + do tmp_q = 1, m + do tmp_p = 1, m + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_p,tmp_r,tmp_q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) +#+END_SRC + +With v on the external loop, using temporary arrays for each v and by +taking q,s and p,r as one dimension a matrix multplication +appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! part 2 + ! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + !-------- + + allocate(tmp_bi_int_3_3(m,m,mo_num), tmp_2rdm_3_3(mo_num,m,m),ind_3_3(m,m,m)) + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + + tmp_bi_int_3_3(tmp_q,tmp_s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_r = 1, m + r = list(tmp_r) + do tmp_p = 1, m + p = list(tmp_p) + do u = 1, mo_num + + tmp_2rdm_3_3(u,tmp_p,tmp_r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do tmp_r = 1, m + + call dgemm('N','N', m*m, m, mo_num, 1d0, tmp_bi_int_3_3,& + size(tmp_bi_int_3_3,1)*size(tmp_bi_int_3_3,2), tmp_2rdm_3_3(1,1,tmp_r),& + mo_num, 0d0, ind_3_3, size(ind_3_3,1) * size(ind_3_3,2)) + + !$OMP CRITICAL + do tmp_s = 1, m + s = list(tmp_s) + do tmp_q = 1, m + q = list(tmp_q) + do tmp_p = 1, m + p = list(tmp_p) + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + ind_3_3(tmp_q,tmp_s,tmp_p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3,ind_3_3) + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5 - t4 + print*,'l3 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + enddo + enddo + enddo +enddo + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 1 + ! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) + !-------- + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 2 + !- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_bi_int_3_3,& + mo_num*mo_num, tmp_2rdm_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 3 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 3 + !- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 4 +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !-------- + ! Part 4 + ! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + !-------- + + allocate(tmp_bi_int_3_3(mo_num,mo_num,m), tmp_2rdm_3_3(mo_num,mo_num,m)) + + !$OMP DO + do tmp_q = 1, m + q = list(tmp_q) + + do tmp_r = 1, m + r = list(tmp_r) + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3_3(u,t,tmp_r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do tmp_p = 1, m + p = list(tmp_p) + + do tmp_s = 1, m + s = list(tmp_s) + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3_3(u,t,tmp_s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', m, m, mo_num*mo_num, 1d0, tmp_2rdm_3_3,& + mo_num*mo_num, tmp_bi_int_3_3, mo_num*mo_num, 0d0, tmp_accu, m) + + !$OMP CRITICAL + do tmp_s = 1, m + do tmp_r = 1, m + + hessian(tmp_p,tmp_q,tmp_r,tmp_s) = hessian(tmp_p,tmp_q,tmp_r,tmp_s) + tmp_accu(tmp_s,tmp_r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + deallocate(tmp_bi_int_3_3, tmp_2rdm_3_3) + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t3 = t2 -t1 + print*,'Time to compute the hessian : ', t3 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the omp section ! +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, m + do r = 1, m + do q = 1, m + do p = 1, m + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'Time for permutations :',t6 + !$OMP END MASTER +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo + endif +#+END_SRC + +** Deallocation of shared arrays, end +#+BEGIN_SRC f90 :comments org :tangle hessian_list_opt.irp.f + deallocate(hessian,tmp_one_e_dm_mo,tmp_mo_one_e_integrals)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/hessian_opt.org b/src/mo_optimization/org/hessian_opt.org new file mode 100644 index 00000000..5b0642e3 --- /dev/null +++ b/src/mo_optimization/org/hessian_opt.org @@ -0,0 +1,1056 @@ +* Hessian + +The hessian of the CI energy with respects to the orbital rotation is : +(C-c C-x C-l) + +\begin{align*} +H_{pq,rs} &= \dfrac{\partial^2 E(x)}{\partial x_{pq}^2} \\ + &= \mathcal{P}_{pq} \mathcal{P}_{rs} [ \frac{1}{2} \sum_u [\delta_{qr}(h_p^u \gamma_u^s + h_u^s \gamma_p^u) + + \delta_{ps}(h_r^u \gamma_u^q + h_u^q \gamma_r^u)] + -(h_p^s \gamma_r^q + h_r^q \gamma_p^s) \\ + &+ \frac{1}{2} \sum_{tuv} [\delta_{qr}(v_{pt}^{uv} \Gamma_{uv}^{st} + v_{uv}^{st} \Gamma_{pt}^{uv}) + + \delta_{ps}(v_{uv}^{qt} \Gamma_{rt}^{uv} + v_{rt}^{uv}\Gamma_{uv}^{qt})] \\ + &+ \sum_{uv} (v_{pr}^{uv} \Gamma_{uv}^{qs} + v_{uv}^{qs} \Gamma_{pr}^{uv}) + - \sum_{tu} (v_{pu}^{st} \Gamma_{rt}^{qu}+v_{pu}^{tr} \Gamma_{tr}^{qu}+v_{rt}^{qu}\Gamma_{pu}^{st} + v_{tr}^{qu}\Gamma_{pu}^{ts}) +\end{align*} +With pq a permutation operator : + +\begin{align*} +\mathcal{P}_{pq}= 1 - (p \leftrightarrow q) +\end{align*} +\begin{align*} +\mathcal{P}_{pq} \mathcal{P}_{rs} &= (1 - (p \leftrightarrow q))(1 - (r \leftrightarrow s)) \\ +&= 1 - (p \leftrightarrow q) - (r \leftrightarrow s) + (p \leftrightarrow q, r \leftrightarrow s) +\end{align*} + +Where p,q,r,s,t,u,v are general spatial orbitals +mo_num : the number of molecular orbitals +$$h$$ : One electron integrals +$$\gamma$$ : One body density matrix (state average in our case) +$$v$$ : Two electron integrals +$$\Gamma$$ : Two body density matrice (state average in our case) + +The hessian is a 4D matrix of size mo_num, p,q,r,s,t,u,v take all the +values between 1 and mo_num (1 and mo_num include). + +To do that we compute all the pairs (pq,rs) + +Source : +Seniority-based coupled cluster theory +J. Chem. Phys. 141, 244104 (2014); https://doi.org/10.1063/1.4904384 +Thomas M. Henderson, Ireneusz W. Bulik, Tamar Stein, and Gustavo E. Scuseria + +*Compute the hessian of energy with respects to orbital rotations* + +Provided: +| mo_num | integer | number of MOs | +| mo_one_e_integrals(mo_num,mo_num) | double precision | mono-electronic integrals | +| one_e_dm_mo(mo_num,mo_num) | double precision | one e- density matrix (state average) | +| two_e_dm_mo(mo_num,mo_num,mo_num) | double precision | two e- density matrix (state average) | + +Input: +| n | integer | mo_num*(mo_num-1)/2 | + +Output: +| H(n,n) | double precision | Hessian matrix | +| h_tmpr(mo_num,mo_num,mo_num,mo_num) | double precision | Complete hessian matrix before the tranformation | +| | | in n by n matrix | + +Internal: +| hessian(mo_num,mo_num,mo_num,mo_num) | double precision | temporary array containing the hessian before | +| | | the permutations | +| p, q, r, s | integer | indexes of the hessian elements | +| t, u, v | integer | indexes for the sums | +| pq, rs | integer | indexes for the transformation of the hessian | +| | | (4D -> 2D) | +| t1,t2,t3 | double precision | t3 = t2 - t1, time to compute the hessian | +| t4,t5,t6 | double precision | t6 = t5 - t4, time to compute each element | +| tmp_bi_int_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the bielectronic integrals | +| tmp_2rdm_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for the 2 body density matrix | +| ind_3(mo_num,mo_num,mo_num) | double precision | 3 indexes temporary array for matrix multiplication | +| tmp_accu(mo_num,mo_num) | double precision | temporary array | +| tmp_accu_sym(mo_num,mo_num) | double precision | temporary array | + +Function: +| get_two_e_integral | double precision | bielectronic integrals | + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f +subroutine hessian_opt(n,H,h_tmpr) + use omp_lib + include 'constants.h' + + implicit none + + ! Variables + + ! in + integer, intent(in) :: n + + ! out + double precision, intent(out) :: H(n,n),h_tmpr(mo_num,mo_num,mo_num,mo_num) + + ! internal + double precision, allocatable :: hessian(:,:,:,:)!, h_tmpr(:,:,:,:) + double precision, allocatable :: H_test(:,:) + integer :: p,q + integer :: r,s,t,u,v,k + integer :: pq,rs + double precision :: t1,t2,t3,t4,t5,t6 + ! H_test : monum**2 by mo_num**2 double precision matrix to debug the H matrix + + double precision, allocatable :: tmp_bi_int_3(:,:,:), tmp_2rdm_3(:,:,:), ind_3(:,:,:) + double precision, allocatable :: tmp_accu(:,:), tmp_accu_sym(:,:), tmp_accu_shared(:,:),tmp_accu_sym_shared(:,:) + + ! Function + double precision :: get_two_e_integral + + print*,'' + print*,'---hessian---' + print*,'Use the full hessian' + + ! Allocation of shared arrays + allocate(hessian(mo_num,mo_num,mo_num,mo_num))!,h_tmpr(mo_num,mo_num,mo_num,mo_num)) + allocate(tmp_accu_shared(mo_num,mo_num),tmp_accu_sym_shared(mo_num,mo_num)) + + ! Calculations + + ! OMP + call omp_set_max_active_levels(1) + + !$OMP PARALLEL & + !$OMP PRIVATE( & + !$OMP p,q,r,s, tmp_accu, tmp_accu_sym, & + !$OMP u,v,t, tmp_bi_int_3, tmp_2rdm_3, ind_3) & + !$OMP SHARED(hessian,h_tmpr,H, mo_num,n, & + !$OMP mo_one_e_integrals, one_e_dm_mo, & + !$OMP two_e_dm_mo,mo_integrals_map,tmp_accu_sym_shared, tmp_accu_shared, & + !$OMP t1,t2,t3,t4,t5,t6)& + !$OMP DEFAULT(NONE) + + ! Allocation of private arrays + allocate(tmp_bi_int_3(mo_num,mo_num,mo_num)) + allocate(tmp_2rdm_3(mo_num,mo_num,mo_num), ind_3(mo_num,mo_num,mo_num)) + allocate(tmp_accu(mo_num,mo_num), tmp_accu_sym(mo_num,mo_num)) +#+END_SRC + +** Initialization of the arrays +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_shared(p,q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP MASTER + do q = 1, mo_num + do p = 1, mo_num + tmp_accu_sym(p,q) = 0d0 + enddo + enddo + !$OMP END MASTER + + !$OMP DO + do s=1,mo_num + do r=1,mo_num + do q=1,mo_num + do p=1,mo_num + hessian(p,q,r,s) = 0d0 + enddo + enddo + enddo + enddo + !$OMP ENDDO + + !$OMP MASTER + CALL wall_TIME(t1) + !$OMP END MASTER +#+END_SRC + +** Line 1, term 1 + +Without optimization the term 1 of the line 1 is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,p) * one_e_dm_mo(u,s) & + + mo_one_e_integrals(s,u) * one_e_dm_mo(p,u)) + + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{p,s} = \sum_u a_{p,u} b_{u,s}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + + !$OMP DO + do s = 1, mo_num + do p = 1, mo_num + + tmp_accu_sym_shared(p,s) = 0.5d0 * (tmp_accu_shared(p,s) + tmp_accu_shared(s,p)) + + enddo + enddo + !$OMP END DO + + !$OMP DO + do s = 1, mo_num + do p = 1, mo_num + do r = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym_shared(p,s) + + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 1',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 2 +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + mo_one_e_integrals(u,r) * (one_e_dm_mo(u,q) & + + mo_one_e_integrals(q,u) * (one_e_dm_mo(r,u)) + enddo + endif + + enddo + enddo + enddo +enddo + +We can write the formula as matrix multiplication. +$$c_{r,q} = \sum_u a_{r,u} b_{u,q}$$ + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + call dgemm('T','N', mo_num, mo_num, mo_num, 1d0, mo_one_e_integrals,& + size(mo_one_e_integrals,1), one_e_dm_mo, size(one_e_dm_mo,1),& + 0d0, tmp_accu_shared, size(tmp_accu_shared,1)) + + !$OMP DO + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym_shared(q,r) = 0.5d0 * (tmp_accu_shared(q,r) + tmp_accu_shared(r,q)) + + enddo + enddo + !OMP END DO + + !$OMP DO + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym_shared(q,r) + + enddo + enddo + enddo + !OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 1, term 3 + +Without optimization the third term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q) & + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s)) + + enddo + enddo + enddo +enddo + +We can just re-order the indexes + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - mo_one_e_integrals(s,p) * one_e_dm_mo(r,q)& + - mo_one_e_integrals(q,r) * one_e_dm_mo(p,s) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l1 3',t6 + !$OMP END MASTER + +#+END_SRC + +** Line 2, term 1 + +Without optimization the fourth term is : + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (q==r) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(u,v,p,t,mo_integrals_map) * two_e_dm_mo(u,v,s,t) & + + get_two_e_integral(s,t,u,v,mo_integrals_map) * two_e_dm_mo(p,t,u,v)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using bielectronic integral properties : +get_two_e_integral(s,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,s,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(p,t,u,v) = two_e_dm_mo(u,v,p,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{p,s} = \sum_{uv} a_{p,uv} b_{uv,s}$$ + +There is a kroenecker delta $$\delta_{qr}$$, so we juste compute the +terms like : hessian(p,r,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do p = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,p) = get_two_e_integral(u,v,p,t,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num ! error, the p might be replace by a s + ! it's a temporary array, the result by replacing p and s will be the same + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,p) = two_e_dm_mo(u,v,p,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3, mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do p = 1, mo_num + do s = 1, mo_num + + tmp_accu_sym(s,p) = 0.5d0 * (tmp_accu(p,s)+tmp_accu(s,p)) + + enddo + enddo + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + hessian(p,r,r,s) = hessian(p,r,r,s) + tmp_accu_sym(p,s) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6=t5-t4 + print*,'l2 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 2, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + if (p==s) then + do t = 1, mo_num + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + 0.5d0 * ( & + get_two_e_integral(q,t,u,v,mo_integrals_map) * two_e_dm_mo(r,t,u,v) & + + get_two_e_integral(u,v,r,t,mo_integrals_map) * two_e_dm_mo(u,v,q,t)) + + enddo + enddo + enddo + endif + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(q,t,u,v,mo_integrals_map) = +get_two_e_integral(u,v,q,t,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(r,t,u,v) = two_e_dm_mo(u,v,r,t) + +With t on the external loop, using temporary arrays for each t and by +taking u,v as one variable a matrix multplication appears. +$$c_{q,r} = \sum_uv a_{q,uv} b_{uv,r}$$ + +There is a kroenecker delta $$\delta_{ps}$$, so we juste compute the +terms like : hessian(s,q,r,s) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !****************************** + ! Opt Second line, second term + !****************************** + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do t = 1, mo_num + + do q = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,v,q) = get_two_e_integral(u,v,q,t,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do v = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,v,r) = two_e_dm_mo(u,v,r,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1.d0, & + tmp_bi_int_3 , mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, & + 0.d0, tmp_accu, size(tmp_accu,1)) + + do r = 1, mo_num + do q = 1, mo_num + + tmp_accu_sym(q,r) = 0.5d0 * (tmp_accu(q,r) + tmp_accu(r,q)) + + enddo + enddo + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do s = 1, mo_num + + hessian(s,q,r,s) = hessian(s,q,r,s) + tmp_accu_sym(q,r) + + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + !$OMP END DO + + !$OMP MASTER + CALL wall_TIME(t5) + t6=t5-t4 + print*,'l2 2',t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 1 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do u = 1, mo_num + do v = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + + get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) & + + get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + + enddo + enddo + + enddo + enddo + enddo +enddo + +Using the two electron density matrix properties : +get_two_e_integral(u,v,p,r,mo_integrals_map) = +get_two_e_integral(p,r,u,v,mo_integrals_map) + +Using the two electron density matrix properties : +two_e_dm_mo(u,v,q,s) = two_e_dm_mo(q,s,u,v) + +With v on the external loop, using temporary arrays for each v and by +taking p,r and q,s as one dimension a matrix multplication +appears. $$c_{pr,qs} = \sum_u a_{pr,u} b_{u,qs}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + call wall_TIME(t4) + !$OMP END MASTER + + !-------- + ! part 1 + ! get_two_e_integral(u,v,p,r,mo_integrals_map) * two_e_dm_mo(u,v,q,s) + !-------- + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do r = 1, mo_num + do p = 1, mo_num + + tmp_bi_int_3(p,r,u) = get_two_e_integral(p,r,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do s = 1, mo_num + do q = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,q,s) = two_e_dm_mo(q,s,u,v) + + enddo + enddo + enddo + + do s = 1, mo_num + + call dgemm('N','N',mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,s),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(p,r,q) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + +#+END_SRC + +With v on the external loop, using temporary arrays for each v and by +taking q,s and p,r as one dimension a matrix multplication +appears. $$c_{qs,pr} = \sum_u a_{qs,u}*b_{u,pr}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! part 2 + ! get_two_e_integral(q,s,u,v,mo_integrals_map) * two_e_dm_mo(p,r,u,v) + !-------- + + !$OMP DO + do v = 1, mo_num + + do u = 1, mo_num + do s = 1, mo_num + do q = 1, mo_num + + tmp_bi_int_3(q,s,u) = get_two_e_integral(q,s,u,v,mo_integrals_map) + + enddo + enddo + enddo + + do r = 1, mo_num + do p = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,p,r) = two_e_dm_mo(p,r,u,v) + + enddo + enddo + enddo + + do r = 1, mo_num + call dgemm('N','N', mo_num*mo_num, mo_num, mo_num, 1d0, tmp_bi_int_3,& + size(tmp_bi_int_3,1)*size(tmp_bi_int_3,2), tmp_2rdm_3(1,1,r),& + size(tmp_2rdm_3,1), 0d0, ind_3, size(ind_3,1) * size(ind_3,2)) + + !$OMP CRITICAL + do s = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + hessian(p,q,r,s) = hessian(p,q,r,s) + ind_3(q,s,p) + enddo + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5 - t4 + print*,'l3 1', t6 + !$OMP END MASTER +#+END_SRC + +** Line 3, term 2 + +do p = 1, mo_num + do q = 1, mo_num + do r = 1, mo_num + do s = 1, mo_num + + do t = 1, mo_num + do u = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) & + - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) & + - get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) & + - get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) & + - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + + enddo + enddo + + enddo + enddo + enddo +enddo + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 1 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 1 + ! - get_two_e_integral(s,t,p,u,mo_integrals_map) * two_e_dm_mo(r,t,q,u) + !-------- + + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,r,t) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,s,t,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 2 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 2 + !- get_two_e_integral(t,s,p,u,mo_integrals_map) * two_e_dm_mo(t,r,q,u) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,r) = two_e_dm_mo(q,u,t,r) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,s) = - get_two_e_integral(u,t,s,p,mo_integrals_map) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_bi_int_3,& + mo_num*mo_num, tmp_2rdm_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 3 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 3 + !- get_two_e_integral(q,u,r,t,mo_integrals_map) * two_e_dm_mo(p,u,s,t) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,q,t,r,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,s,t) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + +#+END_SRC + +With q on the external loop, using temporary arrays for each p and q, +and taking u,v as one variable, a matrix multiplication appears: +$$c_{r,s} = \sum_{ut} a_{r,ut} b_{ut,s}$$ + +Part 4 +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !-------- + ! Part 4 + ! - get_two_e_integral(q,u,t,r,mo_integrals_map) * two_e_dm_mo(p,u,t,s) + !-------- + + !$OMP DO + do q = 1, mo_num + + do r = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_bi_int_3(u,t,r) = - get_two_e_integral(u,t,r,q,mo_integrals_map) + + enddo + enddo + enddo + + do p = 1, mo_num + + do s = 1, mo_num + do t = 1, mo_num + do u = 1, mo_num + + tmp_2rdm_3(u,t,s) = two_e_dm_mo(p,u,t,s) + + enddo + enddo + enddo + + call dgemm('T','N', mo_num, mo_num, mo_num*mo_num, 1d0, tmp_2rdm_3,& + mo_num*mo_num, tmp_bi_int_3, mo_num*mo_num, 0d0, tmp_accu, mo_num) + + !$OMP CRITICAL + do s = 1, mo_num + do r = 1, mo_num + + hessian(p,q,r,s) = hessian(p,q,r,s) + tmp_accu(s,r) + + enddo + enddo + !$OMP END CRITICAL + + enddo + + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'l3 2',t6 + !$OMP END MASTER + + !$OMP MASTER + CALL wall_TIME(t2) + t3 = t2 -t1 + print*,'Time to compute the hessian : ', t3 + !$OMP END MASTER +#+END_SRC + +** Deallocation of private arrays +In the omp section ! +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + deallocate(tmp_bi_int_3, tmp_2rdm_3, tmp_accu, tmp_accu_sym, ind_3) +#+END_SRC + +** Permutations +As we mentioned before there are two permutation operator in the +formula : +Hessian(p,q,r,s) = P_pq P_rs [...] +=> Hessian(p,q,r,s) = (p,q,r,s) - (q,p,r,s) - (p,q,s,r) + (q,p,s,r) + +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + !$OMP MASTER + CALL wall_TIME(t4) + !$OMP END MASTER + + !$OMP DO + do s = 1, mo_num + do r = 1, mo_num + do q = 1, mo_num + do p = 1, mo_num + + h_tmpr(p,q,r,s) = (hessian(p,q,r,s) - hessian(q,p,r,s) - hessian(p,q,s,r) + hessian(q,p,s,r)) + + enddo + enddo + enddo + enddo + !$OMP END DO + + !$OMP MASTER + call wall_TIME(t5) + t6 = t5-t4 + print*,'Time for permutations :',t6 + !$OMP END MASTER +#+END_SRC + +** 4D -> 2D matrix +We need a 2D matrix for the Newton method's. Since the Hessian is +"antisymmetric" : $$H_{pq,rs} = -H_{rs,pq}$$ +We can write it as a 2D matrix, N by N, with N = mo_num(mo_num-1)/2 +with p 2D :',t6 + !$OMP END MASTER + + !$OMP END PARALLEL + call omp_set_max_active_levels(4) + + ! Display + if (debug) then + print*,'2D Hessian matrix' + do pq = 1, n + write(*,'(100(F10.5))') H(pq,:) + enddo + endif +#+END_SRC + +** Deallocation of shared arrays, end +#+BEGIN_SRC f90 :comments org :tangle hessian_opt.irp.f + deallocate(hessian)!,h_tmpr) +! h_tmpr is intent out in order to debug the subroutine +! It's why we don't deallocate it + + print*,'---End hessian---' + +end subroutine +#+END_SRC + diff --git a/src/mo_optimization/org/my_providers.org b/src/mo_optimization/org/my_providers.org new file mode 100644 index 00000000..b88cbd62 --- /dev/null +++ b/src/mo_optimization/org/my_providers.org @@ -0,0 +1,308 @@ +* Providers +** Dimensions of MOs + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of MOs we can build, + ! with i>j + END_DOC + + n_mo_dim = mo_num*(mo_num-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_core ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of core MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_core = dim_list_core_orb*(dim_list_core_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_act ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of active MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_act = dim_list_act_orb*(dim_list_act_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_inact ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of inactive MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_inact = dim_list_inact_orb*(dim_list_inact_orb-1)/2 + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ integer, n_mo_dim_virt ] + implicit none + BEGIN_DOC + ! Number of different pairs (i,j) of virtual MOs we can build, + ! with i>j + END_DOC + + n_mo_dim_virt = dim_list_virt_orb*(dim_list_virt_orb-1)/2 + +END_PROVIDER +#+END_SRC + +** Energies/criterions +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_st_av_energy ] + implicit none + BEGIN_DOC + ! State average CI energy + END_DOC + + !call update_st_av_ci_energy(my_st_av_energy) + call state_average_energy(my_st_av_energy) + +END_PROVIDER +#+END_SRC + +** Gradient/hessian +*** Orbital optimization +**** With all the MOs +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_opt, (n_mo_dim) ] +&BEGIN_PROVIDER [ double precision, my_CC1_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map + + call gradient_opt(n_mo_dim, my_gradient_opt, my_CC1_opt, norm_grad) + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_opt, (n_mo_dim, n_mo_dim) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, for all the MOs. + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(mo_num, mo_num, mo_num, mo_num)) + + call hessian_list_opt(n_mo_dim, my_hessian_opt, h_f) + +END_PROVIDER +#+END_SRC + +**** With the list of active MOs +Can be generalized to any mo_class by changing the list/dimension +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_list_opt, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC2_opt ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + PROVIDE mo_two_e_integrals_in_map !one_e_dm_mo two_e_dm_mo mo_one_e_integrals + + call gradient_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_list_opt, my_CC2_opt, norm_grad) + +END_PROVIDER +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_list_opt, (n_mo_dim_act, n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Gradient of the energy with respect to the MO rotations, only for the active MOs ! + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision, allocatable :: h_f(:,:,:,:) + + PROVIDE mo_two_e_integrals_in_map + + allocate(h_f(dim_list_act_orb, dim_list_act_orb, dim_list_act_orb, dim_list_act_orb)) + + call hessian_list_opt(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_list_opt, h_f) + +END_PROVIDER +#+END_SRC + +*** Orbital localization +**** Gradient +***** Core MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_core, (n_mo_dim_core) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_core ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the core MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_core, dim_list_core_orb, list_core, my_gradient_loc_core, my_CC_loc_core , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Active MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_act, (n_mo_dim_act) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_act ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the active MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_act, dim_list_act_orb, list_act, my_gradient_loc_act, my_CC_loc_act , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Inactive MOs +#+BEGIN_SRC f90 :comments org ! +:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_inact, (n_mo_dim_inact) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_inact ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the inactive MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_inact, dim_list_inact_orb, list_inact, my_gradient_loc_inact, my_CC_loc_inact , norm_grad) + +END_PROVIDER +#+END_SRC + +***** Virtual MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_gradient_loc_virt, (n_mo_dim_virt) ] +&BEGIN_PROVIDER [ double precision, my_CC_loc_virt ] + implicit none + BEGIN_DOC + ! - Gradient of the MO localization with respect to the MO rotations for the virtual MOs + ! - Maximal element of the gradient in absolute value + END_DOC + + double precision :: norm_grad + + !PROVIDE something ? + + call gradient_localization(n_mo_dim_virt, dim_list_virt_orb, list_virt, my_gradient_loc_virt, my_CC_loc_virt , norm_grad) + +END_PROVIDER +#+END_SRC + +**** Hessian +***** Core MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_core, (n_mo_dim_core) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the core MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_core, dim_list_core_orb, list_core, my_hessian_loc_core) + +END_PROVIDER +#+END_SRC + +***** Active MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_act, (n_mo_dim_act) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the active MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_act, dim_list_act_orb, list_act, my_hessian_loc_act) + +END_PROVIDER +#+END_SRC + +***** Inactive MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_inact, (n_mo_dim_inact) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the inactive MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_inact, dim_list_inact_orb, list_inact, my_hessian_loc_inact) + +END_PROVIDER +#+END_SRC + +***** Virtual MOs +#+BEGIN_SRC f90 :comments org +!:tangle my_providers.irp.f +BEGIN_PROVIDER [ double precision, my_hessian_loc_virt, (n_mo_dim_virt) ] + implicit none + BEGIN_DOC + ! - Hessian of the MO localization with respect to the MO rotations for the virtual MOs + END_DOC + + !PROVIDE something ? + + call hessian_localization(n_mo_dim_virt, dim_list_virt_orb, list_virt, my_hessian_loc_virt) + +END_PROVIDER +#+END_SRC + diff --git a/src/mo_optimization/org/optimization.org b/src/mo_optimization/org/optimization.org new file mode 100644 index 00000000..cbb7b752 --- /dev/null +++ b/src/mo_optimization/org/optimization.org @@ -0,0 +1,91 @@ +#+BEGIN_SRC f90 :comments org :tangle optimization.irp.f +program optimization + + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + call run_optimization + +end +#+END_SRC + +#+BEGIN_SRC f90 :comments org :tangle optimization.irp.f +subroutine run_optimization + + implicit none + + double precision :: e_cipsi, e_opt, delta_e + integer :: nb_iter,i + logical :: not_converged + character (len=100) :: filename + + PROVIDE psi_det psi_coef mo_two_e_integrals_in_map + + not_converged = .True. + nb_iter = 0 + + ! To start from the wf + N_det_max = max(n_det,5) + TOUCH N_det_max + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt') + write(10,*) " Ndet E_cipsi E_opt Delta_e" + call state_average_energy(e_cipsi) + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_cipsi, 0d0 + close(10) + + do while (not_converged) + print*,'' + print*,'======================' + print*,' Cipsi step:', nb_iter + print*,'======================' + print*,'' + print*,'********** cipsi step **********' + ! cispi calculation + call run_stochastic_cipsi + + ! State average energy after the cipsi step + call state_average_energy(e_cipsi) + + print*,'' + print*,'********** optimization step **********' + ! orbital optimization + call run_orb_opt_trust_v2 + + ! State average energy after the orbital optimization + call state_average_energy(e_opt) + + print*,'' + print*,'********** diff step **********' + ! Gain in energy + delta_e = e_opt - e_cipsi + print*, 'Gain in energy during the orbital optimization:', delta_e + + open(unit=10, file=trim(ezfio_filename)//'/mo_optimization/result_opt', position='append') + write(10,'(I10, 3F15.7)') n_det, e_cipsi, e_opt, delta_e + close(10) + + ! Exit + if (delta_e > 1d-12) then + print*, 'WARNING, something wrong happened' + print*, 'The gain (delta_e) in energy during the optimization process' + print*, 'is > 0, but it must be < 0' + print*, 'The program will exit' + exit + endif + + if (n_det > n_det_max_opt) then + print*, 'The number of determinants in the wf > n_det_max_opt' + print*, 'The program will exit' + exit + endif + + ! To double the number of determinants in the wf + N_det_max = int(dble(n_det * 2)*0.9) + TOUCH N_det_max + + nb_iter = nb_iter + 1 + enddo + +end + +#+END_SRC diff --git a/src/mo_optimization/org/orb_opt_trust_v2.org b/src/mo_optimization/org/orb_opt_trust_v2.org new file mode 100644 index 00000000..dbcd3c19 --- /dev/null +++ b/src/mo_optimization/org/orb_opt_trust_v2.org @@ -0,0 +1,349 @@ +* Orbital optimization program + +This is an optimization program for molecular orbitals. It produces +orbital rotations in order to lower the energy of a truncated wave +function. +This program just optimize the orbitals for a fixed number of +determinants. This optimization process must be repeated for different +number of determinants. + +#+BEGIN_SRC f90 :comments org :tangle orb_opt.irp.f +#+END_SRC + +* Main program : orb_opt_trust + +#+BEGIN_SRC f90 :comments org :tangle orb_opt.irp.f +program orb_opt + read_wf = .true. ! must be True for the orbital optimization !!! + TOUCH read_wf + io_mo_two_e_integrals = 'None' + TOUCH io_mo_two_e_integrals + call run_orb_opt_trust_v2 +end +#+END_SRC + +* Subroutine : run_orb_opt_trust + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + +#+END_SRC + +Subroutine to optimize the MOs using a trust region algorithm: +- choice of the method +- initialization +- optimization until convergence + +The optimization use the trust region algorithm, the different parts +are explained in the corresponding subroutine files. + +qp_edit: +| thresh_opt_max_elem_grad | +| optimization_max_nb_iter | +| optimization_method | + +Provided: +| mo_num | integer | number of MOs | +| ao_num | integer | number of AOs | +| N_states | integer | number of states | +| ci_energy(N_states) | double precision | CI energies | +| state_average_weight(N_states) | double precision | Weight of the different states | + +Variables: +| m | integer | number of active MOs | +| tmp_n | integer | m*(m-1)/2, number of MO parameters | +| tmp_n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| v_grad(tmp_n) | double precision | gradient | +| H(tmp_n,tmp_n) | double precision | hessian (2D) | +| h_f(m,m,m,m) | double precision | hessian (4D) | +| e_val(m) | double precision | eigenvalues of the hessian | +| w(m,m) | double precision | eigenvectors of the hessian | +| x(m) | double precision | step given by the trust region | +| m_x(m,m) | double precision | step given by the trust region after | +| tmp_R(m,m) | double precision | rotation matrix for active MOs | +| R(mo_num,mo_num) | double precision | full rotation matrix | +| prev_mos(ao_num,mo_num) | double precision | previous MOs (before the rotation) | +| new_mos(ao_num,mo_num) | double precision | new MOs (after the roration) | +| delta | double precision | radius of the trust region | +| rho | double precision | agreement between the model and the exact function | +| max_elem | double precision | maximum element in the gradient | +| i | integer | index | +| tmp_i,tmp_j | integer | indexes in the subspace containing only | +| | | the active MOs | +| converged | logical | convergence of the algorithm | +| cancel_step | logical | if the step must be cancelled | +| nb_iter | integer | number of iterations (accepted) | +| nb_diag | integer | number of diagonalizations of the CI matrix | +| nb_cancel | integer | number of cancelled steps for the actual iteration | +| nb_cancel_tot | integer | total number of cancel steps | +| info | integer | if 0 ok, else problem in the diagonalization of | +| | | the hessian with the Lapack routine | +| criterion | double precision | energy at a given step | +| prev_criterion | double precision | energy before the rotation | +| criterion_model | double precision | estimated energy after the rotation using | +| | | a Taylor series | +| must_exit | logical | To exit the trust region algorithm when | +| | | criterion - criterion_model is too small | +| enforce_step_cancellation | logical | To force the cancellation of the step if the | +| | | error in the rotation matrix is too large | + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f +subroutine run_orb_opt_trust_v2 + + include 'constants.h' + + implicit none + + BEGIN_DOC + ! Orbital optimization + END_DOC + + ! Variables + + double precision, allocatable :: R(:,:) + double precision, allocatable :: H(:,:),h_f(:,:,:,:) + double precision, allocatable :: v_grad(:) + double precision, allocatable :: prev_mos(:,:),new_mos(:,:) + integer :: info + integer :: n + integer :: i,j,p,q,k + double precision :: max_elem_grad, delta, rho, norm_grad, normalization_factor + logical :: cancel_step + integer :: nb_iter, nb_diag, nb_cancel, nb_cancel_tot, nb_sub_iter + double precision :: t1, t2, t3 + double precision :: prev_criterion, criterion, criterion_model + logical :: not_converged, must_exit, enforce_step_cancellation + integer :: m, tmp_n, tmp_i, tmp_j, tmp_k, tmp_n2 + integer,allocatable :: tmp_list(:), key(:) + double precision, allocatable :: tmp_m_x(:,:),tmp_R(:,:), tmp_x(:), W(:,:), e_val(:) + + PROVIDE mo_two_e_integrals_in_map ci_energy psi_det psi_coef +#+END_SRC + +** Allocation + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + allocate(R(mo_num,mo_num)) ! rotation matrix + allocate(prev_mos(ao_num,mo_num), new_mos(ao_num,mo_num)) ! old and new MOs + + ! Definition of m and tmp_n + m = dim_list_act_orb + tmp_n = m*(m-1)/2 + + allocate(tmp_list(m)) + allocate(tmp_R(m,m), tmp_m_x(m,m), tmp_x(tmp_n)) + allocate(e_val(tmp_n),key(tmp_n),v_grad(tmp_n)) + +#+END_SRC + +** Method + There are three different methods : + - the "full" hessian, which uses all the elements of the hessian + matrix" + - the "diagonal" hessian, which uses only the diagonal elements of the + hessian + - without the hessian (hessian = identity matrix) + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + !Display the method + print*, 'Method :', optimization_method + if (optimization_method == 'full') then + print*, 'Full hessian' + allocate(H(tmp_n,tmp_n), h_f(m,m,m,m),W(tmp_n,tmp_n)) + tmp_n2 = tmp_n + elseif (optimization_method == 'diag') then + print*,'Diagonal hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 + elseif (optimization_method == 'none') then + print*,'No hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 + else + print*,'Unknown optimization_method, please select full, diag or none' + call abort + endif + print*, 'Absolute value of the hessian:', absolute_eig +#+END_SRC + +** Calculations +*** Algorithm + +Here is the main algorithm of the optimization: +- First of all we initialize some parameters and we compute the + criterion (the ci energy) before doing any MO rotations +- We compute the gradient and the hessian for the active MOs +- We diagonalize the hessian +- We compute a step and loop to reduce the radius of the + trust region (and the size of the step by the way) until the step is + accepted +- We repeat the process until the convergence + NB: the convergence criterion can be changed + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + ! Loop until the convergence of the optimization + ! call diagonalize_ci + + !### Initialization ### + nb_iter = 0 + rho = 0.5d0 + not_converged = .True. + tmp_list = list_act ! Optimization of the active MOs + nb_cancel_tot = 0 + + ! Renormalization of the weights of the states + call state_weight_normalization + + ! Compute the criterion before the loop + call state_average_energy(prev_criterion) + + do while (not_converged) + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! Gradient + call gradient_list_opt(tmp_n, m, tmp_list, v_grad, max_elem_grad, norm_grad) + + ! Hessian + if (optimization_method == 'full') then + ! Full hessian + call hessian_list_opt(tmp_n, m, tmp_list, H, h_f) + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H, e_val, w) + + elseif (optimization_method == 'diag') then + ! Diagonal hessian + call diag_hessian_list_opt(tmp_n, m, tmp_list, H) + else + ! Identity matrix + do tmp_i = 1, tmp_n + H(tmp_i,1) = 1d0 + enddo + endif + + if (optimization_method /= 'full') then + ! Sort + do tmp_i = 1, tmp_n + key(tmp_i) = tmp_i + e_val(tmp_i) = H(tmp_i,1) + enddo + call dsort(e_val,key,tmp_n) + + ! Eigenvalues and eigenvectors + do tmp_i = 1, tmp_n + w(tmp_i,1) = dble(key(tmp_i)) + enddo + + endif + + ! Init before the internal loop + cancel_step = .True. ! To enter in the loop just after + nb_cancel = 0 + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'' + print*,'-----------------------------' + print*,'Iteration: ', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem_grad + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) + + if (must_exit) then + print*,'step_in_trust_region sends: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, m, tmp_x, tmp_m_x) + + ! Rotation matrix for the active MOs + call rotation_matrix(tmp_m_x, m, tmp_R, m, m, info, enforce_step_cancellation) + + ! Security to ensure an unitary transformation + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(m, tmp_list, tmp_R, R) + + ! MO rotations + call apply_mo_rotation(R, prev_mos) + + ! Update of the energy before the diagonalization of the hamiltonian + call clear_mo_map + TOUCH mo_coef psi_det psi_coef ci_energy two_e_dm_mo + call state_average_energy(criterion) + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step if necessary + if (cancel_step) then + mo_coef = prev_mos + call save_mos() + nb_cancel = nb_cancel + 1 + nb_cancel_tot = nb_cancel_tot + 1 + else + ! Diagonalization of the hamiltonian + FREE ci_energy! To enforce the recomputation + call diagonalize_ci + call save_wavefunction_unsorted + + ! Energy obtained after the diagonalization of the CI matrix + call state_average_energy(prev_criterion) + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem_grad) < thresh_opt_max_elem_grad) then + print*,'Converged: DABS(max_elem_grad) < thresh_opt_max_elem_grad' + not_converged = .False. + endif + if (nb_iter >= optimization_max_nb_iter) then + print*,'Not converged: nb_iter >= optimization_max_nb_iter' + not_converged = .False. + endif + + if (.not. not_converged) then + print*,'#############################' + print*,' End of the optimization' + print*,'#############################' + endif + enddo + +#+END_SRC + +** Deallocation, end + +#+BEGIN_SRC f90 :comments org :tangle run_orb_opt_trust_v2.irp.f + deallocate(v_grad,H,R,W,e_val) + deallocate(prev_mos,new_mos) + if (optimization_method == 'full') then + deallocate(h_f) + endif + +end +#+END_SRC + diff --git a/src/mo_optimization/org/state_average_energy.org b/src/mo_optimization/org/state_average_energy.org new file mode 100644 index 00000000..b779845a --- /dev/null +++ b/src/mo_optimization/org/state_average_energy.org @@ -0,0 +1,73 @@ +* State average energy + +Calculation of the state average energy from the integrals and the +density matrices. + +\begin{align*} +E = \sum_{ij} h_{ij} \gamma_{ij} + \frac{1}{2} v_{ij}^{kl} \Gamma_{ij}^{kl} +\end{align*} +$h_{ij}$: mono-electronic integral +$\gamma_{ij}$: one electron density matrix +$v_{ij}^{kl}$: bi-electronic integral +$\Gamma_{ij}^{kl}$: two electrons density matrix + +TODO: OMP version + +PROVIDED: +| mo_one_e_integrals | double precision | mono-electronic integrals | +| get_two_e_integral | double precision | bi-electronic integrals | +| one_e_dm_mo | double precision | one electron density matrix | +| two_e_dm_mo | double precision | two electrons density matrix | +| nuclear_repulsion | double precision | nuclear repulsion | +| mo_num | integer | number of MOs | + +Output: +| energy | double precision | state average energy | + +Internal: +| mono_e | double precision | mono-electronic energy | +| bi_e | double precision | bi-electronic energy | +| i,j,k,l | integer | indexes to loop over the MOs | + +#+BEGIN_SRC f90 :comments org :tangle state_average_energy.irp.f +subroutine state_average_energy(energy) + + implicit none + + double precision, intent(out) :: energy + + double precision :: get_two_e_integral + double precision :: mono_e, bi_e + integer :: i,j,k,l + + ! mono electronic part + mono_e = 0d0 + do j = 1, mo_num + do i = 1, mo_num + mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) + enddo + enddo + + ! bi electronic part + bi_e = 0d0 + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + bi_e = bi_e + get_two_e_integral(i,j,k,l,mo_integrals_map) * two_e_dm_mo(i,j,k,l) + enddo + enddo + enddo + enddo + + ! State average energy + energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + + ! Check + !call print_energy_components + + print*,'State average energy:', energy + !print*,ci_energy + +end +#+END_SRC diff --git a/src/mo_optimization/org/state_weight_normalization.org b/src/mo_optimization/org/state_weight_normalization.org new file mode 100644 index 00000000..492ad3d4 --- /dev/null +++ b/src/mo_optimization/org/state_weight_normalization.org @@ -0,0 +1,31 @@ +#+BEGIN_SRC f90 :comments org :tangle state_weight_normalization.irp.f +subroutine state_weight_normalization + + implicit none + + BEGIN_DOC + ! Renormalization of the state weights or enforcing state average + ! weights for orbital optimization + END_DOC + + integer :: i + double precision :: normalization_factor + + ! To normalize the sum of the state weights + normalization_factor = 0d0 + do i = 1, N_states + normalization_factor = normalization_factor + state_average_weight(i) + enddo + normalization_factor = 1d0 / normalization_factor + + do i = 1, N_states + state_average_weight(i) = state_average_weight(i) * normalization_factor + enddo + TOUCH state_average_weight + + print*, 'Number of states:', N_states + print*, 'State average weights:' + print*, state_average_weight(:) + +end +#+END_SRC diff --git a/src/mo_optimization/org/update_parameters.org b/src/mo_optimization/org/update_parameters.org new file mode 100644 index 00000000..cd9b9595 --- /dev/null +++ b/src/mo_optimization/org/update_parameters.org @@ -0,0 +1,16 @@ +Subroutine toupdate the parameters. +Ex: TOUCH mo_coef ... + +#+BEGIN_SRC f90 :comments org :tangle update_parameters.irp.f +subroutine update_parameters() + + implicit none + + !### TODO + ! Touch yours parameters + call clear_mo_map + TOUCH mo_coef psi_det psi_coef + call diagonalize_ci + call save_wavefunction_unsorted +end +#+END_SRC diff --git a/src/mo_optimization/org/update_st_av_ci_energy.org b/src/mo_optimization/org/update_st_av_ci_energy.org new file mode 100644 index 00000000..2dc7f3ee --- /dev/null +++ b/src/mo_optimization/org/update_st_av_ci_energy.org @@ -0,0 +1,26 @@ +* Update the CI state average energy + +Computes the state average energy +\begin{align*} +E =\sum_{i=1}^{N_{states}} E_i . w_i +\end{align*} + +$E_i$: energy of state i +$w_i$: weight of state i + +#+BEGIN_SRC f90 :comments org :tangle update_st_av_ci_energy.irp.f +subroutine update_st_av_ci_energy(energy) + + implicit none + + double precision, intent(out) :: energy + integer :: i + + energy = 0d0 + do i = 1, N_states + energy = energy + ci_energy(i) * state_average_weight(i) + enddo + + print*, 'ci_energy :', energy +end +#+END_SRC diff --git a/src/mo_optimization/run_orb_opt_trust_v2.irp.f b/src/mo_optimization/run_orb_opt_trust_v2.irp.f new file mode 100644 index 00000000..e1431255 --- /dev/null +++ b/src/mo_optimization/run_orb_opt_trust_v2.irp.f @@ -0,0 +1,317 @@ +! Subroutine : run_orb_opt_trust + + + + + + +! Subroutine to optimize the MOs using a trust region algorithm: +! - choice of the method +! - initialization +! - optimization until convergence + +! The optimization use the trust region algorithm, the different parts +! are explained in the corresponding subroutine files. + +! qp_edit: +! | thresh_opt_max_elem_grad | +! | optimization_max_nb_iter | +! | optimization_method | + +! Provided: +! | mo_num | integer | number of MOs | +! | ao_num | integer | number of AOs | +! | N_states | integer | number of states | +! | ci_energy(N_states) | double precision | CI energies | +! | state_average_weight(N_states) | double precision | Weight of the different states | + +! Variables: +! | m | integer | number of active MOs | +! | tmp_n | integer | m*(m-1)/2, number of MO parameters | +! | tmp_n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | v_grad(tmp_n) | double precision | gradient | +! | H(tmp_n,tmp_n) | double precision | hessian (2D) | +! | h_f(m,m,m,m) | double precision | hessian (4D) | +! | e_val(m) | double precision | eigenvalues of the hessian | +! | w(m,m) | double precision | eigenvectors of the hessian | +! | x(m) | double precision | step given by the trust region | +! | m_x(m,m) | double precision | step given by the trust region after | +! | tmp_R(m,m) | double precision | rotation matrix for active MOs | +! | R(mo_num,mo_num) | double precision | full rotation matrix | +! | prev_mos(ao_num,mo_num) | double precision | previous MOs (before the rotation) | +! | new_mos(ao_num,mo_num) | double precision | new MOs (after the roration) | +! | delta | double precision | radius of the trust region | +! | rho | double precision | agreement between the model and the exact function | +! | max_elem | double precision | maximum element in the gradient | +! | i | integer | index | +! | tmp_i,tmp_j | integer | indexes in the subspace containing only | +! | | | the active MOs | +! | converged | logical | convergence of the algorithm | +! | cancel_step | logical | if the step must be cancelled | +! | nb_iter | integer | number of iterations (accepted) | +! | nb_diag | integer | number of diagonalizations of the CI matrix | +! | nb_cancel | integer | number of cancelled steps for the actual iteration | +! | nb_cancel_tot | integer | total number of cancel steps | +! | info | integer | if 0 ok, else problem in the diagonalization of | +! | | | the hessian with the Lapack routine | +! | criterion | double precision | energy at a given step | +! | prev_criterion | double precision | energy before the rotation | +! | criterion_model | double precision | estimated energy after the rotation using | +! | | | a Taylor series | +! | must_exit | logical | To exit the trust region algorithm when | +! | | | criterion - criterion_model is too small | +! | enforce_step_cancellation | logical | To force the cancellation of the step if the | +! | | | error in the rotation matrix is too large | + + +subroutine run_orb_opt_trust_v2 + + include 'constants.h' + + implicit none + + BEGIN_DOC + ! Orbital optimization + END_DOC + + ! Variables + + double precision, allocatable :: R(:,:) + double precision, allocatable :: H(:,:),h_f(:,:,:,:) + double precision, allocatable :: v_grad(:) + double precision, allocatable :: prev_mos(:,:),new_mos(:,:) + integer :: info + integer :: n + integer :: i,j,p,q,k + double precision :: max_elem_grad, delta, rho, norm_grad, normalization_factor + logical :: cancel_step + integer :: nb_iter, nb_diag, nb_cancel, nb_cancel_tot, nb_sub_iter + double precision :: t1, t2, t3 + double precision :: prev_criterion, criterion, criterion_model + logical :: not_converged, must_exit, enforce_step_cancellation + integer :: m, tmp_n, tmp_i, tmp_j, tmp_k, tmp_n2 + integer,allocatable :: tmp_list(:), key(:) + double precision, allocatable :: tmp_m_x(:,:),tmp_R(:,:), tmp_x(:), W(:,:), e_val(:) + + PROVIDE mo_two_e_integrals_in_map ci_energy psi_det psi_coef + +! Allocation + + +allocate(R(mo_num,mo_num)) ! rotation matrix +allocate(prev_mos(ao_num,mo_num), new_mos(ao_num,mo_num)) ! old and new MOs + +! Definition of m and tmp_n +m = dim_list_act_orb +tmp_n = m*(m-1)/2 + +allocate(tmp_list(m)) +allocate(tmp_R(m,m), tmp_m_x(m,m), tmp_x(tmp_n)) +allocate(e_val(tmp_n),key(tmp_n),v_grad(tmp_n)) + +! Method +! There are three different methods : +! - the "full" hessian, which uses all the elements of the hessian +! matrix" +! - the "diagonal" hessian, which uses only the diagonal elements of the +! hessian +! - without the hessian (hessian = identity matrix) + + +!Display the method + print*, 'Method :', optimization_method +if (optimization_method == 'full') then + print*, 'Full hessian' + allocate(H(tmp_n,tmp_n), h_f(m,m,m,m),W(tmp_n,tmp_n)) + tmp_n2 = tmp_n +elseif (optimization_method == 'diag') then + print*,'Diagonal hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 +elseif (optimization_method == 'none') then + print*,'No hessian' + allocate(H(tmp_n,1),W(tmp_n,1)) + tmp_n2 = 1 +else + print*,'Unknown optimization_method, please select full, diag or none' + call abort +endif +print*, 'Absolute value of the hessian:', absolute_eig + +! Algorithm + +! Here is the main algorithm of the optimization: +! - First of all we initialize some parameters and we compute the +! criterion (the ci energy) before doing any MO rotations +! - We compute the gradient and the hessian for the active MOs +! - We diagonalize the hessian +! - We compute a step and loop to reduce the radius of the +! trust region (and the size of the step by the way) until the step is +! accepted +! - We repeat the process until the convergence +! NB: the convergence criterion can be changed + + +! Loop until the convergence of the optimization +! call diagonalize_ci + +!### Initialization ### +nb_iter = 0 +rho = 0.5d0 +not_converged = .True. +tmp_list = list_act ! Optimization of the active MOs +nb_cancel_tot = 0 + +! Renormalization of the weights of the states +call state_weight_normalization + +! Compute the criterion before the loop +call state_average_energy(prev_criterion) + +do while (not_converged) + print*,'' + print*,'******************' + print*,'Iteration', nb_iter + print*,'******************' + print*,'' + + ! Gradient + call gradient_list_opt(tmp_n, m, tmp_list, v_grad, max_elem_grad, norm_grad) + + ! Hessian + if (optimization_method == 'full') then + ! Full hessian + call hessian_list_opt(tmp_n, m, tmp_list, H, h_f) + + ! Diagonalization of the hessian + call diagonalization_hessian(tmp_n, H, e_val, w) + + elseif (optimization_method == 'diag') then + ! Diagonal hessian + call diag_hessian_list_opt(tmp_n, m, tmp_list, H) + else + ! Identity matrix + do tmp_i = 1, tmp_n + H(tmp_i,1) = 1d0 + enddo + endif + + if (optimization_method /= 'full') then + ! Sort + do tmp_i = 1, tmp_n + key(tmp_i) = tmp_i + e_val(tmp_i) = H(tmp_i,1) + enddo + call dsort(e_val,key,tmp_n) + + ! Eigenvalues and eigenvectors + do tmp_i = 1, tmp_n + w(tmp_i,1) = dble(key(tmp_i)) + enddo + + endif + + ! Init before the internal loop + cancel_step = .True. ! To enter in the loop just after + nb_cancel = 0 + nb_sub_iter = 0 + + ! Loop to reduce the trust radius until the criterion decreases and rho >= thresh_rho + do while (cancel_step) + print*,'' + print*,'-----------------------------' + print*,'Iteration: ', nb_iter + print*,'Sub iteration:', nb_sub_iter + print*,'Max elem grad:', max_elem_grad + print*,'-----------------------------' + + ! Hessian,gradient,Criterion -> x + call trust_region_step_w_expected_e(tmp_n,tmp_n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,tmp_x,must_exit) + + if (must_exit) then + print*,'step_in_trust_region sends: Exit' + exit + endif + + ! 1D tmp -> 2D tmp + call vec_to_mat_v2(tmp_n, m, tmp_x, tmp_m_x) + + ! Rotation matrix for the active MOs + call rotation_matrix(tmp_m_x, m, tmp_R, m, m, info, enforce_step_cancellation) + + ! Security to ensure an unitary transformation + if (enforce_step_cancellation) then + print*, 'Step cancellation, too large error in the rotation matrix' + rho = 0d0 + cycle + endif + + ! tmp_R to R, subspace to full space + call sub_to_full_rotation_matrix(m, tmp_list, tmp_R, R) + + ! MO rotations + call apply_mo_rotation(R, prev_mos) + + ! Update of the energy before the diagonalization of the hamiltonian + call clear_mo_map + TOUCH mo_coef psi_det psi_coef ci_energy two_e_dm_mo + call state_average_energy(criterion) + + ! Criterion -> step accepted or rejected + call trust_region_is_step_cancelled(nb_iter, prev_criterion, criterion, criterion_model, rho, cancel_step) + + ! Cancellation of the step if necessary + if (cancel_step) then + mo_coef = prev_mos + call save_mos() + nb_cancel = nb_cancel + 1 + nb_cancel_tot = nb_cancel_tot + 1 + else + ! Diagonalization of the hamiltonian + FREE ci_energy! To enforce the recomputation + call diagonalize_ci + call save_wavefunction_unsorted + + ! Energy obtained after the diagonalization of the CI matrix + call state_average_energy(prev_criterion) + endif + + nb_sub_iter = nb_sub_iter + 1 + enddo + call save_mos() !### depend of the time for 1 iteration + + ! To exit the external loop if must_exit = .True. + if (must_exit) then + exit + endif + + ! Step accepted, nb iteration + 1 + nb_iter = nb_iter + 1 + + ! External loop exit conditions + if (DABS(max_elem_grad) < thresh_opt_max_elem_grad) then + print*,'Converged: DABS(max_elem_grad) < thresh_opt_max_elem_grad' + not_converged = .False. + endif + if (nb_iter >= optimization_max_nb_iter) then + print*,'Not converged: nb_iter >= optimization_max_nb_iter' + not_converged = .False. + endif + + if (.not. not_converged) then + print*,'#############################' + print*,' End of the optimization' + print*,'#############################' + endif +enddo + +! Deallocation, end + + +deallocate(v_grad,H,R,W,e_val) + deallocate(prev_mos,new_mos) + if (optimization_method == 'full') then + deallocate(h_f) + endif + +end diff --git a/src/mo_optimization/save_energy.irp.f b/src/mo_optimization/save_energy.irp.f new file mode 100644 index 00000000..5dac8da9 --- /dev/null +++ b/src/mo_optimization/save_energy.irp.f @@ -0,0 +1,9 @@ +subroutine save_energy(E,pt2) + implicit none + BEGIN_DOC +! Saves the energy in |EZFIO|. + END_DOC + double precision, intent(in) :: E(N_states), pt2(N_states) + call ezfio_set_fci_energy(E(1:N_states)) + call ezfio_set_fci_energy_pt2(E(1:N_states)+pt2(1:N_states)) +end diff --git a/src/mo_optimization/state_average_energy.irp.f b/src/mo_optimization/state_average_energy.irp.f new file mode 100644 index 00000000..2cd063da --- /dev/null +++ b/src/mo_optimization/state_average_energy.irp.f @@ -0,0 +1,72 @@ +! State average energy + +! Calculation of the state average energy from the integrals and the +! density matrices. + +! \begin{align*} +! E = \sum_{ij} h_{ij} \gamma_{ij} + \frac{1}{2} v_{ij}^{kl} \Gamma_{ij}^{kl} +! \end{align*} +! $h_{ij}$: mono-electronic integral +! $\gamma_{ij}$: one electron density matrix +! $v_{ij}^{kl}$: bi-electronic integral +! $\Gamma_{ij}^{kl}$: two electrons density matrix + +! TODO: OMP version + +! PROVIDED: +! | mo_one_e_integrals | double precision | mono-electronic integrals | +! | get_two_e_integral | double precision | bi-electronic integrals | +! | one_e_dm_mo | double precision | one electron density matrix | +! | two_e_dm_mo | double precision | two electrons density matrix | +! | nuclear_repulsion | double precision | nuclear repulsion | +! | mo_num | integer | number of MOs | + +! Output: +! | energy | double precision | state average energy | + +! Internal: +! | mono_e | double precision | mono-electronic energy | +! | bi_e | double precision | bi-electronic energy | +! | i,j,k,l | integer | indexes to loop over the MOs | + + +subroutine state_average_energy(energy) + + implicit none + + double precision, intent(out) :: energy + + double precision :: get_two_e_integral + double precision :: mono_e, bi_e + integer :: i,j,k,l + + ! mono electronic part + mono_e = 0d0 + do j = 1, mo_num + do i = 1, mo_num + mono_e = mono_e + mo_one_e_integrals(i,j) * one_e_dm_mo(i,j) + enddo + enddo + + ! bi electronic part + bi_e = 0d0 + do l = 1, mo_num + do k = 1, mo_num + do j = 1, mo_num + do i = 1, mo_num + bi_e = bi_e + get_two_e_integral(i,j,k,l,mo_integrals_map) * two_e_dm_mo(i,j,k,l) + enddo + enddo + enddo + enddo + + ! State average energy + energy = mono_e + 0.5d0 * bi_e + nuclear_repulsion + + ! Check + !call print_energy_components + + print*,'State average energy:', energy + !print*,ci_energy + +end diff --git a/src/mo_optimization/state_weight_normalization.irp.f b/src/mo_optimization/state_weight_normalization.irp.f new file mode 100644 index 00000000..27d30af7 --- /dev/null +++ b/src/mo_optimization/state_weight_normalization.irp.f @@ -0,0 +1,29 @@ +subroutine state_weight_normalization + + implicit none + + BEGIN_DOC + ! Renormalization of the state weights or enforcing state average + ! weights for orbital optimization + END_DOC + + integer :: i + double precision :: normalization_factor + + ! To normalize the sum of the state weights + normalization_factor = 0d0 + do i = 1, N_states + normalization_factor = normalization_factor + state_average_weight(i) + enddo + normalization_factor = 1d0 / normalization_factor + + do i = 1, N_states + state_average_weight(i) = state_average_weight(i) * normalization_factor + enddo + TOUCH state_average_weight + + print*, 'Number of states:', N_states + print*, 'State average weights:' + print*, state_average_weight(:) + +end diff --git a/src/mo_optimization/update_parameters.irp.f b/src/mo_optimization/update_parameters.irp.f new file mode 100644 index 00000000..88e8fc34 --- /dev/null +++ b/src/mo_optimization/update_parameters.irp.f @@ -0,0 +1,15 @@ +! Subroutine toupdate the parameters. +! Ex: TOUCH mo_coef ... + + +subroutine update_parameters() + + implicit none + + !### TODO + ! Touch yours parameters + call clear_mo_map + TOUCH mo_coef psi_det psi_coef + call diagonalize_ci + call save_wavefunction_unsorted +end diff --git a/src/mo_optimization/update_st_av_ci_energy.irp.f b/src/mo_optimization/update_st_av_ci_energy.irp.f new file mode 100644 index 00000000..18b72502 --- /dev/null +++ b/src/mo_optimization/update_st_av_ci_energy.irp.f @@ -0,0 +1,25 @@ +! Update the CI state average energy + +! Computes the state average energy +! \begin{align*} +! E =\sum_{i=1}^{N_{states}} E_i . w_i +! \end{align*} + +! $E_i$: energy of state i +! $w_i$: weight of state i + + +subroutine update_st_av_ci_energy(energy) + + implicit none + + double precision, intent(out) :: energy + integer :: i + + energy = 0d0 + do i = 1, N_states + energy = energy + ci_energy(i) * state_average_weight(i) + enddo + + print*, 'ci_energy :', energy +end diff --git a/src/utils_trust_region/README.md b/src/utils_trust_region/README.md new file mode 100644 index 00000000..72bfefef --- /dev/null +++ b/src/utils_trust_region/README.md @@ -0,0 +1,11 @@ +# Utils trust region + +The documentation can be found in the org files. + +# Org files +The org files are stored in the directory org in order to avoid overwriting on user changes. +The org files can be modified, to export the change to the source code, run +``` +./TANGLE_org_mode.sh +mv *.irp.f ../. +``` diff --git a/src/utils_trust_region/README.rst b/src/utils_trust_region/README.rst deleted file mode 100644 index 6a0689b6..00000000 --- a/src/utils_trust_region/README.rst +++ /dev/null @@ -1,5 +0,0 @@ -============ -trust_region -============ - -The documentation can be found in the org files. diff --git a/src/utils_trust_region/algo_trust.irp.f b/src/utils_trust_region/algo_trust.irp.f index eac17275..933d8eff 100644 --- a/src/utils_trust_region/algo_trust.irp.f +++ b/src/utils_trust_region/algo_trust.irp.f @@ -133,19 +133,19 @@ ! | must_exit | logical | If the program must exit the loop | -subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) +subroutine trust_region_step_w_expected_e(n,n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the step and the expected criterion/energy after the step - END_DOC + !END_DOC implicit none ! in - integer, intent(in) :: n, nb_iter - double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) + integer, intent(in) :: n,n2, nb_iter + double precision, intent(in) :: H(n,n2), W(n,n2), v_grad(n) double precision, intent(in) :: rho, prev_criterion ! inout @@ -160,9 +160,9 @@ subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho, must_exit = .False. - call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) + call trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,W,x,delta) - call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) + call trust_region_expected_e(n,n2,v_grad,H,x,prev_criterion,criterion_model) ! exit if DABS(prev_criterion - criterion_model) < 1d-12 if (DABS(prev_criterion - criterion_model) < thresh_model) then @@ -210,9 +210,9 @@ subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, cri include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute if the step should be cancelled - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/apply_mo_rotation.irp.f b/src/utils_trust_region/apply_mo_rotation.irp.f index e274ec11..a313769d 100644 --- a/src/utils_trust_region/apply_mo_rotation.irp.f +++ b/src/utils_trust_region/apply_mo_rotation.irp.f @@ -25,9 +25,9 @@ subroutine apply_mo_rotation(R,prev_mos) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the new MOs knowing the rotation matrix - END_DOC + !END_DOC implicit none @@ -60,12 +60,12 @@ subroutine apply_mo_rotation(R,prev_mos) prev_mos = mo_coef mo_coef = new_mos - !if (debug) then - ! print*,'New mo_coef : ' - ! do i = 1, mo_num - ! write(*,'(100(F10.5))') mo_coef(i,:) - ! enddo - !endif + if (debug) then + print*,'New mo_coef : ' + do i = 1, mo_num + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif ! Save the new MOs and change the label mo_label = 'MCSCF' diff --git a/src/utils_trust_region/org/TANGLE_org_mode.sh b/src/utils_trust_region/org/TANGLE_org_mode.sh new file mode 100755 index 00000000..059cbe7d --- /dev/null +++ b/src/utils_trust_region/org/TANGLE_org_mode.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +list='ls *.org' +for element in $list +do + emacs --batch $element -f org-babel-tangle +done diff --git a/src/utils_trust_region/algo_trust.org b/src/utils_trust_region/org/algo_trust.org similarity index 96% rename from src/utils_trust_region/algo_trust.org rename to src/utils_trust_region/org/algo_trust.org index aa836f98..01e99c29 100644 --- a/src/utils_trust_region/algo_trust.org +++ b/src/utils_trust_region/org/algo_trust.org @@ -132,19 +132,19 @@ Output: | must_exit | logical | If the program must exit the loop | #+BEGIN_SRC f90 :comments org :tangle algo_trust.irp.f -subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) +subroutine trust_region_step_w_expected_e(n,n2,H,W,e_val,v_grad,prev_criterion,rho,nb_iter,delta,criterion_model,x,must_exit) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the step and the expected criterion/energy after the step - END_DOC + !END_DOC implicit none ! in - integer, intent(in) :: n, nb_iter - double precision, intent(in) :: H(n,n), W(n,n), v_grad(n) + integer, intent(in) :: n,n2, nb_iter + double precision, intent(in) :: H(n,n2), W(n,n2), v_grad(n) double precision, intent(in) :: rho, prev_criterion ! inout @@ -159,9 +159,9 @@ subroutine trust_region_step_w_expected_e(n,H,W,e_val,v_grad,prev_criterion,rho, must_exit = .False. - call trust_region_step(n,nb_iter,v_grad,rho,e_val,W,x,delta) + call trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,W,x,delta) - call trust_region_expected_e(n,v_grad,H,x,prev_criterion,criterion_model) + call trust_region_expected_e(n,n2,v_grad,H,x,prev_criterion,criterion_model) ! exit if DABS(prev_criterion - criterion_model) < 1d-12 if (DABS(prev_criterion - criterion_model) < thresh_model) then @@ -208,9 +208,9 @@ subroutine trust_region_is_step_cancelled(nb_iter,prev_criterion, criterion, cri include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute if the step should be cancelled - END_DOC + !END_DOC implicit none @@ -310,7 +310,7 @@ subroutine algo_trust_template(tmp_n, tmp_list_size, tmp_list) print*,'-----------------------------' ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + call trust_region_step_w_expected_e(tmp_n,tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) if (must_exit) then @@ -489,7 +489,7 @@ subroutine algo_trust_cartesian_template(tmp_n) print*,'-----------------------------' ! Hessian,gradient,Criterion -> x - call trust_region_step_w_expected_e(tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & + call trust_region_step_w_expected_e(tmp_n,tmp_n, H_PROVIDER, W, e_val, g_PROVIDER, & prev_criterion, rho, nb_iter, delta, criterion_model, tmp_x, must_exit) if (must_exit) then diff --git a/src/utils_trust_region/apply_mo_rotation.org b/src/utils_trust_region/org/apply_mo_rotation.org similarity index 92% rename from src/utils_trust_region/apply_mo_rotation.org rename to src/utils_trust_region/org/apply_mo_rotation.org index 918581b7..955997e9 100644 --- a/src/utils_trust_region/apply_mo_rotation.org +++ b/src/utils_trust_region/org/apply_mo_rotation.org @@ -25,9 +25,9 @@ subroutine apply_mo_rotation(R,prev_mos) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the new MOs knowing the rotation matrix - END_DOC + !END_DOC implicit none @@ -60,12 +60,12 @@ subroutine apply_mo_rotation(R,prev_mos) prev_mos = mo_coef mo_coef = new_mos - !if (debug) then - ! print*,'New mo_coef : ' - ! do i = 1, mo_num - ! write(*,'(100(F10.5))') mo_coef(i,:) - ! enddo - !endif + if (debug) then + print*,'New mo_coef : ' + do i = 1, mo_num + write(*,'(100(F10.5))') mo_coef(i,:) + enddo + endif ! Save the new MOs and change the label mo_label = 'MCSCF' diff --git a/src/utils_trust_region/mat_to_vec_index.org b/src/utils_trust_region/org/mat_to_vec_index.org similarity index 100% rename from src/utils_trust_region/mat_to_vec_index.org rename to src/utils_trust_region/org/mat_to_vec_index.org diff --git a/src/utils_trust_region/rotation_matrix.org b/src/utils_trust_region/org/rotation_matrix.org similarity index 97% rename from src/utils_trust_region/rotation_matrix.org rename to src/utils_trust_region/org/rotation_matrix.org index 73ba0298..3b2ff437 100644 --- a/src/utils_trust_region/rotation_matrix.org +++ b/src/utils_trust_region/org/rotation_matrix.org @@ -61,10 +61,10 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) implicit none - BEGIN_DOC + !BEGIN_DOC ! Rotation matrix to rotate the molecular orbitals. ! If the rotation is too large the transformation is not unitary and must be cancelled. - END_DOC + !END_DOC include 'pi.h' @@ -188,7 +188,7 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) endif enddo enddo - print*,'max element in A', max_elem_A + !print*,'max element in A', max_elem_A if (ABS(max_elem_A) > 2 * pi) then print*,'' @@ -220,18 +220,16 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) lwork = 3*n-1 allocate(work(lwork,n)) - print*,'Starting diagonalization ...' + !print*,'Starting diagonalization ...' call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) deallocate(work) - if (info2 == 0) then - print*, 'Diagonalization : Done' - elseif (info2 < 0) then + if (info2 < 0) then print*, 'WARNING: error in the diagonalization' print*, 'Illegal value of the ', info2,'-th parameter' - else + elseif (info2 >0) then print*, "WARNING: Diagonalization failed to converge" endif #+END_SRC @@ -308,7 +306,7 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) max_elem = tau_m1(i,i) endif enddo - print*,'max elem tau^-1:', max_elem + !print*,'max elem tau^-1:', max_elem ! Debug !print*,'eigenvalues:' @@ -380,7 +378,7 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) norm = dnrm2(n*n,RR_t,1) - print*, 'Rotation matrix check, norm R.R^T = ', norm + !print*, 'Rotation matrix check, norm R.R^T = ', norm ! Debug !if (debug) then @@ -404,9 +402,9 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) enddo print*, 'Max error in R.R^T:', max_elem - print*, 'e_val(1):', e_val(1) - print*, 'e_val(n):', e_val(n) - print*, 'max elem in A:', max_elem_A + !print*, 'e_val(1):', e_val(1) + !print*, 'e_val(n):', e_val(n) + !print*, 'max elem in A:', max_elem_A if (ABS(max_elem) > 1d-12) then print*, 'WARNING: max error in R.R^T > 1d-12' diff --git a/src/utils_trust_region/org/rotation_matrix_iterative.org b/src/utils_trust_region/org/rotation_matrix_iterative.org new file mode 100644 index 00000000..f6cc9909 --- /dev/null +++ b/src/utils_trust_region/org/rotation_matrix_iterative.org @@ -0,0 +1,136 @@ +* Rotation matrix with the iterative method + +\begin{align*} +\textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k +\end{align*} + +!!! Doesn't work !!! + +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f +subroutine rotation_matrix_iterative(m,X,R) + + implicit none + + ! in + integer, intent(in) :: m + double precision, intent(in) :: X(m,m) + + ! out + double precision, intent(out) :: R(m,m) + + ! internal + double precision :: max_elem, pre_factor + double precision :: t1,t2,t3 + integer :: k,l,i,j + logical :: not_converged + double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) + + ! Functions + integer :: factorial + + print*,'---rotation_matrix_iterative---' + call wall_time(t1) + + allocate(RRT(m,m),A(m,m),B(m,m)) + + ! k = 0 + R = 0d0 + do i = 1, m + R(i,i) = 1d0 + enddo + + ! k = 1 + R = R + X + + k = 2 + + not_converged = .True. + + do while (not_converged) + + pre_factor = 1d0/DBLE(factorial(k)) + if (pre_factor < 1d-15) then + print*,'pre factor=', pre_factor,'< 1d-15, exit' + exit + endif + + A = X + B = 0d0 + do l = 1, k-1 + call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) + A = B + enddo + + !print*,'B' + !do i = 1, m + ! print*,B(i,:) * 1d0/DBLE(factorial(k)) + !enddo + + R = R + pre_factor * B + + k = k + 1 + call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) + + !print*,'R' + !do i = 1, m + ! write(*,'(10(E12.5))') R(i,:) + !enddo + + do i = 1, m + RRT(i,i) = RRT(i,i) - 1d0 + enddo + + !print*,'RRT' + !do i = 1, m + ! write(*,'(10(E12.5))') RRT(i,:) + !enddo + + max_elem = 0d0 + do j = 1, m + do i = 1, m + if (dabs(RRT(i,j)) > max_elem) then + max_elem = dabs(RRT(i,j)) + endif + enddo + enddo + + print*, 'Iteration:', k + print*, 'Max error in R:', max_elem + + if (max_elem < 1d-12) then + not_converged = .False. + endif + + enddo + + deallocate(RRT,A,B) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rotation matrix iterative:', t3 + print*,'---End roration_matrix_iterative---' + + +print*,'Does not work yet, abort' +call abort + +end +#+END_SRC + +** Factorial +#+BEGIN_SRC f90 :comments org :tangle rotation_matrix_iterative.irp.f +function factorial(n) + + implicit none + + integer, intent(in) :: n + integer :: factorial, k + + factorial = 1 + + do k = 1, n + factorial = factorial * k + enddo + +end +#+END_SRC diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.org b/src/utils_trust_region/org/sub_to_full_rotation_matrix.org similarity index 98% rename from src/utils_trust_region/sub_to_full_rotation_matrix.org rename to src/utils_trust_region/org/sub_to_full_rotation_matrix.org index 16434dc8..f0cf0bfc 100644 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.org +++ b/src/utils_trust_region/org/sub_to_full_rotation_matrix.org @@ -32,9 +32,9 @@ Internal: #+BEGIN_SRC f90 :comments org :tangle sub_to_full_rotation_matrix.irp.f subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - BEGIN_DOC + !BEGIN_DOC ! Compute the full rotation matrix from a smaller one - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_expected_e.org b/src/utils_trust_region/org/trust_region_expected_e.org similarity index 66% rename from src/utils_trust_region/trust_region_expected_e.org rename to src/utils_trust_region/org/trust_region_expected_e.org index 58c8f804..9d2868fa 100644 --- a/src/utils_trust_region/trust_region_expected_e.org +++ b/src/utils_trust_region/org/trust_region_expected_e.org @@ -10,11 +10,12 @@ E_{k+1} = E_{k} + \textbf{g}_k^{T} \cdot \textbf{x}_{k+1} + \frac{1}{2} \cdot \t \end{align*} Input: -| n | integer | m*(m-1)/2 | -| v_grad(n) | double precision | gradient | -| H(n,n) | double precision | hessian | -| x(n) | double precision | Step in the trust region | -| prev_energy | double precision | previous energy | +| n | integer | m*(m-1)/2 | +| n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| v_grad(n) | double precision | gradient | +| H(n,n) | double precision | hessian | +| x(n) | double precision | Step in the trust region | +| prev_energy | double precision | previous energy | Output: | e_model | double precision | predicted energy after the rotation of the MOs | @@ -29,21 +30,21 @@ Function: | ddot | double precision | dot product (Lapack) | #+BEGIN_SRC f90 :comments org :tangle trust_region_expected_e.irp.f -subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) +subroutine trust_region_expected_e(n,n2,v_grad,H,x,prev_energy,e_model) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the expected criterion/energy after the application of the step x - END_DOC + !END_DOC implicit none ! Variables ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n),H(n,n),x(n) + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n),H(n,n2),x(n) double precision, intent(in) :: prev_energy ! out @@ -80,27 +81,34 @@ TODO: remove the dot products part_1 = ddot(n,v_grad,1,x,1) !if (debug) then - print*,'g.x : ', part_1 - !endif - + ! print*,'g.x : ', part_1 + !endif + ! Product H.x - call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + if (n == n2) then + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + else + ! If the hessian is diagonal + do i = 1, n + part_2a(i) = H(i,1) * x(i) + enddo + endif ! Product 1/2 . x^T.H.x part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) !if (debug) then - print*,'1/2*x^T.H.x : ', part_2 + ! print*,'1/2*x^T.H.x : ', part_2 !endif - print*,'prev_energy', prev_energy ! Sum e_model = prev_energy + part_1 + part_2 ! Writing the predicted energy - print*, 'Predicted energy after the rotation : ', e_model - print*, 'Previous energy - predicted energy:', prev_energy - e_model + print*, 'prev_energy: ', prev_energy + print*, 'Predicted energy after the rotation:', e_model + print*, 'Previous energy - predicted energy: ', prev_energy - e_model ! Can be deleted, already in another subroutine if (DABS(prev_energy - e_model) < 1d-12 ) then @@ -115,7 +123,6 @@ TODO: remove the dot products print*,'Time in trust e model:', t3 print*,'---End trust_e_model---' - print*,'' end subroutine #+END_SRC diff --git a/src/utils_trust_region/trust_region_optimal_lambda.org b/src/utils_trust_region/org/trust_region_optimal_lambda.org similarity index 94% rename from src/utils_trust_region/trust_region_optimal_lambda.org rename to src/utils_trust_region/org/trust_region_optimal_lambda.org index b39c9a10..ff454cb6 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.org +++ b/src/utils_trust_region/org/trust_region_optimal_lambda.org @@ -153,9 +153,9 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Research the optimal lambda to constrain the step size in the trust region - END_DOC + !END_DOC implicit none @@ -195,18 +195,17 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) print*,'' print*,'---Trust_newton---' - print*,'' call wall_time(t1) ! version_lambda_search ! 1 -> ||x||^2 - delta^2 = 0, ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) - if (version_lambda_search == 1) then - print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' - else - print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' - endif + !if (version_lambda_search == 1) then + ! print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + !else + ! print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + !endif ! Version 2 is normally better #+END_SRC @@ -215,21 +214,21 @@ Resolution with the Newton method: #+BEGIN_SRC f90 :comments org :tangle trust_region_optimal_lambda.irp.f ! Initialization epsilon = 1d-4 - lambda =MAX(0d0, -e_val(1)) + lambda = max(0d0, -e_val(1)) ! Pre research of lambda to start near the optimal lambda ! by adding a constant epsilon and changing the constant to ! have ||x(lambda + epsilon)|| ~ delta, before setting ! lambda = lambda + epsilon - print*, 'Pre research of lambda:' - print*,'Initial lambda =', lambda + !print*, 'Pre research of lambda:' + !print*,'Initial lambda =', lambda f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + !print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta i = 1 ! To increase lambda if (f_N > delta**2) then - print*,'Increasing lambda...' + !print*,'Increasing lambda...' do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) ! Update the previous norm @@ -239,7 +238,7 @@ Resolution with the Newton method: ! New norm f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta ! Security if (prev_f_N < f_N) then @@ -253,7 +252,7 @@ Resolution with the Newton method: ! To reduce lambda else - print*,'Reducing lambda...' + !print*,'Reducing lambda...' do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) ! Update the previous norm @@ -263,7 +262,7 @@ Resolution with the Newton method: ! New norm f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta ! Security if (prev_f_N > f_N) then @@ -276,27 +275,25 @@ Resolution with the Newton method: enddo endif - print*,'End of the pre research of lambda' + !print*,'End of the pre research of lambda' ! New value of lambda lambda = lambda + epsilon - print*, 'e_val(1):', e_val(1) - print*, 'Staring point, lambda =', lambda + !print*, 'e_val(1):', e_val(1) + !print*, 'Staring point, lambda =', lambda ! thresh_cc, threshold for the research of the optimal lambda ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc ! thresh_rho_2, threshold to cancel the step in the research ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 - print*,'Threshold for the CC:', thresh_cc - print*,'Threshold for rho_2:', thresh_rho_2 - - print*, 'w_1^T . g =', tmp_wtg(1) + + !print*,'Threshold for the CC:', thresh_cc + !print*,'Threshold for rho_2:', thresh_rho_2 + !print*, 'w_1^T . g =', tmp_wtg(1) ! Debug - !if (debug) then - ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - !endif + !print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' ! Initialization i = 1 @@ -323,9 +320,9 @@ Resolution with the Newton method: ! Newton's method do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) - print*,'--------------------------------------' - print*,'Research of lambda, iteration:', i - print*,'--------------------------------------' + !print*,'--------------------------------------' + !print*,'Research of lambda, iteration:', i + !print*,'--------------------------------------' ! Update of f_N, f_R and the derivatives prev_f_N = f_N @@ -338,7 +335,7 @@ Resolution with the Newton method: d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 endif - write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 ! Newton's step y = -(1d0/DABS(d_2))*d_1 @@ -347,7 +344,7 @@ Resolution with the Newton method: if (DABS(y) > alpha) then y = alpha * (y/DABS(y)) ! preservation of the sign of y endif - write(*,'(a,E12.5)') ' Step length: ', y + !write(*,'(a,E12.5)') ' Step length: ', y ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 @@ -355,8 +352,8 @@ Resolution with the Newton method: ! Updates lambda prev_lambda = lambda lambda = prev_lambda + y - print*,'prev lambda:', prev_lambda - print*,'new lambda:', lambda + !print*,'prev lambda:', prev_lambda + !print*,'new lambda:', lambda ! Checks if lambda is in (-h_1, \infty) if (lambda > MAX(0d0, -e_val(1))) then @@ -370,18 +367,18 @@ Resolution with the Newton method: f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 endif - if (version_lambda_search == 1) then - print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R - print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R - print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model - else - print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R - print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R - print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model - endif + !if (version_lambda_search == 1) then + ! print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + ! print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + ! print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + !else + ! print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + ! print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + ! print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + !endif - print*,'previous - actual:', prev_f_R - f_R - print*,'previous - model:', prev_f_R - model + !print*,'previous - actual:', prev_f_R - f_R + !print*,'previous - model:', prev_f_R - model ! Check the gain if (DABS(prev_f_R - model) < thresh_model_2) then @@ -400,10 +397,10 @@ Resolution with the Newton method: ! Computes rho_2 rho_2 = (prev_f_R - f_R)/(prev_f_R - model) - print*,'rho_2:', rho_2 + !print*,'rho_2:', rho_2 else rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) - print*,'lambda < -e_val(1) ===> rho_2 = 0' + !print*,'lambda < -e_val(1) ===> rho_2 = 0' endif ! Evolution of the trust length, alpha @@ -416,20 +413,20 @@ Resolution with the Newton method: else alpha = 0.25d0 * alpha endif - write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha ! cancellaion of the step if rho < 0.1 if (rho_2 < thresh_rho_2) then !0.1d0) then lambda = prev_lambda f_N = prev_f_N - print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + !print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' endif - print*,'' - print*,'lambda, ||x||, delta:' - print*, lambda, dsqrt(f_N), delta - print*,'CC:', DABS(1d0 - f_N/delta**2) - print*,'' + !print*,'' + !print*,'lambda, ||x||, delta:' + !print*, lambda, dsqrt(f_N), delta + !print*,'CC:', DABS(1d0 - f_N/delta**2) + !print*,'' i = i + 1 enddo @@ -444,20 +441,19 @@ Resolution with the Newton method: print*,'' endif - print*,'Number of iterations :', i - print*,'Value of lambda :', lambda - print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 - print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 - print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + print*,'Number of iterations:', i + print*,'Value of lambda:', lambda + !print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Convergence criterion:', 1d0-f_N/delta**2 + !print*,'Error on the trust region (||x||^2 - delta^2)^2):', (f_N - delta**2)**2 + !print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 ! Time call wall_time(t2) t3 = t2 - t1 print*,'Time in trust_newton:', t3 - print*,'' print*,'---End trust_newton---' - print*,'' end subroutine #+END_SRC @@ -508,9 +504,9 @@ function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -645,9 +641,9 @@ function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -794,9 +790,9 @@ function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute ||x(lambda)||^2 - END_DOC + !END_DOC implicit none @@ -906,9 +902,9 @@ function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1004,9 +1000,9 @@ function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1107,9 +1103,9 @@ function f_norm_trust_region(n,e_val,tmp_wtg,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute ||x(lambda)||^2 - END_DOC + !END_DOC implicit none @@ -1190,9 +1186,9 @@ function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1347,9 +1343,9 @@ function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1506,9 +1502,9 @@ function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1601,9 +1597,9 @@ function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_rho.org b/src/utils_trust_region/org/trust_region_rho.org similarity index 85% rename from src/utils_trust_region/trust_region_rho.org rename to src/utils_trust_region/org/trust_region_rho.org index 9b25ee29..b669da8c 100644 --- a/src/utils_trust_region/trust_region_rho.org +++ b/src/utils_trust_region/org/trust_region_rho.org @@ -47,9 +47,9 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute rho, the agreement between the predicted criterion/energy and the real one - END_DOC + !END_DOC implicit none @@ -69,7 +69,7 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) print*,'' print*,'---Rho_model---' - call wall_time(t1) + !call wall_time(t1) #+END_SRC ** Rho @@ -93,31 +93,30 @@ If the step (k+1) is accepted, then the "previous energy" becomes E(k+1) rho = (prev_energy - energy) / (prev_energy - e_model) - print*, 'previous energy, prev_energy :', prev_energy - print*, 'predicted energy, e_model :', e_model - print*, 'real energy, energy :', energy - print*, 'prev_energy - energy :', prev_energy - energy - print*, 'prev_energy - e_model :', prev_energy - e_model - print*, 'Rho :', rho - print*, 'Threshold for rho:', thresh_rho + !print*, 'previous energy, prev_energy:', prev_energy + !print*, 'predicted energy, e_model:', e_model + !print*, 'real energy, energy:', energy + !print*, 'prev_energy - energy:', prev_energy - energy + !print*, 'prev_energy - e_model:', prev_energy - e_model + print*, 'Rho:', rho + !print*, 'Threshold for rho:', thresh_rho ! Modification of prev_energy in function of rho if (rho < thresh_rho) then !0.1) then ! the step is cancelled print*, 'Rho <', thresh_rho,', the previous energy does not changed' - print*, 'prev_energy :', prev_energy + !print*, 'prev_energy :', prev_energy else ! the step is accepted prev_energy = energy - print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy:', energy endif - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rho model:', t3 + !call wall_time(t2) + !t3 = t2 - t1 + !print*,'Time in rho model:', t3 print*,'---End rho_model---' - print*,'' end subroutine #+END_SRC diff --git a/src/utils_trust_region/trust_region_step.org b/src/utils_trust_region/org/trust_region_step.org similarity index 89% rename from src/utils_trust_region/trust_region_step.org rename to src/utils_trust_region/org/trust_region_step.org index 331453a3..0e5f090f 100644 --- a/src/utils_trust_region/trust_region_step.org +++ b/src/utils_trust_region/org/trust_region_step.org @@ -341,9 +341,10 @@ Provided: Cf. qp_edit in orbital optimization section, for some constants/thresholds Input: -| m | integer | number of MOs | +| m | integer | number of MOs | | n | integer | m*(m-1)/2 | -| H(n, n) | double precision | hessian | +| n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +| H(n,n2) | double precision | hessian | | v_grad(n) | double precision | gradient | | e_val(n) | double precision | eigenvalues of the hessian | | W(n, n) | double precision | eigenvectors of the hessian | @@ -371,23 +372,23 @@ Function: | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | #+BEGIN_SRC f90 :comments org :tangle trust_region_step.irp.f -subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) +subroutine trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,w,x,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compuet the step in the trust region - END_DOC + !END_DOC implicit none ! Variables ! in - integer, intent(in) :: n + integer, intent(in) :: n,n2 double precision, intent(in) :: v_grad(n), rho integer, intent(inout) :: nb_iter - double precision, intent(in) :: e_val(n), w(n,n) + double precision, intent(in) :: e_val(n), w(n,n2) ! inout double precision, intent(inout) :: delta @@ -434,11 +435,19 @@ avoid_saddle is actually a test to avoid saddle points ! List of w^T.g, to avoid the recomputation tmp_wtg = 0d0 - do j = 1, n - do i = 1, n - tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + if (n == n2) then + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo enddo - enddo + else + ! For the diagonal case + do j = 1, n + k = int(w(j,1)+1d-15) + tmp_wtg(j) = v_grad(k) + enddo + endif ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue ! in the case of avoid_saddle @@ -465,18 +474,18 @@ avoid_saddle is actually a test to avoid saddle points tmp_wtg(1) = 0d0 endif - endif + endif ! Norm^2 of x, ||x||^2 norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm ! Anyway if the step is too big it will be reduced - print*,'||x||^2 :', norm2_x + !print*,'||x||^2 :', norm2_x ! Norm^2 of the gradient, ||v_grad||^2 norm2_g = (dnrm2(n,v_grad,1))**2 - print*,'||grad||^2 :', norm2_g + !print*,'||grad||^2 :', norm2_g #+END_SRC *** Trust radius initialization @@ -530,7 +539,7 @@ To avoid too big trust region we put a maximum size. delta = 1d10 endif - print*, 'Delta :', delta + !print*, 'Delta :', delta #+END_SRC *** Calculation of the optimal lambda @@ -550,26 +559,26 @@ You will find more details at the beginning ! Research of lambda to solve ||x(lambda)|| = Delta ! Display - print*, 'e_val(1) = ', e_val(1) - print*, 'w_1^T.g =', tmp_wtg(1) + !print*, 'e_val(1) = ', e_val(1) + !print*, 'w_1^T.g =', tmp_wtg(1) ! H positive definite if (e_val(1) > - thresh_eig) then norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - print*, '||x(0)||=', dsqrt(norm2_x) - print*, 'Delta=', delta + !print*, '||x(0)||=', dsqrt(norm2_x) + !print*, 'Delta=', delta ! H positive definite, ||x(lambda = 0)|| <= Delta if (dsqrt(norm2_x) <= delta) then - print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' - print*, 'lambda = 0, no lambda optimization' + !print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + !print*, 'lambda = 0, no lambda optimization' lambda = 0d0 ! H positive definite, ||x(lambda = 0)|| > Delta else ! Constraint solution - print*, 'H positive definite, ||x(lambda = 0)|| > Delta' - print*,'Computation of the optimal lambda...' + !print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + !print*,'Computation of the optimal lambda...' call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) endif @@ -577,14 +586,14 @@ You will find more details at the beginning else if (DABS(tmp_wtg(1)) < thresh_wtg) then norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) - print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + !print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) endif ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then ! Add e_val(1) in order to have (H - e_val(1) I) positive definite - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' - print*, 'lambda = -e_val(1), no lambda optimization' + !print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + !print*, 'lambda = -e_val(1), no lambda optimization' lambda = - e_val(1) ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta @@ -592,12 +601,12 @@ You will find more details at the beginning ! H indefinite, w_1^T.g =/= 0 else ! Constraint solution/ add lambda - if (DABS(tmp_wtg(1)) < thresh_wtg) then - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' - else - print*, 'H indefinite, w_1^T.g =/= 0' - endif - print*, 'Computation of the optimal lambda...' + !if (DABS(tmp_wtg(1)) < thresh_wtg) then + ! print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + !else + ! print*, 'H indefinite, w_1^T.g =/= 0' + !endif + !print*, 'Computation of the optimal lambda...' call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) endif @@ -627,28 +636,53 @@ We compute x in function of lambda using its formula : ! Calculation of the step x - ! Normal version - if (.not. absolute_eig) then + if (n == n2) then + ! Normal version + if (.not. absolute_eig) then - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) - enddo - endif - enddo + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo - ! Version to use the absolute value of the eigenvalues + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif else + ! If the hessian is diagonal + ! Normal version + if (.not. absolute_eig) then - do i = 1, n - if (DABS(e_val(i)) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) - enddo - endif - enddo + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (e_val(i) + lambda) + endif + enddo + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (DABS(e_val(i)) + lambda) + endif + enddo + + endif endif double precision :: beta, norm_x @@ -719,7 +753,6 @@ antisymmetric matrix m_x cf. "mat_to_vec_index" and "vec_to_mat_index". print*,'======================' print*,'---End trust_region---' print*,'======================' - print*,'' end #+END_SRC diff --git a/src/utils_trust_region/vec_to_mat_index.org b/src/utils_trust_region/org/vec_to_mat_index.org similarity index 98% rename from src/utils_trust_region/vec_to_mat_index.org rename to src/utils_trust_region/org/vec_to_mat_index.org index 0a09fa86..13b1b5ee 100644 --- a/src/utils_trust_region/vec_to_mat_index.org +++ b/src/utils_trust_region/org/vec_to_mat_index.org @@ -36,10 +36,10 @@ subroutine vec_to_mat_index(i,p,q) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing ! its index i a vector - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/vec_to_mat_v2.org b/src/utils_trust_region/org/vec_to_mat_v2.org similarity index 96% rename from src/utils_trust_region/vec_to_mat_v2.org rename to src/utils_trust_region/org/vec_to_mat_v2.org index 4e358a88..4ce5f5e1 100644 --- a/src/utils_trust_region/vec_to_mat_v2.org +++ b/src/utils_trust_region/org/vec_to_mat_v2.org @@ -8,9 +8,9 @@ Can be done in OMP (for the first part and with omp critical for the second) #+BEGIN_SRC f90 :comments org :tangle vec_to_mat_v2.irp.f subroutine vec_to_mat_v2(n,m,v_x,m_x) - BEGIN_DOC + !BEGIN_DOC ! Vector to antisymmetric matrix - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/pi.h b/src/utils_trust_region/pi.h index bbfabfec..2c36a9f0 100644 --- a/src/utils_trust_region/pi.h +++ b/src/utils_trust_region/pi.h @@ -1,2 +1,2 @@ - !logical, parameter :: debug=.False. + logical, parameter :: debug=.False. double precision, parameter :: pi = 3.1415926535897932d0 diff --git a/src/utils_trust_region/rotation_matrix.irp.f b/src/utils_trust_region/rotation_matrix.irp.f index 4738fd67..827af8c0 100644 --- a/src/utils_trust_region/rotation_matrix.irp.f +++ b/src/utils_trust_region/rotation_matrix.irp.f @@ -61,10 +61,10 @@ subroutine rotation_matrix(A,LDA,R,LDR,n,info,enforce_step_cancellation) implicit none - BEGIN_DOC + !BEGIN_DOC ! Rotation matrix to rotate the molecular orbitals. ! If the rotation is too large the transformation is not unitary and must be cancelled. - END_DOC + !END_DOC include 'pi.h' @@ -187,7 +187,7 @@ do j = 1, n endif enddo enddo -print*,'max element in A', max_elem_A +!print*,'max element in A', max_elem_A if (ABS(max_elem_A) > 2 * pi) then print*,'' @@ -215,18 +215,16 @@ W=B lwork = 3*n-1 allocate(work(lwork,n)) -print*,'Starting diagonalization ...' +!print*,'Starting diagonalization ...' call dsyev('V','U',n,W,size(W,1),e_val,work,lwork,info2) deallocate(work) -if (info2 == 0) then - print*, 'Diagonalization : Done' -elseif (info2 < 0) then +if (info2 < 0) then print*, 'WARNING: error in the diagonalization' print*, 'Illegal value of the ', info2,'-th parameter' -else +elseif (info2 >0) then print*, "WARNING: Diagonalization failed to converge" endif @@ -302,7 +300,7 @@ do i = 1, n max_elem = tau_m1(i,i) endif enddo -print*,'max elem tau^-1:', max_elem +!print*,'max elem tau^-1:', max_elem ! Debug !print*,'eigenvalues:' @@ -373,7 +371,7 @@ enddo call dgemm('N','T',n,n,n,1d0,R,size(R,1),R,size(R,1),-1d0,RR_t,size(RR_t,1)) norm = dnrm2(n*n,RR_t,1) -print*, 'Rotation matrix check, norm R.R^T = ', norm +!print*, 'Rotation matrix check, norm R.R^T = ', norm ! Debug !if (debug) then @@ -396,9 +394,9 @@ do j = 1, n enddo print*, 'Max error in R.R^T:', max_elem -print*, 'e_val(1):', e_val(1) -print*, 'e_val(n):', e_val(n) -print*, 'max elem in A:', max_elem_A +!print*, 'e_val(1):', e_val(1) +!print*, 'e_val(n):', e_val(n) +!print*, 'max elem in A:', max_elem_A if (ABS(max_elem) > 1d-12) then print*, 'WARNING: max error in R.R^T > 1d-12' diff --git a/src/utils_trust_region/rotation_matrix_iterative.irp.f b/src/utils_trust_region/rotation_matrix_iterative.irp.f new file mode 100644 index 00000000..f268df04 --- /dev/null +++ b/src/utils_trust_region/rotation_matrix_iterative.irp.f @@ -0,0 +1,134 @@ +! Rotation matrix with the iterative method + +! \begin{align*} +! \textbf{R} = \sum_{k=0}^{\infty} \frac{1}{k!} \textbf{X}^k +! \end{align*} + +! !!! Doesn't work !!! + + +subroutine rotation_matrix_iterative(m,X,R) + + implicit none + + ! in + integer, intent(in) :: m + double precision, intent(in) :: X(m,m) + + ! out + double precision, intent(out) :: R(m,m) + + ! internal + double precision :: max_elem, pre_factor + double precision :: t1,t2,t3 + integer :: k,l,i,j + logical :: not_converged + double precision, allocatable :: RRT(:,:), A(:,:), B(:,:) + + ! Functions + integer :: factorial + + print*,'---rotation_matrix_iterative---' + call wall_time(t1) + + allocate(RRT(m,m),A(m,m),B(m,m)) + + ! k = 0 + R = 0d0 + do i = 1, m + R(i,i) = 1d0 + enddo + + ! k = 1 + R = R + X + + k = 2 + + not_converged = .True. + + do while (not_converged) + + pre_factor = 1d0/DBLE(factorial(k)) + if (pre_factor < 1d-15) then + print*,'pre factor=', pre_factor,'< 1d-15, exit' + exit + endif + + A = X + B = 0d0 + do l = 1, k-1 + call dgemm('N','N',m,m,m,1d0,X,size(X,1),A,size(A,1),0d0,B,size(B,1)) + A = B + enddo + + !print*,'B' + !do i = 1, m + ! print*,B(i,:) * 1d0/DBLE(factorial(k)) + !enddo + + R = R + pre_factor * B + + k = k + 1 + call dgemm('T','N',m,m,m,1d0,R,size(R,1),R,size(R,1),0d0,RRT,size(RRT,1)) + + !print*,'R' + !do i = 1, m + ! write(*,'(10(E12.5))') R(i,:) + !enddo + + do i = 1, m + RRT(i,i) = RRT(i,i) - 1d0 + enddo + + !print*,'RRT' + !do i = 1, m + ! write(*,'(10(E12.5))') RRT(i,:) + !enddo + + max_elem = 0d0 + do j = 1, m + do i = 1, m + if (dabs(RRT(i,j)) > max_elem) then + max_elem = dabs(RRT(i,j)) + endif + enddo + enddo + + print*, 'Iteration:', k + print*, 'Max error in R:', max_elem + + if (max_elem < 1d-12) then + not_converged = .False. + endif + + enddo + + deallocate(RRT,A,B) + + call wall_time(t2) + t3 = t2 - t1 + print*,'Time in rotation matrix iterative:', t3 + print*,'---End roration_matrix_iterative---' + + +print*,'Does not work yet, abort' +call abort + +end + +! Factorial + +function factorial(n) + + implicit none + + integer, intent(in) :: n + integer :: factorial, k + + factorial = 1 + + do k = 1, n + factorial = factorial * k + enddo + +end diff --git a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f index bdd1f6ba..75d04352 100644 --- a/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f +++ b/src/utils_trust_region/sub_to_full_rotation_matrix.irp.f @@ -32,9 +32,9 @@ subroutine sub_to_full_rotation_matrix(m,tmp_list,tmp_R,R) - BEGIN_DOC + !BEGIN_DOC ! Compute the full rotation matrix from a smaller one - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_expected_e.irp.f b/src/utils_trust_region/trust_region_expected_e.irp.f index b7d849d1..ad5ad2f9 100644 --- a/src/utils_trust_region/trust_region_expected_e.irp.f +++ b/src/utils_trust_region/trust_region_expected_e.irp.f @@ -10,11 +10,12 @@ ! \end{align*} ! Input: -! | n | integer | m*(m-1)/2 | -! | v_grad(n) | double precision | gradient | -! | H(n,n) | double precision | hessian | -! | x(n) | double precision | Step in the trust region | -! | prev_energy | double precision | previous energy | +! | n | integer | m*(m-1)/2 | +! | n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | v_grad(n) | double precision | gradient | +! | H(n,n) | double precision | hessian | +! | x(n) | double precision | Step in the trust region | +! | prev_energy | double precision | previous energy | ! Output: ! | e_model | double precision | predicted energy after the rotation of the MOs | @@ -29,21 +30,21 @@ ! | ddot | double precision | dot product (Lapack) | -subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) +subroutine trust_region_expected_e(n,n2,v_grad,H,x,prev_energy,e_model) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the expected criterion/energy after the application of the step x - END_DOC + !END_DOC implicit none ! Variables ! in - integer, intent(in) :: n - double precision, intent(in) :: v_grad(n),H(n,n),x(n) + integer, intent(in) :: n,n2 + double precision, intent(in) :: v_grad(n),H(n,n2),x(n) double precision, intent(in) :: prev_energy ! out @@ -79,27 +80,34 @@ subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) part_1 = ddot(n,v_grad,1,x,1) !if (debug) then - print*,'g.x : ', part_1 - !endif - + ! print*,'g.x : ', part_1 + !endif + ! Product H.x - call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + if (n == n2) then + call dgemv('N',n,n,1d0,H,size(H,1),x,1,0d0,part_2a,1) + else + ! If the hessian is diagonal + do i = 1, n + part_2a(i) = H(i,1) * x(i) + enddo + endif ! Product 1/2 . x^T.H.x part_2 = 0.5d0 * ddot(n,x,1,part_2a,1) !if (debug) then - print*,'1/2*x^T.H.x : ', part_2 + ! print*,'1/2*x^T.H.x : ', part_2 !endif - print*,'prev_energy', prev_energy ! Sum e_model = prev_energy + part_1 + part_2 ! Writing the predicted energy - print*, 'Predicted energy after the rotation : ', e_model - print*, 'Previous energy - predicted energy:', prev_energy - e_model + print*, 'prev_energy: ', prev_energy + print*, 'Predicted energy after the rotation:', e_model + print*, 'Previous energy - predicted energy: ', prev_energy - e_model ! Can be deleted, already in another subroutine if (DABS(prev_energy - e_model) < 1d-12 ) then @@ -114,6 +122,5 @@ subroutine trust_region_expected_e(n,v_grad,H,x,prev_energy,e_model) print*,'Time in trust e model:', t3 print*,'---End trust_e_model---' - print*,'' end subroutine diff --git a/src/utils_trust_region/trust_region_optimal_lambda.irp.f b/src/utils_trust_region/trust_region_optimal_lambda.irp.f index f71bb405..b7dcf875 100644 --- a/src/utils_trust_region/trust_region_optimal_lambda.irp.f +++ b/src/utils_trust_region/trust_region_optimal_lambda.irp.f @@ -153,9 +153,9 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Research the optimal lambda to constrain the step size in the trust region - END_DOC + !END_DOC implicit none @@ -195,18 +195,17 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) print*,'' print*,'---Trust_newton---' - print*,'' call wall_time(t1) ! version_lambda_search ! 1 -> ||x||^2 - delta^2 = 0, ! 2 -> 1/||x||^2 - 1/delta^2 = 0 (better) - if (version_lambda_search == 1) then - print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' - else - print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' - endif + !if (version_lambda_search == 1) then + ! print*, 'Research of the optimal lambda by solving ||x||^2 - delta^2 = 0' + !else + ! print*, 'Research of the optimal lambda by solving 1/||x||^2 - 1/delta^2 = 0' + !endif ! Version 2 is normally better @@ -216,21 +215,21 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! Initialization epsilon = 1d-4 - lambda =MAX(0d0, -e_val(1)) + lambda = max(0d0, -e_val(1)) ! Pre research of lambda to start near the optimal lambda ! by adding a constant epsilon and changing the constant to ! have ||x(lambda + epsilon)|| ~ delta, before setting ! lambda = lambda + epsilon - print*, 'Pre research of lambda:' - print*,'Initial lambda =', lambda + !print*, 'Pre research of lambda:' + !print*,'Initial lambda =', lambda f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta + !print*,'||x(lambda)||=', dsqrt(f_N),'delta=',delta i = 1 ! To increase lambda if (f_N > delta**2) then - print*,'Increasing lambda...' + !print*,'Increasing lambda...' do while (f_N > delta**2 .and. i <= nb_it_max_pre_search) ! Update the previous norm @@ -240,7 +239,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! New norm f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta ! Security if (prev_f_N < f_N) then @@ -254,7 +253,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! To reduce lambda else - print*,'Reducing lambda...' + !print*,'Reducing lambda...' do while (f_N < delta**2 .and. i <= nb_it_max_pre_search) ! Update the previous norm @@ -264,7 +263,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! New norm f_N = f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda + epsilon) - print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta + !print*, 'lambda', lambda + epsilon, '||x||', dsqrt(f_N), 'delta', delta ! Security if (prev_f_N > f_N) then @@ -277,27 +276,25 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) enddo endif - print*,'End of the pre research of lambda' + !print*,'End of the pre research of lambda' ! New value of lambda lambda = lambda + epsilon - print*, 'e_val(1):', e_val(1) - print*, 'Staring point, lambda =', lambda + !print*, 'e_val(1):', e_val(1) + !print*, 'Staring point, lambda =', lambda ! thresh_cc, threshold for the research of the optimal lambda ! Leaves the loop when ABS(1d0-||x||^2/delta^2) > thresh_cc ! thresh_rho_2, threshold to cancel the step in the research ! of the optimal lambda, the step is cancelled if rho_2 < thresh_rho_2 - print*,'Threshold for the CC:', thresh_cc - print*,'Threshold for rho_2:', thresh_rho_2 - - print*, 'w_1^T . g =', tmp_wtg(1) + + !print*,'Threshold for the CC:', thresh_cc + !print*,'Threshold for rho_2:', thresh_rho_2 + !print*, 'w_1^T . g =', tmp_wtg(1) ! Debug - !if (debug) then - ! print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' - !endif + !print*, 'Iteration rho_2 lambda delta ||x|| |1-(||x||^2/delta^2)|' ! Initialization i = 1 @@ -324,9 +321,9 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! Newton's method do while (i <= 100 .and. DABS(1d0-f_N/delta**2) > thresh_cc) - print*,'--------------------------------------' - print*,'Research of lambda, iteration:', i - print*,'--------------------------------------' + !print*,'--------------------------------------' + !print*,'Research of lambda, iteration:', i + !print*,'--------------------------------------' ! Update of f_N, f_R and the derivatives prev_f_N = f_N @@ -339,7 +336,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) d_1 = d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! first derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 d_2 = d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) ! second derivative of (1/||x(lambda)||^2 - 1/delta^2)^2 endif - write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 + !write(*,'(a,E12.5,a,E12.5)') ' 1st and 2nd derivative: ', d_1,', ', d_2 ! Newton's step y = -(1d0/DABS(d_2))*d_1 @@ -348,7 +345,7 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) if (DABS(y) > alpha) then y = alpha * (y/DABS(y)) ! preservation of the sign of y endif - write(*,'(a,E12.5)') ' Step length: ', y + !write(*,'(a,E12.5)') ' Step length: ', y ! Predicted value of (||x(lambda)||^2 - delta^2)^2, Taylor series model = prev_f_R + d_1 * y + 0.5d0 * d_2 * y**2 @@ -356,8 +353,8 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! Updates lambda prev_lambda = lambda lambda = prev_lambda + y - print*,'prev lambda:', prev_lambda - print*,'new lambda:', lambda + !print*,'prev lambda:', prev_lambda + !print*,'new lambda:', lambda ! Checks if lambda is in (-h_1, \infty) if (lambda > MAX(0d0, -e_val(1))) then @@ -371,18 +368,18 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) f_R = (1d0/f_N - 1d0/delta**2)**2 ! new value of (1/||x(lambda)||^2 -1/delta^2)^2 endif - if (version_lambda_search == 1) then - print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R - print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R - print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model - else - print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R - print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R - print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model - endif + !if (version_lambda_search == 1) then + ! print*,'Previous value of (||x(lambda)||^2 - delta^2)^2:', prev_f_R + ! print*,'Actual value of (||x(lambda)||^2 - delta^2)^2:', f_R + ! print*,'Predicted value of (||x(lambda)||^2 - delta^2)^2:', model + !else + ! print*,'Previous value of (1/||x(lambda)||^2 - 1/delta^2)^2:', prev_f_R + ! print*,'Actual value of (1/||x(lambda)||^2 - 1/delta^2)^2:', f_R + ! print*,'Predicted value of (1/||x(lambda)||^2 - 1/delta^2)^2:', model + !endif - print*,'previous - actual:', prev_f_R - f_R - print*,'previous - model:', prev_f_R - model + !print*,'previous - actual:', prev_f_R - f_R + !print*,'previous - model:', prev_f_R - model ! Check the gain if (DABS(prev_f_R - model) < thresh_model_2) then @@ -401,10 +398,10 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) ! Computes rho_2 rho_2 = (prev_f_R - f_R)/(prev_f_R - model) - print*,'rho_2:', rho_2 + !print*,'rho_2:', rho_2 else rho_2 = 0d0 ! in order to reduce the size of the trust region, alpha, until lambda is in (-h_1, \infty) - print*,'lambda < -e_val(1) ===> rho_2 = 0' + !print*,'lambda < -e_val(1) ===> rho_2 = 0' endif ! Evolution of the trust length, alpha @@ -417,20 +414,20 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) else alpha = 0.25d0 * alpha endif - write(*,'(a,E12.5)') ' New trust length alpha: ', alpha + !write(*,'(a,E12.5)') ' New trust length alpha: ', alpha ! cancellaion of the step if rho < 0.1 if (rho_2 < thresh_rho_2) then !0.1d0) then lambda = prev_lambda f_N = prev_f_N - print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' + !print*,'Rho_2 <', thresh_rho_2,', cancellation of the step: lambda = prev_lambda' endif - print*,'' - print*,'lambda, ||x||, delta:' - print*, lambda, dsqrt(f_N), delta - print*,'CC:', DABS(1d0 - f_N/delta**2) - print*,'' + !print*,'' + !print*,'lambda, ||x||, delta:' + !print*, lambda, dsqrt(f_N), delta + !print*,'CC:', DABS(1d0 - f_N/delta**2) + !print*,'' i = i + 1 enddo @@ -445,20 +442,19 @@ subroutine trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) print*,'' endif - print*,'Number of iterations :', i - print*,'Value of lambda :', lambda - print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 - print*,'Error on the trust region (||x||^2 - delta^2)^2) :', (f_N - delta**2)**2 - print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 + print*,'Number of iterations:', i + print*,'Value of lambda:', lambda + !print*,'Error on the trust region (1d0-f_N/delta**2) (Convergence criterion) :', 1d0-f_N/delta**2 + print*,'Convergence criterion:', 1d0-f_N/delta**2 + !print*,'Error on the trust region (||x||^2 - delta^2)^2):', (f_N - delta**2)**2 + !print*,'Error on the trust region (1/||x||^2 - 1/delta^2)^2)', (1d0/f_N - 1d0/delta**2)**2 ! Time call wall_time(t2) t3 = t2 - t1 print*,'Time in trust_newton:', t3 - print*,'' print*,'---End trust_newton---' - print*,'' end subroutine @@ -508,9 +504,9 @@ function d1_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -644,9 +640,9 @@ function d2_norm_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -792,9 +788,9 @@ function f_norm_trust_region_omp(n,e_val,tmp_wtg,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute ||x(lambda)||^2 - END_DOC + !END_DOC implicit none @@ -903,9 +899,9 @@ function d1_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1000,9 +996,9 @@ function d2_norm_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative with respect to lambda of (||x(lambda)||^2 - Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1102,9 +1098,9 @@ function f_norm_trust_region(n,e_val,tmp_wtg,lambda) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute ||x(lambda)||^2 - END_DOC + !END_DOC implicit none @@ -1184,9 +1180,9 @@ function d1_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1340,9 +1336,9 @@ function d2_norm_inverse_trust_region_omp(n,e_val,tmp_wtg,lambda,delta) use omp_lib include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1498,9 +1494,9 @@ function d1_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the first derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none @@ -1592,9 +1588,9 @@ function d2_norm_inverse_trust_region(n,e_val,w,v_grad,lambda,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the second derivative of (1/||x||^2 - 1/Delta^2)^2 - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/trust_region_rho.irp.f b/src/utils_trust_region/trust_region_rho.irp.f index 45738736..11ab11e9 100644 --- a/src/utils_trust_region/trust_region_rho.irp.f +++ b/src/utils_trust_region/trust_region_rho.irp.f @@ -47,9 +47,9 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute rho, the agreement between the predicted criterion/energy and the real one - END_DOC + !END_DOC implicit none @@ -69,7 +69,7 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) print*,'' print*,'---Rho_model---' - call wall_time(t1) + !call wall_time(t1) ! Rho ! \begin{equation} @@ -92,30 +92,29 @@ subroutine trust_region_rho(prev_energy, energy,e_model,rho) rho = (prev_energy - energy) / (prev_energy - e_model) - print*, 'previous energy, prev_energy :', prev_energy - print*, 'predicted energy, e_model :', e_model - print*, 'real energy, energy :', energy - print*, 'prev_energy - energy :', prev_energy - energy - print*, 'prev_energy - e_model :', prev_energy - e_model - print*, 'Rho :', rho - print*, 'Threshold for rho:', thresh_rho + !print*, 'previous energy, prev_energy:', prev_energy + !print*, 'predicted energy, e_model:', e_model + !print*, 'real energy, energy:', energy + !print*, 'prev_energy - energy:', prev_energy - energy + !print*, 'prev_energy - e_model:', prev_energy - e_model + print*, 'Rho:', rho + !print*, 'Threshold for rho:', thresh_rho ! Modification of prev_energy in function of rho if (rho < thresh_rho) then !0.1) then ! the step is cancelled print*, 'Rho <', thresh_rho,', the previous energy does not changed' - print*, 'prev_energy :', prev_energy + !print*, 'prev_energy :', prev_energy else ! the step is accepted prev_energy = energy - print*, 'Rho >=', thresh_rho,', energy -> prev_energy :', energy + print*, 'Rho >=', thresh_rho,', energy -> prev_energy:', energy endif - call wall_time(t2) - t3 = t2 - t1 - print*,'Time in rho model:', t3 + !call wall_time(t2) + !t3 = t2 - t1 + !print*,'Time in rho model:', t3 print*,'---End rho_model---' - print*,'' end subroutine diff --git a/src/utils_trust_region/trust_region_step.irp.f b/src/utils_trust_region/trust_region_step.irp.f index 42aa6ed4..54161a1c 100644 --- a/src/utils_trust_region/trust_region_step.irp.f +++ b/src/utils_trust_region/trust_region_step.irp.f @@ -341,9 +341,10 @@ ! Cf. qp_edit in orbital optimization section, for some constants/thresholds ! Input: -! | m | integer | number of MOs | +! | m | integer | number of MOs | ! | n | integer | m*(m-1)/2 | -! | H(n, n) | double precision | hessian | +! | n2 | integer | m*(m-1)/2 or 1 if the hessian is diagonal | +! | H(n,n2) | double precision | hessian | ! | v_grad(n) | double precision | gradient | ! | e_val(n) | double precision | eigenvalues of the hessian | ! | W(n, n) | double precision | eigenvectors of the hessian | @@ -371,23 +372,23 @@ ! | f_norm_trust_region_omp | double precision | compute the value of norm(x(lambda)^2) | -subroutine trust_region_step(n,nb_iter,v_grad,rho,e_val,w,x,delta) +subroutine trust_region_step(n,n2,nb_iter,v_grad,rho,e_val,w,x,delta) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compuet the step in the trust region - END_DOC + !END_DOC implicit none ! Variables ! in - integer, intent(in) :: n + integer, intent(in) :: n,n2 double precision, intent(in) :: v_grad(n), rho integer, intent(inout) :: nb_iter - double precision, intent(in) :: e_val(n), w(n,n) + double precision, intent(in) :: e_val(n), w(n,n2) ! inout double precision, intent(inout) :: delta @@ -432,11 +433,19 @@ lambda = 0d0 ! List of w^T.g, to avoid the recomputation tmp_wtg = 0d0 -do j = 1, n - do i = 1, n - tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) +if (n == n2) then + do j = 1, n + do i = 1, n + tmp_wtg(j) = tmp_wtg(j) + w(i,j) * v_grad(i) + enddo enddo -enddo +else + ! For the diagonal case + do j = 1, n + k = int(w(j,1)+1d-15) + tmp_wtg(j) = v_grad(k) + enddo +endif ! Replacement of the small tmp_wtg corresponding to a negative eigenvalue ! in the case of avoid_saddle @@ -463,18 +472,18 @@ if (avoid_saddle .and. e_val(1) < - thresh_eig) then tmp_wtg(1) = 0d0 endif -endif +endif ! Norm^2 of x, ||x||^2 norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) ! We just use this norm for the nb_iter = 0 in order to initialize the trust radius delta ! We don't care about the sign of the eigenvalue we just want the size of the step in a normal Newton-Raphson algorithm ! Anyway if the step is too big it will be reduced -print*,'||x||^2 :', norm2_x +!print*,'||x||^2 :', norm2_x ! Norm^2 of the gradient, ||v_grad||^2 norm2_g = (dnrm2(n,v_grad,1))**2 -print*,'||grad||^2 :', norm2_g +!print*,'||grad||^2 :', norm2_g ! Trust radius initialization @@ -526,7 +535,7 @@ if (delta > 1d10) then delta = 1d10 endif -print*, 'Delta :', delta +!print*, 'Delta :', delta ! Calculation of the optimal lambda @@ -545,26 +554,26 @@ print*, 'Delta :', delta ! Research of lambda to solve ||x(lambda)|| = Delta ! Display -print*, 'e_val(1) = ', e_val(1) -print*, 'w_1^T.g =', tmp_wtg(1) +!print*, 'e_val(1) = ', e_val(1) +!print*, 'w_1^T.g =', tmp_wtg(1) ! H positive definite if (e_val(1) > - thresh_eig) then norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg,0d0) - print*, '||x(0)||=', dsqrt(norm2_x) - print*, 'Delta=', delta + !print*, '||x(0)||=', dsqrt(norm2_x) + !print*, 'Delta=', delta ! H positive definite, ||x(lambda = 0)|| <= Delta if (dsqrt(norm2_x) <= delta) then - print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' - print*, 'lambda = 0, no lambda optimization' + !print*, 'H positive definite, ||x(lambda = 0)|| <= Delta' + !print*, 'lambda = 0, no lambda optimization' lambda = 0d0 ! H positive definite, ||x(lambda = 0)|| > Delta else ! Constraint solution - print*, 'H positive definite, ||x(lambda = 0)|| > Delta' - print*,'Computation of the optimal lambda...' + !print*, 'H positive definite, ||x(lambda = 0)|| > Delta' + !print*,'Computation of the optimal lambda...' call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) endif @@ -572,14 +581,14 @@ if (e_val(1) > - thresh_eig) then else if (DABS(tmp_wtg(1)) < thresh_wtg) then norm2_x = f_norm_trust_region_omp(n,e_val,tmp_wtg, - e_val(1)) - print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) + !print*, 'w_1^T.g <', thresh_wtg,', ||x(lambda = -e_val(1))|| =', dsqrt(norm2_x) endif ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta if (dsqrt(norm2_x) <= delta .and. DABS(tmp_wtg(1)) < thresh_wtg) then ! Add e_val(1) in order to have (H - e_val(1) I) positive definite - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' - print*, 'lambda = -e_val(1), no lambda optimization' + !print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| <= Delta' + !print*, 'lambda = -e_val(1), no lambda optimization' lambda = - e_val(1) ! H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta @@ -587,12 +596,12 @@ else ! H indefinite, w_1^T.g =/= 0 else ! Constraint solution/ add lambda - if (DABS(tmp_wtg(1)) < thresh_wtg) then - print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' - else - print*, 'H indefinite, w_1^T.g =/= 0' - endif - print*, 'Computation of the optimal lambda...' + !if (DABS(tmp_wtg(1)) < thresh_wtg) then + ! print*, 'H indefinite, w_1^T.g = 0, ||x(lambda = -e_val(1))|| > Delta' + !else + ! print*, 'H indefinite, w_1^T.g =/= 0' + !endif + !print*, 'Computation of the optimal lambda...' call trust_region_optimal_lambda(n,e_val,tmp_wtg,delta,lambda) endif @@ -621,28 +630,53 @@ x = 0d0 ! Calculation of the step x -! Normal version -if (.not. absolute_eig) then +if (n == n2) then + ! Normal version + if (.not. absolute_eig) then - do i = 1, n - if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) - enddo - endif - enddo + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (e_val(i) + lambda) + enddo + endif + enddo -! Version to use the absolute value of the eigenvalues + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + do j = 1, n + x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) + enddo + endif + enddo + + endif else + ! If the hessian is diagonal + ! Normal version + if (.not. absolute_eig) then - do i = 1, n - if (DABS(e_val(i)) > thresh_eig) then - do j = 1, n - x(j) = x(j) - tmp_wtg(i) * W(j,i) / (DABS(e_val(i)) + lambda) - enddo - endif - enddo + do i = 1, n + if (DABS(e_val(i)) > thresh_eig .and. DABS(e_val(i)+lambda) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (e_val(i) + lambda) + endif + enddo + ! Version to use the absolute value of the eigenvalues + else + + do i = 1, n + if (DABS(e_val(i)) > thresh_eig) then + j = int(w(i,1) + 1d-15) + x(j) = - tmp_wtg(i) * 1d0 / (DABS(e_val(i)) + lambda) + endif + enddo + + endif endif double precision :: beta, norm_x @@ -711,6 +745,5 @@ deallocate(tmp_wtg) print*,'======================' print*,'---End trust_region---' print*,'======================' - print*,'' end diff --git a/src/utils_trust_region/vec_to_mat_index.irp.f b/src/utils_trust_region/vec_to_mat_index.irp.f index 5d68748b..a6d381f7 100644 --- a/src/utils_trust_region/vec_to_mat_index.irp.f +++ b/src/utils_trust_region/vec_to_mat_index.irp.f @@ -36,10 +36,10 @@ subroutine vec_to_mat_index(i,p,q) include 'pi.h' - BEGIN_DOC + !BEGIN_DOC ! Compute the indexes (p,q) of the element in the lower diagonal matrix knowing ! its index i a vector - END_DOC + !END_DOC implicit none diff --git a/src/utils_trust_region/vec_to_mat_v2.irp.f b/src/utils_trust_region/vec_to_mat_v2.irp.f index 9140b8d3..e184d3ba 100644 --- a/src/utils_trust_region/vec_to_mat_v2.irp.f +++ b/src/utils_trust_region/vec_to_mat_v2.irp.f @@ -8,9 +8,9 @@ subroutine vec_to_mat_v2(n,m,v_x,m_x) - BEGIN_DOC + !BEGIN_DOC ! Vector to antisymmetric matrix - END_DOC + !END_DOC implicit none