diff --git a/RELEASE_NOTES.org b/RELEASE_NOTES.org index 7b9483bf..9b579146 100644 --- a/RELEASE_NOTES.org +++ b/RELEASE_NOTES.org @@ -8,6 +8,7 @@ - Use OpamPack for OCaml - Configure adapted for ARM - Added many types of integrals + - Accelerated four-index transformation *** TODO: take from dev - [ ] Added GTOs with complex exponent diff --git a/external/qp2-dependencies b/external/qp2-dependencies index f40bde09..b8cd5815 160000 --- a/external/qp2-dependencies +++ b/external/qp2-dependencies @@ -1 +1 @@ -Subproject commit f40bde0925808bbec0424b57bfcef1b26473a1c8 +Subproject commit b8cd5815bce14c9b880e3c5ea3d5fc2652f5955c diff --git a/src/cipsi/cipsi.irp.f b/src/cipsi/cipsi.irp.f index da77a527..6e715531 100644 --- a/src/cipsi/cipsi.irp.f +++ b/src/cipsi/cipsi.irp.f @@ -70,8 +70,8 @@ subroutine run_cipsi do while ( & (N_det < N_det_max) .and. & - (sum(abs(pt2_data % pt2(1:N_states)) * state_average_weight(1:N_states)) > pt2_max) .and. & - (sum(abs(pt2_data % variance(1:N_states)) * state_average_weight(1:N_states)) > variance_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. & + (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and. & (correlation_energy_ratio <= correlation_energy_ratio_max) & ) write(*,'(A)') '--------------------------------------------------------------------------------' diff --git a/src/cipsi/run_pt2_slave.irp.f b/src/cipsi/run_pt2_slave.irp.f index b57546ef..debae596 100644 --- a/src/cipsi/run_pt2_slave.irp.f +++ b/src/cipsi/run_pt2_slave.irp.f @@ -31,11 +31,12 @@ subroutine run_pt2_slave(thread,iproc,energy) double precision, intent(in) :: energy(N_states_diag) integer, intent(in) :: thread, iproc - if (N_det > 100000 ) then - call run_pt2_slave_large(thread,iproc,energy) - else - call run_pt2_slave_small(thread,iproc,energy) - endif + call run_pt2_slave_large(thread,iproc,energy) +! if (N_det > 100000 ) then +! call run_pt2_slave_large(thread,iproc,energy) +! else +! call run_pt2_slave_small(thread,iproc,energy) +! endif end subroutine run_pt2_slave_small(thread,iproc,energy) @@ -66,6 +67,7 @@ subroutine run_pt2_slave_small(thread,iproc,energy) double precision, external :: memory_of_double, memory_of_int integer :: bsize ! Size of selection buffers +! logical :: sending allocate(task_id(pt2_n_tasks_max), task(pt2_n_tasks_max)) allocate(pt2_data(pt2_n_tasks_max), i_generator(pt2_n_tasks_max), subset(pt2_n_tasks_max)) @@ -162,11 +164,6 @@ end subroutine subroutine run_pt2_slave_large(thread,iproc,energy) use selection_types use f77_zmq - BEGIN_DOC -! This subroutine can miss important determinants when the PT2 is completely -! computed. It should be called only for large workloads where the PT2 is -! interrupted before the end - END_DOC implicit none double precision, intent(in) :: energy(N_states_diag) @@ -192,12 +189,8 @@ subroutine run_pt2_slave_large(thread,iproc,energy) integer :: bsize ! Size of selection buffers logical :: sending - double precision :: time_shift - PROVIDE global_selection_buffer global_selection_buffer_lock - call random_number(time_shift) - time_shift = time_shift*15.d0 zmq_to_qp_run_socket = new_zmq_to_qp_run_socket() @@ -215,9 +208,6 @@ subroutine run_pt2_slave_large(thread,iproc,energy) sending = .False. done = .False. - double precision :: time0, time1 - call wall_time(time0) - time0 = time0+time_shift do while (.not.done) integer, external :: get_tasks_from_taskserver @@ -244,28 +234,25 @@ subroutine run_pt2_slave_large(thread,iproc,energy) ASSERT (b%N == bsize) endif + double precision :: time0, time1 + call wall_time(time0) call pt2_alloc(pt2_data,N_states) b%cur = 0 call select_connected(i_generator,energy,pt2_data,b,subset,pt2_F(i_generator)) + call wall_time(time1) integer, external :: tasks_done_to_taskserver if (tasks_done_to_taskserver(zmq_to_qp_run_socket,worker_id,task_id,n_tasks) == -1) then done = .true. endif call sort_selection_buffer(b) - - call wall_time(time1) -! if (time1-time0 > 15.d0) then - call omp_set_lock(global_selection_buffer_lock) - global_selection_buffer%mini = b%mini - call merge_selection_buffers(b,global_selection_buffer) - b%cur=0 - call omp_unset_lock(global_selection_buffer_lock) - call wall_time(time0) -! endif - call push_pt2_results_async_recv(zmq_socket_push,b%mini,sending) - if ( iproc == 1 .or. i_generator < 100 .or. done) then + call omp_set_lock(global_selection_buffer_lock) + global_selection_buffer%mini = b%mini + call merge_selection_buffers(b,global_selection_buffer) + b%cur=0 + call omp_unset_lock(global_selection_buffer_lock) + if ( iproc == 1 ) then call omp_set_lock(global_selection_buffer_lock) call push_pt2_results_async_send(zmq_socket_push, (/i_generator/), (/pt2_data/), global_selection_buffer, (/task_id/), 1,sending) global_selection_buffer%cur = 0 diff --git a/src/cipsi/stochastic_cipsi.irp.f b/src/cipsi/stochastic_cipsi.irp.f index 5fc9db0f..781fcda6 100644 --- a/src/cipsi/stochastic_cipsi.irp.f +++ b/src/cipsi/stochastic_cipsi.irp.f @@ -69,8 +69,8 @@ subroutine run_stochastic_cipsi do while ( & (N_det < N_det_max) .and. & - (sum(abs(pt2_data % pt2(1:N_states)) * state_average_weight(1:N_states)) > pt2_max) .and. & - (sum(abs(pt2_data % variance(1:N_states)) * state_average_weight(1:N_states)) > variance_max) .and. & + (maxval(abs(pt2_data % pt2(1:N_states))) > pt2_max) .and. & + (maxval(abs(pt2_data % variance(1:N_states))) > variance_max) .and. & (correlation_energy_ratio <= correlation_energy_ratio_max) & ) write(*,'(A)') '--------------------------------------------------------------------------------' diff --git a/src/hartree_fock/scf.irp.f b/src/hartree_fock/scf.irp.f index 3226073d..a7ac9fe4 100644 --- a/src/hartree_fock/scf.irp.f +++ b/src/hartree_fock/scf.irp.f @@ -49,7 +49,6 @@ subroutine create_guess if (.not.exists) then mo_label = 'Guess' if (mo_guess_type == "HCore") then - mo_coef = ao_ortho_lowdin_coef call restore_symmetry(ao_num,mo_num,mo_coef,size(mo_coef,1),1.d-10) TOUCH mo_coef call mo_as_eigvectors_of_mo_matrix(mo_one_e_integrals, & diff --git a/src/mo_two_e_erf_ints/map_integrals_erf.irp.f b/src/mo_two_e_erf_ints/map_integrals_erf.irp.f index 73050ec5..3405ec2b 100644 --- a/src/mo_two_e_erf_ints/map_integrals_erf.irp.f +++ b/src/mo_two_e_erf_ints/map_integrals_erf.irp.f @@ -235,11 +235,11 @@ subroutine get_mo_two_e_integrals_erf_ij(k,l,sze,out_array,map) logical :: integral_is_in_map if (key_kind == 8) then - call i8radix_sort(hash,iorder,kk,-1) + call i8sort(hash,iorder,kk) else if (key_kind == 4) then - call iradix_sort(hash,iorder,kk,-1) + call isort(hash,iorder,kk) else if (key_kind == 2) then - call i2radix_sort(hash,iorder,kk,-1) + call i2sort(hash,iorder,kk) endif call map_get_many(mo_integrals_erf_map, hash, tmp_val, kk) @@ -290,11 +290,11 @@ subroutine get_mo_two_e_integrals_erf_i1j1(k,l,sze,out_array,map) logical :: integral_is_in_map if (key_kind == 8) then - call i8radix_sort(hash,iorder,kk,-1) + call i8sort(hash,iorder,kk) else if (key_kind == 4) then - call iradix_sort(hash,iorder,kk,-1) + call isort(hash,iorder,kk) else if (key_kind == 2) then - call i2radix_sort(hash,iorder,kk,-1) + call i2sort(hash,iorder,kk) endif call map_get_many(mo_integrals_erf_map, hash, tmp_val, kk) diff --git a/src/mo_two_e_ints/core_quantities.irp.f b/src/mo_two_e_ints/core_quantities.irp.f index b764a1a6..3642365e 100644 --- a/src/mo_two_e_ints/core_quantities.irp.f +++ b/src/mo_two_e_ints/core_quantities.irp.f @@ -53,7 +53,7 @@ BEGIN_PROVIDER [ double precision, h_core_ri, (mo_num, mo_num) ] enddo do k=1,mo_num do i=1,mo_num - h_core_ri(i,j) = h_core_ri(i,j) - 0.5 * big_array_exchange_integrals(k,i,j) + h_core_ri(i,j) = h_core_ri(i,j) - 0.5d0 * big_array_exchange_integrals(k,i,j) enddo enddo enddo diff --git a/src/mo_two_e_ints/mo_bi_integrals.irp.f b/src/mo_two_e_ints/mo_bi_integrals.irp.f index d58932ce..ae299e9f 100644 --- a/src/mo_two_e_ints/mo_bi_integrals.irp.f +++ b/src/mo_two_e_ints/mo_bi_integrals.irp.f @@ -53,7 +53,11 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] ! call four_idx_novvvv call four_idx_novvvv_old else - call add_integrals_to_map(full_ijkl_bitmask_4) + if (dble(ao_num)**4 * 32.d-9 < dble(qp_max_mem)) then + call four_idx_dgemm + else + call add_integrals_to_map(full_ijkl_bitmask_4) + endif endif call wall_time(wall_2) @@ -77,6 +81,94 @@ BEGIN_PROVIDER [ logical, mo_two_e_integrals_in_map ] END_PROVIDER +subroutine four_idx_dgemm + implicit none + integer :: p,q,r,s,i,j,k,l + double precision, allocatable :: a1(:,:,:,:) + double precision, allocatable :: a2(:,:,:,:) + + allocate (a1(ao_num,ao_num,ao_num,ao_num)) + + print *, 'Getting AOs' + !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(q,r,s) + do s=1,ao_num + do r=1,ao_num + do q=1,ao_num + call get_ao_two_e_integrals(q,r,s,ao_num,a1(1,q,r,s)) + enddo + enddo + enddo + !$OMP END PARALLEL DO + + print *, '1st transformation' + ! 1st transformation + allocate (a2(ao_num,ao_num,ao_num,mo_num)) + call dgemm('T','N', (ao_num*ao_num*ao_num), mo_num, ao_num, 1.d0, a1, ao_num, mo_coef, ao_num, 0.d0, a2, (ao_num*ao_num*ao_num)) + + ! 2nd transformation + print *, '2nd transformation' + deallocate (a1) + allocate (a1(ao_num,ao_num,mo_num,mo_num)) + call dgemm('T','N', (ao_num*ao_num*mo_num), mo_num, ao_num, 1.d0, a2, ao_num, mo_coef, ao_num, 0.d0, a1, (ao_num*ao_num*mo_num)) + + ! 3rd transformation + print *, '3rd transformation' + deallocate (a2) + allocate (a2(ao_num,mo_num,mo_num,mo_num)) + call dgemm('T','N', (ao_num*mo_num*mo_num), mo_num, ao_num, 1.d0, a1, ao_num, mo_coef, ao_num, 0.d0, a2, (ao_num*mo_num*mo_num)) + + ! 4th transformation + print *, '4th transformation' + deallocate (a1) + allocate (a1(mo_num,mo_num,mo_num,mo_num)) + call dgemm('T','N', (mo_num*mo_num*mo_num), mo_num, ao_num, 1.d0, a2, ao_num, mo_coef, ao_num, 0.d0, a1, (mo_num*mo_num*mo_num)) + + deallocate (a2) + + integer :: n_integrals, size_buffer + integer(key_kind) , allocatable :: buffer_i(:) + real(integral_kind), allocatable :: buffer_value(:) + size_buffer = min(ao_num*ao_num*ao_num,16000000) + + !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i,j,k,l,buffer_value,buffer_i,n_integrals) + allocate ( buffer_i(size_buffer), buffer_value(size_buffer) ) + + n_integrals = 0 + !$OMP DO + do l=1,mo_num + do k=1,mo_num + do j=1,l + do i=1,k + if (abs(a1(i,j,k,l)) < mo_integrals_threshold) then + cycle + endif + n_integrals += 1 + buffer_value(n_integrals) = a1(i,j,k,l) + !DIR$ FORCEINLINE + call mo_two_e_integrals_index(i,j,k,l,buffer_i(n_integrals)) + if (n_integrals == size_buffer) then + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + n_integrals = 0 + endif + enddo + enddo + enddo + enddo + !$OMP END DO + + call map_append(mo_integrals_map, buffer_i, buffer_value, n_integrals) + + deallocate(buffer_i, buffer_value) + !$OMP END PARALLEL + + deallocate (a1) + + call map_unique(mo_integrals_map) + + integer*8 :: get_mo_map_size, mo_map_size + mo_map_size = get_mo_map_size() + +end subroutine subroutine add_integrals_to_map(mask_ijkl) use bitmasks @@ -153,24 +245,26 @@ subroutine add_integrals_to_map(mask_ijkl) return endif - size_buffer = min(ao_num*ao_num*ao_num,16000000) + call wall_time(wall_1) + + size_buffer = min(ao_num*ao_num*ao_num,8000000) print*, 'Buffers : ', 8.*(mo_num*(n_j)*(n_k+1) + mo_num+& ao_num+ao_num*ao_num+ size_buffer*3)/(1024*1024), 'MB / core' - double precision :: accu_bis - accu_bis = 0.d0 - call wall_time(wall_1) - !$OMP PARALLEL PRIVATE(l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num,accu_bis) & + !$OMP wall_0,thread_num) & !$OMP DEFAULT(NONE) & !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k,n_l, & !$OMP mo_coef_transp, & !$OMP mo_coef_transp_is_built, list_ijkl, & !$OMP mo_coef_is_built, wall_1, & !$OMP mo_coef,mo_integrals_threshold,mo_integrals_map) + + thread_num = 0 + !$ thread_num = omp_get_thread_num() + n_integrals = 0 wall_0 = wall_1 allocate(two_e_tmp_3(mo_num, n_j, n_k), & @@ -181,8 +275,6 @@ subroutine add_integrals_to_map(mask_ijkl) buffer_i(size_buffer), & buffer_value(size_buffer) ) - thread_num = 0 - !$ thread_num = omp_get_thread_num() !$OMP DO SCHEDULE(guided) do l1 = 1,ao_num two_e_tmp_3 = 0.d0 @@ -340,10 +432,10 @@ subroutine add_integrals_to_map(mask_ijkl) !$OMP END DO NOWAIT deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - integer :: index_needed - - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& - real(mo_integrals_threshold,integral_kind)) + if (n_integrals > 0) then + call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& + real(mo_integrals_threshold,integral_kind)) + endif deallocate(buffer_i, buffer_value) !$OMP END PARALLEL call map_merge(mo_integrals_map) @@ -433,12 +525,10 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) call wall_time(wall_1) call cpu_time(cpu_1) - double precision :: accu_bis - accu_bis = 0.d0 !$OMP PARALLEL PRIVATE(m,l1,k1,j1,i1,i2,i3,i4,i,j,k,l,c, ii1,kmax, & !$OMP two_e_tmp_0_idx, two_e_tmp_0, two_e_tmp_1,two_e_tmp_2,two_e_tmp_3,& !$OMP buffer_i,buffer_value,n_integrals,wall_2,i0,j0,k0,l0, & - !$OMP wall_0,thread_num,accu_bis) & + !$OMP wall_0,thread_num) & !$OMP DEFAULT(NONE) & !$OMP SHARED(size_buffer,ao_num,mo_num,n_i,n_j,n_k, & !$OMP mo_coef_transp, & @@ -636,8 +726,6 @@ subroutine add_integrals_to_map_three_indices(mask_ijk) !$OMP END DO NOWAIT deallocate (two_e_tmp_1,two_e_tmp_2,two_e_tmp_3) - integer :: index_needed - call insert_into_mo_integrals_map(n_integrals,buffer_i,buffer_value,& real(mo_integrals_threshold,integral_kind)) deallocate(buffer_i, buffer_value) diff --git a/src/utils/constants.include.F b/src/utils/constants.include.F index 297a839e..d1727701 100644 --- a/src/utils/constants.include.F +++ b/src/utils/constants.include.F @@ -14,6 +14,7 @@ double precision, parameter :: thresh = 1.d-15 double precision, parameter :: cx_lda = -0.73855876638202234d0 double precision, parameter :: c_2_4_3 = 2.5198420997897464d0 double precision, parameter :: cst_lda = -0.93052573634909996d0 -double precision, parameter :: c_4_3 = 1.3333333333333333d0 -double precision, parameter :: c_1_3 = 0.3333333333333333d0 - +double precision, parameter :: c_4_3 = 4.d0/3.d0 +double precision, parameter :: c_1_3 = 1.d0/3.d0 +double precision, parameter :: sq_op5 = dsqrt(0.5d0) +double precision, parameter :: dlog_2pi = dlog(2.d0*dacos(-1.d0)) diff --git a/src/utils/map_module.f90 b/src/utils/map_module.f90 index 98e73470..ceaec874 100644 --- a/src/utils/map_module.f90 +++ b/src/utils/map_module.f90 @@ -238,11 +238,11 @@ subroutine cache_map_sort(map) iorder(i) = i enddo if (cache_key_kind == 2) then - call i2radix_sort(map%key,iorder,map%n_elements,-1) + call i2sort(map%key,iorder,map%n_elements,-1) else if (cache_key_kind == 4) then - call iradix_sort(map%key,iorder,map%n_elements,-1) + call isort(map%key,iorder,map%n_elements,-1) else if (cache_key_kind == 8) then - call i8radix_sort(map%key,iorder,map%n_elements,-1) + call i8sort(map%key,iorder,map%n_elements,-1) endif if (integral_kind == 4) then call set_order(map%value,iorder,map%n_elements) diff --git a/src/utils/qsort.c b/src/utils/qsort.c new file mode 100644 index 00000000..c011b35a --- /dev/null +++ b/src/utils/qsort.c @@ -0,0 +1,373 @@ +/* [[file:~/qp2/src/utils/qsort.org::*Generated%20C%20file][Generated C file:1]] */ +#include +#include + +struct int16_t_comp { + int16_t x; + int32_t i; +}; + +int compare_int16_t( const void * l, const void * r ) +{ + const int16_t * restrict _l= l; + const int16_t * restrict _r= r; + if( *_l > *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int16_t(int16_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int16_t_comp* A = malloc(isize * sizeof(struct int16_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int16_t_big(int16_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int16_t_comp_big* A = malloc(isize * sizeof(struct int16_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int32_t(int32_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int32_t_comp* A = malloc(isize * sizeof(struct int32_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int32_t_big(int32_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int32_t_comp_big* A = malloc(isize * sizeof(struct int32_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int64_t(int64_t* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct int64_t_comp* A = malloc(isize * sizeof(struct int64_t_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_int64_t_big(int64_t* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct int64_t_comp_big* A = malloc(isize * sizeof(struct int64_t_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_double(double* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct double_comp* A = malloc(isize * sizeof(struct double_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_double_big(double* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct double_comp_big* A = malloc(isize * sizeof(struct double_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_float(float* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct float_comp* A = malloc(isize * sizeof(struct float_comp)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_float_big(float* restrict A_in, int64_t* restrict iorder, int64_t isize) { + struct float_comp_big* A = malloc(isize * sizeof(struct float_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i *_r ) return 1; + if( *_l < *_r ) return -1; + return 0; +} + +void qsort_TYPE_big(TYPE* restrict A_in, int32_t* restrict iorder, int32_t isize) { + struct TYPE_comp_big* A = malloc(isize * sizeof(struct TYPE_comp_big)); + if (A == NULL) return; + + for (int i=0 ; i> +""" +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("TYPE", typ).replace("_big", "") ) + print( data.replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +#+NAME: replaced_f +#+begin_src python :results output :noweb yes +data = """ +<> +""" +c1 = { + "int16_t": "i2", + "int32_t": "i", + "int64_t": "i8", + "double": "d", + "float": "" +} +c2 = { + "int16_t": "integer", + "int32_t": "integer", + "int64_t": "integer", + "double": "real", + "float": "real" +} + +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +#+NAME: replaced_f2 +#+begin_src python :results output :noweb yes +data = """ +<> +""" +c1 = { + "int16_t": "i2", + "int32_t": "i", + "int64_t": "i8", + "double": "d", + "float": "" +} +c2 = { + "int16_t": "integer", + "int32_t": "integer", + "int64_t": "integer", + "double": "real", + "float": "real" +} + +for typ in ["int16_t", "int32_t", "int64_t", "double", "float"]: + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("TYPE", typ).replace("_big", "") ) + print( data.replace("real",c2[typ]).replace("L",c1[typ]).replace("int32_t", "int64_t").replace("TYPE", typ) ) +#+end_src + +* Generated C file + +#+BEGIN_SRC c :comments link :tangle qsort.c :noweb yes +#include +#include +<> +#+END_SRC + +* Generated Fortran file + +#+BEGIN_SRC f90 :tangle qsort_module.f90 :noweb yes +module qsort_module + use iso_c_binding + + interface + <> + end interface + +end module qsort_module + +<> + +#+END_SRC + diff --git a/src/utils/qsort_module.f90 b/src/utils/qsort_module.f90 new file mode 100644 index 00000000..a72a4f9e --- /dev/null +++ b/src/utils/qsort_module.f90 @@ -0,0 +1,347 @@ +module qsort_module + use iso_c_binding + + interface + + subroutine i2sort_c(A, iorder, isize) bind(C, name="qsort_int16_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + end subroutine i2sort_c + + subroutine i2sort_noidx_c(A, isize) bind(C, name="qsort_int16_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int16_t) :: A(isize) + end subroutine i2sort_noidx_c + + + + subroutine i2sort_big_c(A, iorder, isize) bind(C, name="qsort_int16_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + end subroutine i2sort_big_c + + subroutine i2sort_noidx_big_c(A, isize) bind(C, name="qsort_int16_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int16_t) :: A(isize) + end subroutine i2sort_noidx_big_c + + + + subroutine isort_c(A, iorder, isize) bind(C, name="qsort_int32_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + end subroutine isort_c + + subroutine isort_noidx_c(A, isize) bind(C, name="qsort_int32_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int32_t) :: A(isize) + end subroutine isort_noidx_c + + + + subroutine isort_big_c(A, iorder, isize) bind(C, name="qsort_int32_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + end subroutine isort_big_c + + subroutine isort_noidx_big_c(A, isize) bind(C, name="qsort_int32_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int32_t) :: A(isize) + end subroutine isort_noidx_big_c + + + + subroutine i8sort_c(A, iorder, isize) bind(C, name="qsort_int64_t") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + end subroutine i8sort_c + + subroutine i8sort_noidx_c(A, isize) bind(C, name="qsort_int64_t_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + integer (c_int64_t) :: A(isize) + end subroutine i8sort_noidx_c + + + + subroutine i8sort_big_c(A, iorder, isize) bind(C, name="qsort_int64_t_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + end subroutine i8sort_big_c + + subroutine i8sort_noidx_big_c(A, isize) bind(C, name="qsort_int64_t_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer (c_int64_t) :: A(isize) + end subroutine i8sort_noidx_big_c + + + + subroutine dsort_c(A, iorder, isize) bind(C, name="qsort_double") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + real (c_double) :: A(isize) + end subroutine dsort_c + + subroutine dsort_noidx_c(A, isize) bind(C, name="qsort_double_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + real (c_double) :: A(isize) + end subroutine dsort_noidx_c + + + + subroutine dsort_big_c(A, iorder, isize) bind(C, name="qsort_double_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + real (c_double) :: A(isize) + end subroutine dsort_big_c + + subroutine dsort_noidx_big_c(A, isize) bind(C, name="qsort_double_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + real (c_double) :: A(isize) + end subroutine dsort_noidx_big_c + + + + subroutine sort_c(A, iorder, isize) bind(C, name="qsort_float") + use iso_c_binding + integer(c_int32_t), value :: isize + integer(c_int32_t) :: iorder(isize) + real (c_float) :: A(isize) + end subroutine sort_c + + subroutine sort_noidx_c(A, isize) bind(C, name="qsort_float_noidx") + use iso_c_binding + integer(c_int32_t), value :: isize + real (c_float) :: A(isize) + end subroutine sort_noidx_c + + + + subroutine sort_big_c(A, iorder, isize) bind(C, name="qsort_float_big") + use iso_c_binding + integer(c_int64_t), value :: isize + integer(c_int64_t) :: iorder(isize) + real (c_float) :: A(isize) + end subroutine sort_big_c + + subroutine sort_noidx_big_c(A, isize) bind(C, name="qsort_float_noidx_big") + use iso_c_binding + integer(c_int64_t), value :: isize + real (c_float) :: A(isize) + end subroutine sort_noidx_big_c + + + + end interface + +end module qsort_module + + +subroutine i2sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + call i2sort_c(A, iorder, isize) +end subroutine i2sort + +subroutine i2sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int16_t) :: A(isize) + call i2sort_noidx_c(A, isize) +end subroutine i2sort_noidx + + + +subroutine i2sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int16_t) :: A(isize) + call i2sort_big_c(A, iorder, isize) +end subroutine i2sort_big + +subroutine i2sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int16_t) :: A(isize) + call i2sort_noidx_big_c(A, isize) +end subroutine i2sort_noidx_big + + + +subroutine isort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + call isort_c(A, iorder, isize) +end subroutine isort + +subroutine isort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int32_t) :: A(isize) + call isort_noidx_c(A, isize) +end subroutine isort_noidx + + + +subroutine isort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int32_t) :: A(isize) + call isort_big_c(A, iorder, isize) +end subroutine isort_big + +subroutine isort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int32_t) :: A(isize) + call isort_noidx_big_c(A, isize) +end subroutine isort_noidx_big + + + +subroutine i8sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + call i8sort_c(A, iorder, isize) +end subroutine i8sort + +subroutine i8sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + integer (c_int64_t) :: A(isize) + call i8sort_noidx_c(A, isize) +end subroutine i8sort_noidx + + + +subroutine i8sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + integer (c_int64_t) :: A(isize) + call i8sort_big_c(A, iorder, isize) +end subroutine i8sort_big + +subroutine i8sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + integer (c_int64_t) :: A(isize) + call i8sort_noidx_big_c(A, isize) +end subroutine i8sort_noidx_big + + + +subroutine dsort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + real (c_double) :: A(isize) + call dsort_c(A, iorder, isize) +end subroutine dsort + +subroutine dsort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + real (c_double) :: A(isize) + call dsort_noidx_c(A, isize) +end subroutine dsort_noidx + + + +subroutine dsort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + real (c_double) :: A(isize) + call dsort_big_c(A, iorder, isize) +end subroutine dsort_big + +subroutine dsort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + real (c_double) :: A(isize) + call dsort_noidx_big_c(A, isize) +end subroutine dsort_noidx_big + + + +subroutine sort(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int32_t) :: isize + integer(c_int32_t) :: iorder(isize) + real (c_float) :: A(isize) + call sort_c(A, iorder, isize) +end subroutine sort + +subroutine sort_noidx(A, isize) + use iso_c_binding + use qsort_module + integer(c_int32_t) :: isize + real (c_float) :: A(isize) + call sort_noidx_c(A, isize) +end subroutine sort_noidx + + + +subroutine sort_big(A, iorder, isize) + use qsort_module + use iso_c_binding + integer(c_int64_t) :: isize + integer(c_int64_t) :: iorder(isize) + real (c_float) :: A(isize) + call sort_big_c(A, iorder, isize) +end subroutine sort_big + +subroutine sort_noidx_big(A, isize) + use iso_c_binding + use qsort_module + integer(c_int64_t) :: isize + real (c_float) :: A(isize) + call sort_noidx_big_c(A, isize) +end subroutine sort_noidx_big diff --git a/src/utils/sort.irp.f b/src/utils/sort.irp.f index ff40263c..089c3871 100644 --- a/src/utils/sort.irp.f +++ b/src/utils/sort.irp.f @@ -1,222 +1,4 @@ BEGIN_TEMPLATE - subroutine insertion_$Xsort (x,iorder,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the insertion sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - $type :: xtmp - integer :: i, i0, j, jmax - - do i=2,isize - xtmp = x(i) - i0 = iorder(i) - j=i-1 - do while (j>0) - if ((x(j) <= xtmp)) exit - x(j+1) = x(j) - iorder(j+1) = iorder(j) - j=j-1 - enddo - x(j+1) = xtmp - iorder(j+1) = i0 - enddo - end subroutine insertion_$Xsort - - subroutine quick_$Xsort(x, iorder, isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the quicksort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - integer, external :: omp_get_num_threads - call rec_$X_quicksort(x,iorder,isize,1,isize,nproc) - end - - recursive subroutine rec_$X_quicksort(x, iorder, isize, first, last, level) - implicit none - integer, intent(in) :: isize, first, last, level - integer,intent(inout) :: iorder(isize) - $type, intent(inout) :: x(isize) - $type :: c, tmp - integer :: itmp - integer :: i, j - - if(isize<2)return - - c = x( shiftr(first+last,1) ) - i = first - j = last - do - do while (x(i) < c) - i=i+1 - end do - do while (c < x(j)) - j=j-1 - end do - if (i >= j) exit - tmp = x(i) - x(i) = x(j) - x(j) = tmp - itmp = iorder(i) - iorder(i) = iorder(j) - iorder(j) = itmp - i=i+1 - j=j-1 - enddo - if ( ((i-first <= 10000).and.(last-j <= 10000)).or.(level<=0) ) then - if (first < i-1) then - call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - endif - if (j+1 < last) then - call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - endif - else - if (first < i-1) then - call rec_$X_quicksort(x, iorder, isize, first, i-1,level/2) - endif - if (j+1 < last) then - call rec_$X_quicksort(x, iorder, isize, j+1, last,level/2) - endif - endif - end - - subroutine heap_$Xsort(x,iorder,isize) - implicit none - BEGIN_DOC - ! Sort array x(isize) using the heap sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - END_DOC - integer,intent(in) :: isize - $type,intent(inout) :: x(isize) - integer,intent(inout) :: iorder(isize) - - integer :: i, k, j, l, i0 - $type :: xtemp - - l = isize/2+1 - k = isize - do while (.True.) - if (l>1) then - l=l-1 - xtemp = x(l) - i0 = iorder(l) - else - xtemp = x(k) - i0 = iorder(k) - x(k) = x(1) - iorder(k) = iorder(1) - k = k-1 - if (k == 1) then - x(1) = xtemp - iorder(1) = i0 - exit - endif - endif - i=l - j = shiftl(l,1) - do while (j1) then - l=l-1 - xtemp = x(l) - i0 = iorder(l) - else - xtemp = x(k) - i0 = iorder(k) - x(k) = x(1) - iorder(k) = iorder(1) - k = k-1 - if (k == 1) then - x(1) = xtemp - iorder(1) = i0 - exit - endif - endif - i=l - j = shiftl(l,1) - do while (j0_8) - if (x(j)<=xtmp) exit - x(j+1_8) = x(j) - iorder(j+1_8) = iorder(j) - j = j-1_8 - enddo - x(j+1_8) = xtmp - iorder(j+1_8) = i0 - enddo - - end subroutine insertion_$Xsort_big - subroutine $Xset_order_big(x,iorder,isize) implicit none BEGIN_DOC @@ -565,223 +90,3 @@ SUBST [ X, type ] END_TEMPLATE -BEGIN_TEMPLATE - -recursive subroutine $Xradix_sort$big(x,iorder,isize,iradix) - implicit none - - BEGIN_DOC - ! Sort integer array x(isize) using the radix sort algorithm. - ! iorder in input should be (1,2,3,...,isize), and in output - ! contains the new order of the elements. - ! iradix should be -1 in input. - END_DOC - integer*$int_type, intent(in) :: isize - integer*$int_type, intent(inout) :: iorder(isize) - integer*$type, intent(inout) :: x(isize) - integer, intent(in) :: iradix - integer :: iradix_new - integer*$type, allocatable :: x2(:), x1(:) - integer*$type :: i4 ! data type - integer*$int_type, allocatable :: iorder1(:),iorder2(:) - integer*$int_type :: i0, i1, i2, i3, i ! index type - integer*$type :: mask - integer :: err - !DIR$ ATTRIBUTES ALIGN : 128 :: iorder1,iorder2, x2, x1 - - if (isize < 2) then - return - endif - - if (iradix == -1) then ! Sort Positive and negative - - allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays' - stop - endif - - i1=1_$int_type - i2=1_$int_type - do i=1_$int_type,isize - if (x(i) < 0_$type) then - iorder1(i1) = iorder(i) - x1(i1) = -x(i) - i1 = i1+1_$int_type - else - iorder2(i2) = iorder(i) - x2(i2) = x(i) - i2 = i2+1_$int_type - endif - enddo - i1=i1-1_$int_type - i2=i2-1_$int_type - - do i=1_$int_type,i2 - iorder(i1+i) = iorder2(i) - x(i1+i) = x2(i) - enddo - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x2, iorder2' - stop - endif - - - if (i1 > 1_$int_type) then - call $Xradix_sort$big(x1,iorder1,i1,-2) - do i=1_$int_type,i1 - x(i) = -x1(1_$int_type+i1-i) - iorder(i) = iorder1(1_$int_type+i1-i) - enddo - endif - - if (i2>1_$int_type) then - call $Xradix_sort$big(x(i1+1_$int_type),iorder(i1+1_$int_type),i2,-2) - endif - - deallocate(x1,iorder1,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x1, iorder1' - stop - endif - return - - else if (iradix == -2) then ! Positive - - ! Find most significant bit - - i0 = 0_$int_type - i4 = maxval(x) - - iradix_new = max($integer_size-1-leadz(i4),1) - mask = ibset(0_$type,iradix_new) - - allocate(x1(isize),iorder1(isize), x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays' - stop - endif - - i1=1_$int_type - i2=1_$int_type - - do i=1_$int_type,isize - if (iand(mask,x(i)) == 0_$type) then - iorder1(i1) = iorder(i) - x1(i1) = x(i) - i1 = i1+1_$int_type - else - iorder2(i2) = iorder(i) - x2(i2) = x(i) - i2 = i2+1_$int_type - endif - enddo - i1=i1-1_$int_type - i2=i2-1_$int_type - - do i=1_$int_type,i1 - iorder(i0+i) = iorder1(i) - x(i0+i) = x1(i) - enddo - i0 = i0+i1 - i3 = i0 - deallocate(x1,iorder1,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x1, iorder1' - stop - endif - - - do i=1_$int_type,i2 - iorder(i0+i) = iorder2(i) - x(i0+i) = x2(i) - enddo - i0 = i0+i2 - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to deallocate arrays x2, iorder2' - stop - endif - - - if (i3>1_$int_type) then - call $Xradix_sort$big(x,iorder,i3,iradix_new-1) - endif - - if (isize-i3>1_$int_type) then - call $Xradix_sort$big(x(i3+1_$int_type),iorder(i3+1_$int_type),isize-i3,iradix_new-1) - endif - - return - endif - - ASSERT (iradix >= 0) - - if (isize < 48) then - call insertion_$Xsort$big(x,iorder,isize) - return - endif - - - allocate(x2(isize),iorder2(isize),stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays x1, iorder1' - stop - endif - - - mask = ibset(0_$type,iradix) - i0=1_$int_type - i1=1_$int_type - - do i=1_$int_type,isize - if (iand(mask,x(i)) == 0_$type) then - iorder(i0) = iorder(i) - x(i0) = x(i) - i0 = i0+1_$int_type - else - iorder2(i1) = iorder(i) - x2(i1) = x(i) - i1 = i1+1_$int_type - endif - enddo - i0=i0-1_$int_type - i1=i1-1_$int_type - - do i=1_$int_type,i1 - iorder(i0+i) = iorder2(i) - x(i0+i) = x2(i) - enddo - - deallocate(x2,iorder2,stat=err) - if (err /= 0) then - print *, irp_here, ': Unable to allocate arrays x2, iorder2' - stop - endif - - - if (iradix == 0) then - return - endif - - - if (i1>1_$int_type) then - call $Xradix_sort$big(x(i0+1_$int_type),iorder(i0+1_$int_type),i1,iradix-1) - endif - if (i0>1) then - call $Xradix_sort$big(x,iorder,i0,iradix-1) - endif - - end - -SUBST [ X, type, integer_size, is_big, big, int_type ] - i ; 4 ; 32 ; .False. ; ; 4 ;; - i8 ; 8 ; 64 ; .False. ; ; 4 ;; - i2 ; 2 ; 16 ; .False. ; ; 4 ;; - i ; 4 ; 32 ; .True. ; _big ; 8 ;; - i8 ; 8 ; 64 ; .True. ; _big ; 8 ;; -END_TEMPLATE - - - diff --git a/src/utils/units.irp.f b/src/utils/units.irp.f new file mode 100644 index 00000000..1850b28b --- /dev/null +++ b/src/utils/units.irp.f @@ -0,0 +1,22 @@ +BEGIN_PROVIDER [double precision, ha_to_ev] + + implicit none + BEGIN_DOC + ! Converstion from Hartree to eV + END_DOC + + ha_to_ev = 27.211396641308d0 + +END_PROVIDER + +BEGIN_PROVIDER [double precision, au_to_D] + + implicit none + BEGIN_DOC + ! Converstion from au to Debye + END_DOC + + au_to_D = 2.5415802529d0 + +END_PROVIDER + diff --git a/src/utils/util.irp.f b/src/utils/util.irp.f index ef846bdb..e7f00ce2 100644 --- a/src/utils/util.irp.f +++ b/src/utils/util.irp.f @@ -37,6 +37,10 @@ double precision function binom_func(i,j) else binom_func = dexp( logfact(i)-logfact(j)-logfact(i-j) ) endif + + ! To avoid .999999 numbers + binom_func = floor(binom_func + 0.5d0) + end @@ -132,7 +136,7 @@ double precision function logfact(n) enddo end function - +! --- BEGIN_PROVIDER [ double precision, fact_inv, (128) ] implicit none @@ -146,6 +150,29 @@ BEGIN_PROVIDER [ double precision, fact_inv, (128) ] enddo END_PROVIDER +! --- + +BEGIN_PROVIDER [ double precision, shiftfact_op5_inv, (128) ] + + BEGIN_DOC + ! + ! 1 / Gamma(n + 0.5) + ! + END_DOC + + implicit none + integer :: i + double precision :: tmp + + do i = 1, size(shiftfact_op5_inv) + !tmp = dgamma(dble(i) + 0.5d0) + tmp = gamma(dble(i) + 0.5d0) + shiftfact_op5_inv(i) = 1.d0 / tmp + enddo + +END_PROVIDER + +! --- double precision function dble_fact(n) implicit none @@ -300,12 +327,12 @@ subroutine wall_time(t) end BEGIN_PROVIDER [ integer, nproc ] - use omp_lib implicit none BEGIN_DOC ! Number of current OpenMP threads END_DOC + integer, external :: omp_get_num_threads nproc = 1 !$OMP PARALLEL !$OMP MASTER @@ -407,3 +434,28 @@ subroutine lowercase(txt,n) enddo end +subroutine v2_over_x(v,x,res) + + !BEGIN_DOC + ! Two by two diagonalization to avoid the divergence in v^2/x when x goes to 0 + !END_DOC + + implicit none + + double precision, intent(in) :: v, x + double precision, intent(out) :: res + + double precision :: delta_E, tmp, val + + res = 0d0 + delta_E = x + if (v == 0.d0) return + + val = 2d0 * v + tmp = dsqrt(delta_E * delta_E + val * val) + if (delta_E < 0.d0) then + tmp = -tmp + endif + res = 0.5d0 * (tmp - delta_E) + +end