9
1
mirror of https://github.com/QuantumPackage/qp2.git synced 2025-01-12 13:08:08 +01:00
qp2/bin/qp_mpirun

95 lines
2.0 KiB
Plaintext
Raw Normal View History

2019-01-25 11:39:31 +01:00
#!/bin/bash
function error() {
>&2 echo "$(basename $0): $@"
exit 2
}
set -e
PROG=$1
INPUT=$2
case ${PROG} in
-h|--help)
exec qp_run --help
;;
esac
NODES=($(mpirun hostname))
# Test that there is one MPI process per node
NPROC=$(echo ${NODES[@]} | tr ' ' '\n' | sort | wc -l)
NUNIQ=$(echo ${NODES[@]} | tr ' ' '\n' | sort | uniq | wc -l)
if [[ $NPROC != $NUNIQ ]] ; then
error "
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Error:
There are more than one process per host.
In your SLURM script file, use:
#SBATCH --nodes=$NPROC
#SBATCH --ntasks-per-node=1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"
exit -1
fi
# Check if quantum_package.rc is sourced
if [[ -z ${QP_ROOT} ]] ; then
echo "Error: quantum_package.rc is not sourced"
exit -1
fi
source ${QP_ROOT}/quantum_package.rc
# Get host names
MASTER_NODE=${NODES[0]}
SLAVE_NODES=$(echo ${NODES[@]:1}| tr ' ' ',')
if [[ $NPROC -gt 1 ]] ; then
echo "Master : $MASTER_NODE"
echo "Slaves : $SLAVE_NODES"
fi
# Check if the integrals can be read
qp set_file $INPUT
RW=$(qp get mo_two_e_ints io_mo_two_e_integrals)
if [[ $RW != Read ]] ; then
echo "
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Warning:
Two-electron integrals were not saved to disk in a previous run.
If the 4-index transformation takes time, you may consider
killing this job and running
qp_run four_idx_transform $INPUT
as a single-node job before re-submitting the current job.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"
fi
rm --force -- "${INPUT}"/work/qp_run_address
set -x
mpiexec.hydra -n 1 -hosts "$MASTER_NODE" qp_run $PROG $INPUT &
if [[ $NPROC -gt 1 ]] ; then
while [[ ! -f $INPUT/work/qp_run_address ]] ; do
sleep 1
done
sleep 10
echo "Starting slaves"
mpiexec.hydra -n $((${SLURM_NTASKS}-1)) -hosts "$SLAVE_NODES" \
qp_run --slave="$PROG" $INPUT > $INPUT.slaves.out
fi
wait