Skip to content

Commit

Permalink
LLNL LC srun mystery resolution (#19425) (#19428)
Browse files Browse the repository at this point in the history
* bonus: allow newer vers of macos, target ventura (macos 13)

* llnl srun mystery resolution and host profile updates

* tweak to visit install open

* bengal update

* fix poodle batch default

* copy proper tarballs from lc user workspace
  • Loading branch information
cyrush committed Mar 27, 2024
1 parent eaf9954 commit 803c72d
Show file tree
Hide file tree
Showing 17 changed files with 125 additions and 48 deletions.
35 changes: 13 additions & 22 deletions src/resources/hosts/llnl/customlauncher
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,9 @@ class JobSubmitter_bsub_LLNL(JobSubmitter):
# Eric Brugger, Fri Feb 17 14:33:43 PST 2023
# Added logic to set the visitarch appropriately on toss4 x86_64 systems.
#
# Cyrus Harrison, Wed Mar 27 09:45:27 PDT 2024
# Cleaned up and added the toss4 logic needed to run on CTS-2 with srun.
#
###############################################################################

class LLNLLauncher(MainLauncher):
Expand All @@ -248,21 +251,6 @@ class LLNLLauncher(MainLauncher):
return IP

def Customize(self):
#
# BG/Q networking changes for parallel engine. We override the host
# with an IP address.
#
if self.parallelArgs.parallel and \
(self.generalArgs.exe_name.find("_par") != -1 or \
self.generalArgs.exe_name.find("_ser") != -1):
if self.sectorname() == "vulcanlac" or \
self.sectorname() == "rzuseqlac" or \
self.sectorname() == "seqlac":
self.generalArgs.host = self.GetIPAddress()
self.generalArgs.guesshost = 0
self.generalArgs.sshtunneling = 0
self.generalArgs.noloopback = 1

#
# Convert the host name to the ip address on jade.
#
Expand All @@ -283,27 +271,30 @@ class LLNLLauncher(MainLauncher):
elif self.generalArgs.host == "agate5.llnl.gov":
self.generalArgs.host = "130.106.204.3"

#
# TODO: This is for trinity, what do we need (if anything) for crossroads (xr-fe)?
#
# Set the LD_LIBRARY_PATH on trinity to include the directory
# with the standard C++ library used by the compiler used to
# compile visit.
#
if self.sectorname() == "tr-fe" or self.sectorname() == "nid":
ld_library_path = GETENV("LD_LIBRARY_PATH")
new_ld_library_path = self.joinpaths(["/opt/gcc/9.3.0/snos/lib64", ld_library_path])
SETENV("LD_LIBRARY_PATH", new_ld_library_path)

#
# Set the LD_LIBRARY_PATH to include the path to MPI on toss3 or
# toss4 x86_64 systems.
# Set the LD_LIBRARY_PATH to include the path to MPI on toss4 x86_64 systems.
#
sys_type = GETENV("SYS_TYPE")
if sys_type == "toss_3_x86_64" or sys_type == "toss_3_x86_64_ib":
mpi_ld_library_paths = ["/usr/tce/packages/mvapich2/mvapich2-2.2-intel-16.0.3/lib", "/usr/tce/packages/intel/intel-16.0.3/lib/intel64"]
SETENV("LD_LIBRARY_PATH", self.joinpaths(mpi_ld_library_paths))
elif sys_type == "toss_4_x86_64" or sys_type == "toss_4_x86_64_ib":
if sys_type == "toss_4_x86_64" or sys_type == "toss_4_x86_64_ib":
mpi_ld_library_paths = ["/usr/tce/packages/mvapich2-tce/mvapich2-2.3.6-gcc-10.3.1/lib"]
SETENV("LD_LIBRARY_PATH", self.joinpaths(mpi_ld_library_paths))
#######################
# srun env setting fix
#######################
# Without these settings, direct srun launches to the debug queues hang for task counts > 36 per node.
SETENV("MV2_ENABLE_TOPO_AWARE_COLLECTIVES", 0)
SETENV("MV2_USE_SHMEM_COLL", 0)

# Unset LD_PRELOAD
UNSETENV("LD_PRELOAD")
Expand Down
89 changes: 89 additions & 0 deletions src/resources/hosts/llnl/host_llnl_dane.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<?xml version="1.0"?>
<Object name="MachineProfile">
<Field name="host" type="string">dane.llnl.gov</Field>
<Field name="hostAliases" type="string">dane#.llnl.gov dane##.llnl.gov dane###.llnl.gov dane####.llnl.gov dane# dane## dane### dane####</Field>
<Field name="hostNickname" type="string">LLNL Dane</Field>
<Field name="directory" type="string">/usr/gapps/visit</Field>
<Field name="clientHostDetermination" type="string">ParsedFromSSHCLIENT</Field>
<Field name="tunnelSSH" type="bool">true</Field>
<Object name="LaunchProfile">
<Field name="profileName" type="string">serial</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">1</Field>
<Field name="numNodesSet" type="bool">false</Field>
<Field name="numNodes" type="int">0</Field>
<Field name="partitionSet" type="bool">false</Field>
<Field name="partition" type="string"></Field>
<Field name="bankSet" type="bool">false</Field>
<Field name="bank" type="string"></Field>
<Field name="timeLimitSet" type="bool">false</Field>
<Field name="timeLimit" type="string"></Field>
<Field name="launchMethodSet" type="bool">false</Field>
<Field name="launchMethod" type="string"></Field>
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="arguments" type="stringVector"></Field>
<Field name="parallel" type="bool">false</Field>
</Object>
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel interactive pdebug</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">56</Field>
<Field name="numNodesSet" type="bool">false</Field>
<Field name="numNodes" type="int">0</Field>
<Field name="partitionSet" type="bool">true</Field>
<Field name="partition" type="string">pdebug</Field>
<Field name="bankSet" type="bool">false</Field>
<Field name="bank" type="string"></Field>
<Field name="timeLimitSet" type="bool">false</Field>
<Field name="timeLimit" type="string"></Field>
<Field name="launchMethodSet" type="bool">true</Field>
<Field name="launchMethod" type="string">srun</Field>
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="active" type="bool">false</Field>
<Field name="launchArgsSet" type="bool">true</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel batch pbatch</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">112</Field>
<Field name="numNodesSet" type="bool">true</Field>
<Field name="numNodes" type="int">1</Field>
<Field name="partitionSet" type="bool">true</Field>
<Field name="partition" type="string">pbatch</Field>
<Field name="bankSet" type="bool">true</Field>
<Field name="bank" type="string">wbronze</Field>
<Field name="timeLimitSet" type="bool">true</Field>
<Field name="timeLimit" type="string">30:00</Field>
<Field name="launchMethodSet" type="bool">true</Field>
<Field name="launchMethod" type="string">sbatch/srun</Field>
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector"></Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel sxterm</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">112</Field>
<Field name="numNodesSet" type="bool">false</Field>
<Field name="numNodes" type="int">0</Field>
<Field name="partitionSet" type="bool">false</Field>
<Field name="partition" type="string"></Field>
<Field name="bankSet" type="bool">false</Field>
<Field name="bank" type="string"></Field>
<Field name="timeLimitSet" type="bool">false</Field>
<Field name="timeLimit" type="string"></Field>
<Field name="launchMethodSet" type="bool">true</Field>
<Field name="launchMethod" type="string">srun</Field>
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector"> </Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Field name="activeProfile" type="int">0</Field>
</Object>
1 change: 0 additions & 1 deletion src/resources/hosts/llnl/host_llnl_pascal.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
<Field name="forceDynamic" type="bool">false</Field>
<Field name="arguments" type="stringVector"></Field>
<Field name="launchArgsSet" type="bool">true</Field>
<Field name="launchArgs" type="string">--exclusive</Field>
<Field name="parallel" type="bool">false</Field>
</Object>
<Object name="LaunchProfile">
Expand Down
3 changes: 1 addition & 2 deletions src/resources/hosts/llnl/host_llnl_poodle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,12 @@
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector"></Field>
<Field name="launchArgsSet" type="bool">true</Field>
<Field name="launchArgs" type="string">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel batch pbatch</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">112</Field>
<Field name="numProcessors" type="int">56</Field>
<Field name="numNodesSet" type="bool">true</Field>
<Field name="numNodes" type="int">1</Field>
<Field name="partitionSet" type="bool">true</Field>
Expand Down
1 change: 0 additions & 1 deletion src/resources/hosts/llnl/host_llnl_rzgenie.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
Expand Down
1 change: 0 additions & 1 deletion src/resources/hosts/llnl/host_llnl_rzhound.xml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector"></Field>
<Field name="launchArgsSet" type="bool">true</Field>
<Field name="launchArgs" type="string">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
Expand Down
1 change: 0 additions & 1 deletion src/resources/hosts/llnl/host_llnl_rztopaz.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
Expand Down
1 change: 0 additions & 1 deletion src/resources/hosts/llnl/host_llnl_rztrona.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
Expand Down
7 changes: 3 additions & 4 deletions src/resources/hosts/llnl/host_llnl_rzwhippet.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel interactive pdebug</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">36</Field>
<Field name="numProcessors" type="int">56</Field>
<Field name="numNodesSet" type="bool">true</Field>
<Field name="numNodes" type="int">1</Field>
<Field name="partitionSet" type="bool">true</Field>
Expand All @@ -43,13 +43,12 @@
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector"></Field>
<Field name="launchArgsSet" type="bool">true</Field>
<Field name="launchArgs" type="string">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel batch phighmem</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">8</Field>
<Field name="numProcessors" type="int">28</Field>
<Field name="numNodesSet" type="bool">true</Field>
<Field name="numNodes" type="int">1</Field>
<Field name="partitionSet" type="bool">true</Field>
Expand All @@ -69,7 +68,7 @@
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel sxterm</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">36</Field>
<Field name="numProcessors" type="int">56</Field>
<Field name="numNodesSet" type="bool">false</Field>
<Field name="numNodes" type="int">0</Field>
<Field name="partitionSet" type="bool">false</Field>
Expand Down
5 changes: 2 additions & 3 deletions src/resources/hosts/llnl_closed/host_llnl_closed_bengal.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel interactive pdebug</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">112</Field>
<Field name="numProcessors" type="int">56</Field>
<Field name="numNodesSet" type="bool">false</Field>
<Field name="numNodes" type="int">0</Field>
<Field name="partitionSet" type="bool">true</Field>
Expand All @@ -42,13 +42,12 @@
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel batch pbatch</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">112</Field>
<Field name="numProcessors" type="int">56</Field>
<Field name="numNodesSet" type="bool">true</Field>
<Field name="numNodes" type="int">1</Field>
<Field name="partitionSet" type="bool">true</Field>
Expand Down
1 change: 0 additions & 1 deletion src/resources/hosts/llnl_rz/host_llnl_rzgenie.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
Expand Down
1 change: 0 additions & 1 deletion src/resources/hosts/llnl_rz/host_llnl_rzhound.xml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector"></Field>
<Field name="launchArgsSet" type="bool">true</Field>
<Field name="launchArgs" type="string">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
Expand Down
1 change: 0 additions & 1 deletion src/resources/hosts/llnl_rz/host_llnl_rztopaz.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
Expand Down
1 change: 0 additions & 1 deletion src/resources/hosts/llnl_rz/host_llnl_rztrona.xml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
<Field name="forceStatic" type="bool">true</Field>
<Field name="forceDynamic" type="bool">false</Field>
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
Expand Down
7 changes: 3 additions & 4 deletions src/resources/hosts/llnl_rz/host_llnl_rzwhippet.xml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel interactive pdebug</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">36</Field>
<Field name="numProcessors" type="int">56</Field>
<Field name="numNodesSet" type="bool">true</Field>
<Field name="numNodes" type="int">1</Field>
<Field name="partitionSet" type="bool">true</Field>
Expand All @@ -43,13 +43,12 @@
<Field name="active" type="bool">false</Field>
<Field name="arguments" type="stringVector"></Field>
<Field name="launchArgsSet" type="bool">true</Field>
<Field name="launchArgs" type="string">--exclusive</Field>
<Field name="parallel" type="bool">true</Field>
</Object>
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel batch phighmem</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">8</Field>
<Field name="numProcessors" type="int">28</Field>
<Field name="numNodesSet" type="bool">true</Field>
<Field name="numNodes" type="int">1</Field>
<Field name="partitionSet" type="bool">true</Field>
Expand All @@ -69,7 +68,7 @@
<Object name="LaunchProfile">
<Field name="profileName" type="string">parallel sxterm</Field>
<Field name="timeout" type="int">480</Field>
<Field name="numProcessors" type="int">36</Field>
<Field name="numProcessors" type="int">56</Field>
<Field name="numNodesSet" type="bool">false</Field>
<Field name="numNodes" type="int">0</Field>
<Field name="partitionSet" type="bool">false</Field>
Expand Down
10 changes: 9 additions & 1 deletion src/tools/dev/scripts/bv_support/bv_main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ function initialize_build_visit()
# as such one must use parenthesis (( .. )) and not square brackets.
# i.e. if (( ${VER_MAJOR} < 8 )) ; then

# Square brackets are for contionals only. To make it a
# Square brackets are for conditionals only. To make it a
# conditional one must use "-lt"
# i.e. if [[ ${VER_MAJOR} -lt 8 ]] ; then

Expand All @@ -365,6 +365,14 @@ function initialize_build_visit()
export MACOSX_DEPLOYMENT_TARGET=11.0
elif [[ ${VER_MAJOR} == 21 ]] ; then
export MACOSX_DEPLOYMENT_TARGET=12.0
elif [[ ${VER_MAJOR} == 22 ]] ; then
export MACOSX_DEPLOYMENT_TARGET=13.0
elif [[ ${VER_MAJOR} == 23 ]] ; then
# Darwin 23 is macOS 14 (Sonoma); keep the deployment target at 13 (Ventura)
export MACOSX_DEPLOYMENT_TARGET=13.0
elif [[ ${VER_MAJOR} == 24 ]] ; then
# Darwin 24 is macOS 15 (Sequoia); keep the deployment target at 13 (Ventura)
export MACOSX_DEPLOYMENT_TARGET=13.0
else
echo "Unsupported Darwin major version, ${VER_MAJOR}."
echo "Maybe add a new case for MACOSX_DEPLOYMENT_TARGET"
Expand Down
8 changes: 5 additions & 3 deletions src/tools/dev/scripts/visit-install-open
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ set -e
test=no

user=`whoami`
result_dir=/usr/workspace/visit/visit/_release_builds/

#
# Set the user e-mail address.
Expand Down Expand Up @@ -327,7 +328,7 @@ if [ "$poodle" = "true" ]
then
if [ "$test" = "no" ]
then
cp /usr/tmp/$user/poodle/visitbuild/visit$ver2.linux-x86_64.tar.gz visit$ver2.linux-x86_64-poodle.tar.gz
cp $result_dir/visit$ver2.linux-x86_64-poodle.tar.gz .
chmod 750 poodle_install;./poodle_install
fi
fi
Expand Down Expand Up @@ -363,7 +364,7 @@ if [ "$lassen" = "true" ]
then
if [ "$test" = "no" ]
then
scp lassen708:/usr/tmp/$user/lassen/visitbuild/visit$ver2.linux-intel.tar.gz visit$ver2.linux-intel-lassen.tar.gz
cp $result_dir/visit$ver2.linux-intel-lassen.tar.gz .
chmod 750 lassen_install;./lassen_install
fi
fi
Expand Down Expand Up @@ -399,7 +400,8 @@ if [ "$rzwhippet" = "true" ]
then
if [ "$test" = "no" ]
then
chmod 750 rzwhippet_install;./rzwhippet_install
cp $result_dir/visit$ver2.linux-x86_64-rzwhippet.tar.gz .
chmod 750 rzwhippet_install;./rzwhippet_install
fi
fi

Expand Down

0 comments on commit 803c72d

Please sign in to comment.