From f4d951c7ddfc635707c14e0fe5a0628fd4aec0ac Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 1 Mar 2024 08:19:49 +0100 Subject: [PATCH] [susy2] rerun 18 tmad tests on itscrd90, all ok STARTED AT Fri Mar 1 03:29:12 AM CET 2024 ENDED AT Fri Mar 1 07:51:32 AM CET 2024 Status=0 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt --- .../log_eemumu_mad_d_inl0_hrd0.txt | 210 +++++++-------- .../log_eemumu_mad_f_inl0_hrd0.txt | 214 +++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 202 +++++++------- .../log_ggtt_mad_d_inl0_hrd0.txt | 222 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 242 ++++++++--------- .../log_ggtt_mad_m_inl0_hrd0.txt | 238 ++++++++--------- .../log_ggttg_mad_d_inl0_hrd0.txt | 216 +++++++-------- .../log_ggttg_mad_f_inl0_hrd0.txt | 242 ++++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 226 ++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 240 ++++++++--------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 246 ++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 246 ++++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 242 ++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 250 +++++++++--------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 232 ++++++++-------- .../log_gqttq_mad_d_inl0_hrd0.txt | 226 ++++++++-------- .../log_gqttq_mad_f_inl0_hrd0.txt | 238 ++++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 234 ++++++++-------- 18 files changed, 2083 insertions(+), 2083 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 5792e7e600..fb2022a061 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,4 +1,4 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' @@ -10,33 +10,33 @@ make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-02-05_22:17:58 +DATE: 2024-03-01_03:35:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6779s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6698s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6832s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6748s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1641s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1761s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1679s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 9.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,8 +109,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3626s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2752s + [COUNTERS] PROGRAM TOTAL : 0.3673s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2800s [COUNTERS] Fortran MEs ( 1 ) : 0.0874s for 90112 events => throughput is 1.03E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** @@ -132,15 +132,15 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1765s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1745s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661545E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000780E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3758s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2970s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0788s for 90112 events => throughput is 1.14E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3053s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0752s for 90112 events => throughput is 1.20E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000780E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.119401e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.174335e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.150011e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235605e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1720s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.97E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1874s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1831s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.89E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3238s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2796s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0442s for 90112 events => throughput is 2.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3298s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2854s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0444s for 90112 events => throughput is 2.03E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.960201e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.003456e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.056109e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.071261e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1691s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1662s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1738s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.75E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3175s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2842s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0333s for 90112 events => throughput is 2.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3165s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2833s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0332s for 90112 events => throughput is 2.71E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.667896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.590204e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.846362e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.724231e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1671s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1644s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.99E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1747s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1718s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3086s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2780s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0306s for 90112 events => throughput is 2.95E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2851s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 90112 events => throughput is 2.83E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.811128e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.651963e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.083958e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.888816e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661462E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1714s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1679s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1756s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1721s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.33E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661462E-002) differ by less than 3E-14 (7.771561172376096e-16) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3216s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2847s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0369s for 90112 events => throughput is 2.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3283s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2888s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0395s for 90112 events => throughput is 2.28E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.407049e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.333417e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.476907e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.247580e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.5875s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5870s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.61E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5894s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5889s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.58E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7209s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7159s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.82E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7068s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7019s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.84E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002 OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.167668e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.143768e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.968899e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.922192e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.735772e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.720542e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.440392e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.434610e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.710260e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.732238e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.012226e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.027929e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.703491e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.748145e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.128966e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129848e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 2b4c81420f..130936da07 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-02-05_22:18:15 +DATE: 2024-03-01_03:35:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6630s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6547s - [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7004s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6920s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.73E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1706s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1626s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1674s + [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3655s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2764s - [COUNTERS] Fortran MEs ( 1 ) : 0.0891s for 90112 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3760s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2888s + [COUNTERS] Fortran MEs ( 1 ) : 0.0872s for 90112 events => throughput is 1.03E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382700437610044E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1794s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1727s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.22E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1795s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1733s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0062s for 8192 events => throughput is 1.32E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700437610044E-002) differ by less than 4E-4 (1.6027646465577305e-07) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382703205998396E-002) differ by less than 4E-4 (1.306308462512007e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515587669165246E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3531s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2813s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0717s for 90112 events => throughput is 1.26E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3578s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2889s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0690s for 90112 events => throughput is 1.31E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587669165246E-002) differ by less than 4E-4 (1.568129937012941e-07) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515590123565249E-002) differ by less than 4E-4 (1.2999352305698153e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.212872e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.296058e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.231007e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.289423e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1667s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1641s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1759s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1734s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.33E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3047s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2770s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0277s for 90112 events => throughput is 3.25E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3141s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2860s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0281s for 90112 events => throughput is 3.21E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587612890761E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.211664e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.247103e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.224682e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.346461e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1660s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1637s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.51E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1759s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1735s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.49E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3027s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2781s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 90112 events => throughput is 3.66E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3181s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2923s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0258s for 90112 events => throughput is 3.50E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.652051e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.473027e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.835303e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.779574e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1668s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1648s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1741s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.61E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3014s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2784s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 90112 events => throughput is 3.90E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3134s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 90112 events => throughput is 3.65E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.831859e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.393313e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.986193e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.850238e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382704356154977E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1670s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1647s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.71E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1750s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1726s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.46E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382704356154977E-002) differ by less than 4E-4 (1.1831425661412709e-07) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382704335459282E-002) differ by less than 4E-4 (1.1853587900123586e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515591292297929E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3074s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2840s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0235s for 90112 events => throughput is 3.84E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3156s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2901s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0255s for 90112 events => throughput is 3.53E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515591292297929E-002) differ by less than 4E-4 (1.172226659074127e-07) +OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515591296252558E-002) differ by less than 4E-4 (1.1717945325173673e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.842306e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340689e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.784479e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.795181e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.5797s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5793s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.71E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5865s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5861s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.74E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.6986s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6939s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0047s for 90112 events => throughput is 1.93E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7069s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7024s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 90112 events => throughput is 1.98E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515592892887687E-002 OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.684998e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.528794e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.020092e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.178202e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.026150e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.848804e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.045722e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.051133e+09 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.017663e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.014035e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.235265e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.222690e+09 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.404519e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.412951e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.457162e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.409232e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 41ed20187f..da7367ae5e 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none - +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-02-05_22:18:32 +DATE: 2024-03-01_03:36:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6567s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6485s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7189s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7106s + [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.88E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1622s - [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.1780s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1693s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.48E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3615s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2738s - [COUNTERS] Fortran MEs ( 1 ) : 0.0876s for 90112 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3702s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2823s + [COUNTERS] Fortran MEs ( 1 ) : 0.0879s for 90112 events => throughput is 1.03E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1758s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1688s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 8192 events => throughput is 1.18E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1808s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1742s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.24E+06 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.7176438049659737e-10) +OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701395E-002) differ by less than 2E-4 (1.7176482458580722e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3634s + [COUNTERS] PROGRAM TOTAL : 0.3592s [COUNTERS] Fortran Overhead ( 0 ) : 0.2860s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 90112 events => throughput is 1.16E+06 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0732s for 90112 events => throughput is 1.23E+06 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +180,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.142475e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.182030e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.160468e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.222787e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1706s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1667s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0038s for 8192 events => throughput is 2.13E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1746s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1707s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.07E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -243,9 +243,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3261s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 90112 events => throughput is 2.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3256s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2834s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 90112 events => throughput is 2.14E+06 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -256,14 +256,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.009384e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.086150e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.109517e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.131619e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1690s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1660s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1696s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.76E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3099s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2764s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0335s for 90112 events => throughput is 2.69E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3229s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2892s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0338s for 90112 events => throughput is 2.67E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.614910e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.541763e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.770588e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.454900e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1663s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1635s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.93E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1830s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3312s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2971s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 90112 events => throughput is 2.64E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3167s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2848s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 90112 events => throughput is 2.83E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.670770e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.677035e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.913155e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.872617e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1780s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1746s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1756s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1722s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.36E+06 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3388s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2997s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0390s for 90112 events => throughput is 2.31E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3270s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2890s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0380s for 90112 events => throughput is 2.37E+06 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.342746e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.248118e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.449709e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.400436e+06 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.5833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5828s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.63E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.5902s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5897s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.66E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.6969s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6920s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.84E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7064s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7016s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.87E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602021089631E-002 OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.201225e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.153365e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.975595e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.922960e+08 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.723786e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.732117e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.507428e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.451486e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.698364e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736678e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.060413e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.069247e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.736917e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.733211e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.170942e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.156375e+08 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 204bcc3c17..657075d34f 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-02-05_22:18:49 +DATE: 2024-03-01_03:36:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.7649s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7239s - [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8052s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7640s + [COUNTERS] Fortran MEs ( 1 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,8 +84,8 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3910s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3503s + [COUNTERS] PROGRAM TOTAL : 0.3849s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3442s [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=0 + [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7562s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3060s - [COUNTERS] Fortran MEs ( 1 ) : 0.4502s for 90112 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6297s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1833s + [COUNTERS] Fortran MEs ( 1 ) : 0.4464s for 90112 events => throughput is 2.02E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4206s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3840s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0366s for 8192 events => throughput is 2.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4282s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3910s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0373s for 8192 events => throughput is 2.20E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,27 +167,27 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7084s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3082s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4002s for 90112 events => throughput is 2.25E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6901s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2834s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4066s for 90112 events => throughput is 2.22E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105695279989121) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.211296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.207121e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.248534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.224007e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3938s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3718s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0221s for 8192 events => throughput is 3.71E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3927s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3711s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756640) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -243,27 +243,27 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5214s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2881s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2333s for 90112 events => throughput is 3.86E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4997s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2628s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2369s for 90112 events => throughput is 3.80E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105695279989106) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989106) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.878640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.699229e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.806741e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.772412e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.24E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 8192 events => throughput is 6.20E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756640) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,27 +319,27 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4310s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2853s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1457s for 90112 events => throughput is 6.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4003s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2548s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1455s for 90112 events => throughput is 6.19E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105695279989121) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.965700e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.020313e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.009550e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.141769e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3743s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3628s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0115s for 8192 events => throughput is 7.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.75E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756640) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -395,27 +395,27 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4335s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3051s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1284s for 90112 events => throughput is 7.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3804s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2520s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1283s for 90112 events => throughput is 7.02E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105695279989121) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.031412e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.898875e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.139328e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.924828e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094184803756626] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3667s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0176s for 8192 events => throughput is 4.67E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3919s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3726s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0193s for 8192 events => throughput is 4.24E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756640) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756626) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,27 +471,27 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4856s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2935s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1921s for 90112 events => throughput is 4.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5267s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2233s for 90112 events => throughput is 4.04E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105695279989121) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.481319e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.791161e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.550589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.782832e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,8 +514,8 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7810s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7804s + [COUNTERS] PROGRAM TOTAL : 0.7828s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7823s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.45E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,56 +547,56 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7200s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7136s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.41E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6782s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6718s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.42E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105695279989121) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.064090e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.045663e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.746111e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.714246e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.003251e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.010596e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.076004e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.071675e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.006674e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.000853e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.154292e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152555e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.001053e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.001515e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.043495e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.100234e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 5482dc0552..eb011c6697 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 + +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-02-05_22:19:16 +DATE: 2024-03-01_03:36:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.7588s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7181s - [COUNTERS] Fortran MEs ( 1 ) : 0.0407s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7373s + [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3951s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3530s - [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3888s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3479s + [COUNTERS] Fortran MEs ( 1 ) : 0.0409s for 8192 events => throughput is 2.00E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=0 + [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7598s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3088s - [COUNTERS] Fortran MEs ( 1 ) : 0.4511s for 90112 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6449s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1962s + [COUNTERS] Fortran MEs ( 1 ) : 0.4487s for 90112 events => throughput is 2.01E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094177233089695] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094179780921394] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4164s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3823s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0341s for 8192 events => throughput is 2.40E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4205s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3860s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0345s for 8192 events => throughput is 2.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094177233089695) differ by less than 4E-4 (1.6075587627728538e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094179780921394) differ by less than 4E-4 (1.0665510541407741e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105686104543288] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105688579298537] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 2.2170s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8286s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3884s for 90112 events => throughput is 2.32E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6592s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2787s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3805s for 90112 events => throughput is 2.37E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105686104543288) differ by less than 4E-4 (1.9478421364738097e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105688579298537) differ by less than 4E-4 (1.4224799227413598e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.309794e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.351307e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.307910e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.338637e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094173275857273] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094175850060040] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3765s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3624s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0141s for 8192 events => throughput is 5.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3711s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.54E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094173275857273) differ by less than 4E-4 (2.447839242414318e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094175850060040) differ by less than 4E-4 (1.9012318908107062e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105682058834830] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105684763984058] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4327s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2782s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1545s for 90112 events => throughput is 5.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4203s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2581s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1622s for 90112 events => throughput is 5.56E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105682058834830) differ by less than 4E-4 (2.8066997403985994e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105684763984058) differ by less than 4E-4 (2.2324275217311396e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.643949e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.210465e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.734827e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.317035e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094171343713690] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3633s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 8192 events => throughput is 1.07E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3679s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3602s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094171343713690) differ by less than 4E-4 (2.8581114641657024e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094173652938650) differ by less than 4E-4 (2.3677696170398832e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105681519092386] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3622s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2772s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0850s for 90112 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3368s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2515s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0853s for 90112 events => throughput is 1.06E+06 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105681519092386) differ by less than 4E-4 (2.9212808838607884e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ by less than 4E-4 (2.384278946498952e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.045248e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.038889e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.049590e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.040818e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094171343713690] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094173652938650] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3544s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3685s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3613s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.13E+06 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094171343713690) differ by less than 4E-4 (2.8581114641657024e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094173652938650) differ by less than 4E-4 (2.3677696170398832e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105681519092386] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105684048677361] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3569s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2795s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 90112 events => throughput is 1.16E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.3377s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2584s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0793s for 90112 events => throughput is 1.14E+06 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105681519092386) differ by less than 4E-4 (2.9212808838607884e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105684048677361) differ by less than 4E-4 (2.384278946498952e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.147039e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.104729e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.161132e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.124265e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094178385820562] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094178213275804] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3686s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3588s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 8192 events => throughput is 8.36E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0105s for 8192 events => throughput is 7.77E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094178385820562) differ by less than 4E-4 (1.3627873807209312e-07) +OK! xsec from fortran (47.094184803756640) and cpp (47.094178213275804) differ by less than 4E-4 (1.3994256109484127e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105688391077187] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105688407939567] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3858s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2782s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1076s for 90112 events => throughput is 8.38E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3760s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2629s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1130s for 90112 events => throughput is 7.97E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105688391077187) differ by less than 4E-4 (1.46243715803962e-07) +OK! xsec from fortran (47.105695279989114) and cpp (47.105688407939567) differ by less than 4E-4 (1.4588574703822133e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.950723e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.591310e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.149487e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.407728e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184344050284] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7720s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7714s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.51E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7795s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7789s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,56 +547,56 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105694586476879] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.6987s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 90112 events => throughput is 1.68E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.7005s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6948s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 90112 events => throughput is 1.57E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105694586476879) differ by less than 4E-4 (1.4722470687011935e-08) +OK! xsec from fortran (47.105695279989114) and cpp (47.105694586476879) differ by less than 4E-4 (1.4722471020078842e-08) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.301322e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.201563e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.982927e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.986974e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.833883e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.810580e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.787018e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.774762e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.833466e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.802177e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.876783e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.847890e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.365328e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.368745e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.354802e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.422351e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 45fc68aa32..bef66309f6 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx CUDACPP_BUILDDIR='.' - - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-02-05_22:19:42 +DATE: 2024-03-01_03:37:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.7623s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7212s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7917s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7505s + [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3907s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3500s - [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3956s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3546s + [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=0 + [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7512s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3026s - [COUNTERS] Fortran MEs ( 1 ) : 0.4486s for 90112 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6496s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1993s + [COUNTERS] Fortran MEs ( 1 ) : 0.4503s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186141863887] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4230s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3861s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0369s for 8192 events => throughput is 2.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4256s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3880s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0377s for 8192 events => throughput is 2.18E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863887) differ by less than 2E-4 (2.841342827686333e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -167,27 +167,27 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7095s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3034s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4061s for 90112 events => throughput is 2.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6990s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2867s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4123s for 90112 events => throughput is 2.19E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105696630006634) differ by less than 2E-4 (2.8659327133695456e-08) +OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006634) differ by less than 2E-4 (2.865932691165085e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.202321e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.185122e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.194899e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.177902e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186141863887] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3881s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3989s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3779s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0210s for 8192 events => throughput is 3.91E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863887) differ by less than 2E-4 (2.841342827686333e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -243,27 +243,27 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5147s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2899s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2248s for 90112 events => throughput is 4.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.5038s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2721s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2317s for 90112 events => throughput is 3.89E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105696630006626) differ by less than 2E-4 (2.8659327133695456e-08) +OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006626) differ by less than 2E-4 (2.8659326689606246e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.870020e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.744718e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.915241e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.796645e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186193208813] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3767s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3634s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 8192 events => throughput is 6.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3789s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3661s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 8192 events => throughput is 6.40E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186193208813) differ by less than 2E-4 (2.950368882537191e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696667630845] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4274s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2825s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1450s for 90112 events => throughput is 6.22E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4021s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2588s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1433s for 90112 events => throughput is 6.29E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105696667630845) differ by less than 2E-4 (2.945804622456194e-08) +OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.250062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.012402e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.098829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.056070e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186193208813] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.5095s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4973s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3770s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3652s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0118s for 8192 events => throughput is 6.94E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186193208813) differ by less than 2E-4 (2.950368882537191e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696667630845] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4106s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2855s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1252s for 90112 events => throughput is 7.20E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3844s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1278s for 90112 events => throughput is 7.05E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105696667630845) differ by less than 2E-4 (2.945804622456194e-08) +OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.075524e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.957699e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.434399e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.976096e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186193208813] fbridge_mode=1 + [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3845s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 8192 events => throughput is 4.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3720s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0190s for 8192 events => throughput is 4.32E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186193208813) differ by less than 2E-4 (2.950368882537191e-08) +OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696667630845] fbridge_mode=1 + [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4742s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2875s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1868s for 90112 events => throughput is 4.82E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.4767s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2708s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2059s for 90112 events => throughput is 4.38E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105696667630845) differ by less than 2E-4 (2.945804622456194e-08) +OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.593121e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.223304e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.524976e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.269412e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.7749s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7743s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.41E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7843s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7837s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.37E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,56 +547,56 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7352s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7287s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 90112 events => throughput is 1.38E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.6837s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6773s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 90112 events => throughput is 1.40E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989099) and cpp (47.105695279068492) differ by less than 2E-4 (1.9543477947081556e-11) +OK! xsec from fortran (47.105695279989114) and cpp (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.068039e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.090244e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.702432e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.672934e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.012365e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.997070e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.064674e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.055834e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.996044e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.991192e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.147754e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134835e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.022142e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.012024e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.000250e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.999333e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index d12de30a22..cd3823dd45 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-02-05_22:20:10 +DATE: 2024-03-01_03:37:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6745s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3479s - [COUNTERS] Fortran MEs ( 1 ) : 0.3266s for 8192 events => throughput is 2.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6990s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3830s + [COUNTERS] Fortran MEs ( 1 ) : 0.3160s for 8192 events => throughput is 2.59E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6326s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3084s - [COUNTERS] Fortran MEs ( 1 ) : 0.3242s for 8192 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6286s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3119s + [COUNTERS] Fortran MEs ( 1 ) : 0.3167s for 8192 events => throughput is 2.59E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.0816s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5148s - [COUNTERS] Fortran MEs ( 1 ) : 3.5669s for 90112 events => throughput is 2.53E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9846s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4835s + [COUNTERS] Fortran MEs ( 1 ) : 3.5010s for 90112 events => throughput is 2.57E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9448s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6238s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3210s for 8192 events => throughput is 2.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.0129s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6673s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3457s for 8192 events => throughput is 2.37E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749111) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717694E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.3214s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8115s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5099s for 90112 events => throughput is 2.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3959s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8159s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5801s for 90112 events => throughput is 2.52E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717694E-002) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.636181e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.608629e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.640749e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.585600e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6387s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1675s for 8192 events => throughput is 4.89E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6456s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4771s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1686s for 8192 events => throughput is 4.86E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717680E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.5511s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7126s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8385s for 90112 events => throughput is 4.90E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4996s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6435s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8561s for 90112 events => throughput is 4.85E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717680E-002) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.072602e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.966202e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.077354e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.937901e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4731s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3897s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0833s for 8192 events => throughput is 9.83E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4803s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3957s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0846s for 8192 events => throughput is 9.68E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,9 +319,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5092s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5875s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9217s for 90112 events => throughput is 9.78E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5052s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5758s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9294s for 90112 events => throughput is 9.70E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -332,14 +332,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.006722e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.913999e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.926199e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.849874e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4548s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3817s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0731s for 8192 events => throughput is 1.12E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4790s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3971s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0818s for 8192 events => throughput is 1.00E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,9 +395,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.3873s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5789s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8085s for 90112 events => throughput is 1.11E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3870s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5613s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8257s for 90112 events => throughput is 1.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -408,14 +408,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.125594e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.099230e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.143588e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.125635e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5051s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4064s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0987s for 8192 events => throughput is 8.30E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5283s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4199s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1084s for 8192 events => throughput is 7.55E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,9 +471,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.7499s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6373s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1127s for 90112 events => throughput is 8.10E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7535s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5980s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1555s for 90112 events => throughput is 7.80E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -484,14 +484,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.367410e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.774058e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.383766e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.841638e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -512,15 +512,15 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7445s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7391s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7462s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7408s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749111) differ by less than 3E-14 (0.0) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -545,58 +545,58 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9506s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9280s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0226s for 90112 events => throughput is 3.99E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9272s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9044s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.95E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717736E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632699e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632538e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.198067e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.097542e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.687085e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.673182e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.245374e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.241730e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.652855e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.666883e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.254650e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.250394e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.685444e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.680746e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.763638e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.758368e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 6acca50600..b22193f403 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-02-05_22:20:53 +DATE: 2024-03-01_03:38:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6668s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3423s - [COUNTERS] Fortran MEs ( 1 ) : 0.3245s for 8192 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6628s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3454s + [COUNTERS] Fortran MEs ( 1 ) : 0.3174s for 8192 events => throughput is 2.58E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6365s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3088s - [COUNTERS] Fortran MEs ( 1 ) : 0.3277s for 8192 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6295s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3130s + [COUNTERS] Fortran MEs ( 1 ) : 0.3165s for 8192 events => throughput is 2.59E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.1044s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5176s - [COUNTERS] Fortran MEs ( 1 ) : 3.5868s for 90112 events => throughput is 2.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 4.9722s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4831s + [COUNTERS] Fortran MEs ( 1 ) : 3.4891s for 90112 events => throughput is 2.58E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112722327776243] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112722621426752] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.9147s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6082s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3065s for 8192 events => throughput is 2.67E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9336s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6187s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3149s for 8192 events => throughput is 2.60E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112722327776243) differ by less than 4E-4 (2.5986973362090993e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112722621426752) differ by less than 4E-4 (2.569659680817793e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238466406484034E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238468310179624E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.1640s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8215s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3425s for 90112 events => throughput is 2.70E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.3385s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8107s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5278s for 90112 events => throughput is 2.55E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238466406484034E-002) differ by less than 4E-4 (1.9594309874637617e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238468310179624E-002) differ by less than 4E-4 (1.719182115555995e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.825867e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.649087e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.822029e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.678753e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112720218188545] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112720710186394] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4892s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3982s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0910s for 8192 events => throughput is 9.00E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4987s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4044s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0944s for 8192 events => throughput is 8.68E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112720218188545) differ by less than 4E-4 (2.8073040938547678e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112720710186394) differ by less than 4E-4 (2.758652844936371e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238450523404405E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238454786658835E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5887s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5903s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9985s for 90112 events => throughput is 9.02E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.5977s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5622s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0354s for 90112 events => throughput is 8.70E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238450523404405E-002) differ by less than 4E-4 (3.9638963988952725e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238454786658835E-002) differ by less than 4E-4 (3.4258681169685445e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.157934e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.791493e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.113656e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.818254e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112721286411488] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3952s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3523s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0429s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4003s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3576s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0427s for 8192 events => throughput is 1.92E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721286411488) differ by less than 4E-4 (2.701672777827291e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721766950902) differ by less than 4E-4 (2.654154597325764e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238449434208005E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0166s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5446s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4720s for 90112 events => throughput is 1.91E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9911s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5156s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4755s for 90112 events => throughput is 1.90E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238449434208005E-002) differ by less than 4E-4 (4.101354408314606e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002) differ by less than 4E-4 (3.5585866953180556e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.923701e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.915431e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.922908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.928091e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112721286411488] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112721766950902] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3835s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3461s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0374s for 8192 events => throughput is 2.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3910s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3509s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0400s for 8192 events => throughput is 2.05E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721286411488) differ by less than 4E-4 (2.701672777827291e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721766950902) differ by less than 4E-4 (2.654154597325764e-06) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238449434208005E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238453735016964E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9524s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5375s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4149s for 90112 events => throughput is 2.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9540s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5218s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4322s for 90112 events => throughput is 2.09E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238449434208005E-002) differ by less than 4E-4 (4.101354408314606e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453735016964E-002) differ by less than 4E-4 (3.5585866953180556e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.197542e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.113903e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.185435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.128293e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112723411062496] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112723387847480] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0494s for 8192 events => throughput is 1.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4175s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3658s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0516s for 8192 events => throughput is 1.59E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112723411062496) differ by less than 4E-4 (2.491576483576452e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112723387847480) differ by less than 4E-4 (2.4938721023826105e-06) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238464401552092E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238464410949921E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0986s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5551s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5435s for 90112 events => throughput is 1.66E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0938s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5331s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5607s for 90112 events => throughput is 1.61E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238464401552092E-002) differ by less than 4E-4 (2.2124560195013743e-07) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238464410949921E-002) differ by less than 4E-4 (2.211270000440635e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.635897e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.580486e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.640737e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.544942e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -512,15 +512,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112726034625695] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112726034625694] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7361s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7353s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.77E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7476s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7467s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 9.72E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112726034625695) differ by less than 4E-4 (2.2321452151086163e-06) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112726034625694) differ by less than 4E-4 (2.2321452152196386e-06) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,9 +547,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238473828077680E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9356s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9261s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 90112 events => throughput is 9.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9141s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9047s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 90112 events => throughput is 9.57E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238473828077680E-002 OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.324054e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.317603e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.861547e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855249e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.639674e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.653705e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.427401e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.471958e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.660976e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.666794e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.534637e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.507869e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.507023e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.515295e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.629083e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.625829e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index efef9126ba..994bc4f8f2 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-02-05_22:21:32 +DATE: 2024-03-01_03:39:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6659s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3410s - [COUNTERS] Fortran MEs ( 1 ) : 0.3248s for 8192 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6750s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3573s + [COUNTERS] Fortran MEs ( 1 ) : 0.3178s for 8192 events => throughput is 2.58E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6286s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3078s - [COUNTERS] Fortran MEs ( 1 ) : 0.3208s for 8192 events => throughput is 2.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6388s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3205s + [COUNTERS] Fortran MEs ( 1 ) : 0.3183s for 8192 events => throughput is 2.57E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -109,9 +109,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.1015s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5259s - [COUNTERS] Fortran MEs ( 1 ) : 3.5756s for 90112 events => throughput is 2.52E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0099s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5073s + [COUNTERS] Fortran MEs ( 1 ) : 3.5026s for 90112 events => throughput is 2.57E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 1.0134s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6664s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3470s for 8192 events => throughput is 2.36E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.9635s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6336s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3299s for 8192 events => throughput is 2.48E+04 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -167,9 +167,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.4987s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8629s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6359s for 90112 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.4154s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7958s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6196s for 90112 events => throughput is 2.49E+04 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -180,14 +180,14 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482679400354E-002 OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.570856e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.562106e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.544366e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.547562e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748702805031] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6336s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4688s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1649s for 8192 events => throughput is 4.97E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6394s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4739s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1654s for 8192 events => throughput is 4.95E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748702805031) differ by less than 2E-4 (9.399612643790078e-09) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748702805033) differ by less than 2E-4 (9.399612865834683e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482683055653E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.4637s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6640s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.7997s for 90112 events => throughput is 5.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.4743s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6457s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8285s for 90112 events => throughput is 4.93E+04 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482683055653E-002) differ by less than 2E-4 (9.469362849401364e-09) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482683055667E-002) differ by less than 2E-4 (9.469362849401364e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.095748e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.063467e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.111417e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.051938e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748681415583] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) [COUNTERS] PROGRAM TOTAL : 0.4771s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0837s for 8192 events => throughput is 9.79E+04 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3935s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0836s for 8192 events => throughput is 9.80E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415583) differ by less than 2E-4 (7.284515213257237e-09) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482534347218E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.4900s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5765s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9135s for 90112 events => throughput is 9.86E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4752s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5541s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9211s for 90112 events => throughput is 9.78E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347218E-002) differ by less than 2E-4 (7.592642958798024e-09) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.013857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001861e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.015949e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.840887e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748681415583] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4476s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0718s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4581s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3846s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0735s for 8192 events => throughput is 1.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415583) differ by less than 2E-4 (7.284515213257237e-09) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482534347218E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.3555s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5646s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7909s for 90112 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3626s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8127s for 90112 events => throughput is 1.11E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347218E-002) differ by less than 2E-4 (7.592642958798024e-09) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152174e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.148207e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.139304e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.152825e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748681415583] fbridge_mode=1 + [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5114s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4097s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1017s for 8192 events => throughput is 8.06E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5403s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4268s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1136s for 8192 events => throughput is 7.21E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415583) differ by less than 2E-4 (7.284515213257237e-09) +OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700265108) differ by less than 2E-4 (9.148451995955043e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482534347218E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.7719s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6293s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1426s for 90112 events => throughput is 7.89E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.7973s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6097s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1876s for 90112 events => throughput is 7.59E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347218E-002) differ by less than 2E-4 (7.592642958798024e-09) +OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482666076374E-002) differ by less than 2E-4 (9.255082034087536e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.151380e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.609614e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.222415e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.592843e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7428s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7373s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.50E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7459s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7405s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.51E+06 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -547,9 +547,9 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9444s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9218s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0226s for 90112 events => throughput is 3.99E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.9191s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8964s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 90112 events => throughput is 3.96E+06 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** @@ -560,43 +560,43 @@ OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481937154381E-002 OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.637764e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.624489e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.311283e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.862423e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.607274e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.598562e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.233492e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.230160e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.634483e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.604858e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.243748e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.241022e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.619987e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.618302e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.709088e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.712384e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 42f6d38589..455a867420 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-02-05_22:22:15 +DATE: 2024-03-01_03:39:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102345E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.4979s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3422s - [COUNTERS] Fortran MEs ( 1 ) : 4.1556s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.5262s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3968s + [COUNTERS] Fortran MEs ( 1 ) : 4.1295s for 8192 events => throughput is 1.98E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102345E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.4698s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3331s - [COUNTERS] Fortran MEs ( 1 ) : 4.1367s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4601s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3420s + [COUNTERS] Fortran MEs ( 1 ) : 4.1180s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 47.6993s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0438s - [COUNTERS] Fortran MEs ( 1 ) : 45.6555s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.7126s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0517s + [COUNTERS] Fortran MEs ( 1 ) : 45.6608s for 90112 events => throughput is 1.97E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,13 +134,13 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102372E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 8.9166s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5742s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3424s for 8192 events => throughput is 1.89E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.7056s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4601s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2455s for 8192 events => throughput is 1.93E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612510102372E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102372E-004) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451701E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 54.1949s - [COUNTERS] Fortran Overhead ( 0 ) : 6.2939s - [COUNTERS] CudaCpp MEs ( 2 ) : 47.9009s for 90112 events => throughput is 1.88E+03 events/s + [COUNTERS] PROGRAM TOTAL : 53.1561s + [COUNTERS] Fortran Overhead ( 0 ) : 6.1171s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.0390s for 90112 events => throughput is 1.92E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438642451701E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451704E-004) differ by less than 3E-14 (5.551115123125783e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.927146e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989312e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.934290e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.975004e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102372E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.7446s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5173s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2273s for 8192 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7773s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5170s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2604s for 8192 events => throughput is 3.62E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612510102372E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438642451701E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 28.9914s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2638s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.7275s for 90112 events => throughput is 3.64E+03 events/s + [COUNTERS] PROGRAM TOTAL : 29.0103s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1559s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.8544s for 90112 events => throughput is 3.63E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438642451704E-004) differ by less than 3E-14 (0.0) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451701E-004) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.808325e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.801009e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.810712e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.781734e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,13 +286,13 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.2692s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2960s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9732s for 8192 events => throughput is 8.42E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2569s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2848s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9721s for 8192 events => throughput is 8.43E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451707E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 13.7356s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0257s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.7099s for 90112 events => throughput is 8.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.7501s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9426s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8075s for 90112 events => throughput is 8.34E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438642451707E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.639623e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.607758e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.677492e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.615061e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,13 +362,13 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.0280s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8536s for 8192 events => throughput is 9.60E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.0290s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1707s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8583s for 8192 events => throughput is 9.54E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451707E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 12.2880s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8878s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4002s for 90112 events => throughput is 9.59E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.2922s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8302s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4620s for 90112 events => throughput is 9.52E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438642451707E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.851640e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.701965e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.826048e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.814187e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,13 +438,13 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.4467s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3864s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0603s for 8192 events => throughput is 7.73E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5040s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4024s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1017s for 8192 events => throughput is 7.44E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451707E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 14.9115s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1698s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.7417s for 90112 events => throughput is 7.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 14.7910s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0412s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.7498s for 90112 events => throughput is 7.67E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438642451707E-004) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.829236e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.831586e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.877772e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.821061e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612510102367E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.8732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8402s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0331s for 8192 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8686s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8368s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 8192 events => throughput is 2.57E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612510102367E-004) differ by less than 3E-14 (2.220446049250313e-16) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,56 +547,56 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 2.9273s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3593s for 90112 events => throughput is 2.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.8233s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4732s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3501s for 90112 events => throughput is 2.57E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642451712E-004) differ by less than 3E-14 (0.0) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.297744e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.280922e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523615e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.518844e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.102829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.106750e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.140866e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162850e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.114484e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.106625e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.173422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168282e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.119855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.107369e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.426650e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.430988e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 89070eac21..5e945a4db8 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none + make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-02-05_22:26:31 +DATE: 2024-03-01_03:43:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102345E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.4620s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3366s - [COUNTERS] Fortran MEs ( 1 ) : 4.1254s for 8192 events => throughput is 1.99E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4989s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3403s + [COUNTERS] Fortran MEs ( 1 ) : 4.1586s for 8192 events => throughput is 1.97E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102345E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.4826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3337s - [COUNTERS] Fortran MEs ( 1 ) : 4.1489s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4650s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3366s + [COUNTERS] Fortran MEs ( 1 ) : 4.1284s for 8192 events => throughput is 1.98E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 47.7647s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0487s - [COUNTERS] Fortran MEs ( 1 ) : 45.7160s for 90112 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.5707s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0343s + [COUNTERS] Fortran MEs ( 1 ) : 45.5364s for 90112 events => throughput is 1.98E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703728935895570E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703729438336302E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 8.1948s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2129s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.9819s for 8192 events => throughput is 2.06E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.4568s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3045s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1523s for 8192 events => throughput is 1.97E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703728935895570E-004) differ by less than 4E-4 (3.0081376303225937e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703729438336302E-004) differ by less than 4E-4 (3.021119383106452e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793486223749466E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793486626492658E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 49.9434s - [COUNTERS] Fortran Overhead ( 0 ) : 5.9464s - [COUNTERS] CudaCpp MEs ( 2 ) : 43.9970s for 90112 events => throughput is 2.05E+03 events/s + [COUNTERS] PROGRAM TOTAL : 51.1261s + [COUNTERS] Fortran Overhead ( 0 ) : 5.9844s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.1417s for 90112 events => throughput is 2.00E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793486223749466E-004) differ by less than 4E-4 (3.0127256538392544e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486626492658E-004) differ by less than 4E-4 (3.0382263187522796e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.118265e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.070377e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.118721e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.032691e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703721162664038E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703722581317850E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.5421s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4276s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1145s for 8192 events => throughput is 7.35E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5531s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4379s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1153s for 8192 events => throughput is 7.35E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703721162664038E-004) differ by less than 4E-4 (2.8072976823168005e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722581317850E-004) differ by less than 4E-4 (2.843951981690296e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793482900053113E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793483759856148E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 15.5186s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1615s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.3571s for 90112 events => throughput is 7.29E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.4011s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1124s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.2887s for 90112 events => throughput is 7.33E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793482900053113E-004) differ by less than 4E-4 (2.8022777314173908e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483759856148E-004) differ by less than 4E-4 (2.856718252175483e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.661702e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.468143e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.676226e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.493623e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703719746039955E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.3053s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8175s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4878s for 8192 events => throughput is 1.68E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.3122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8184s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4938s for 8192 events => throughput is 1.66E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703719746039955E-004) differ by less than 4E-4 (2.7706958254380964e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722425602170E-004) differ by less than 4E-4 (2.8399286962077497e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793482744283897E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 7.9508s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5496s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4012s for 90112 events => throughput is 1.67E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.8863s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4589s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4274s for 90112 events => throughput is 1.66E+04 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793482744283897E-004) differ by less than 4E-4 (2.7924148244817815e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004) differ by less than 4E-4 (2.852825495613942e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.722931e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.689224e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.655694e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.712522e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703719746039955E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703722425602170E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.1890s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4314s for 8192 events => throughput is 1.90E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.1887s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7547s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4340s for 8192 events => throughput is 1.89E+04 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703719746039955E-004) differ by less than 4E-4 (2.7706958254380964e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703722425602170E-004) differ by less than 4E-4 (2.8399286962077497e-06) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793482744283897E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793483698376133E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 7.1796s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4657s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.7139s for 90112 events => throughput is 1.91E+04 events/s + [COUNTERS] PROGRAM TOTAL : 7.2113s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4166s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.7946s for 90112 events => throughput is 1.88E+04 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793482744283897E-004) differ by less than 4E-4 (2.7924148244817815e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793483698376133E-004) differ by less than 4E-4 (2.852825495613942e-06) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.965165e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.812765e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.961063e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.800388e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703728656142196E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703728658657426E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 1.3838s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8569s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5269s for 8192 events => throughput is 1.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 1.4119s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8827s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5292s for 8192 events => throughput is 1.55E+04 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703728656142196E-004) differ by less than 4E-4 (3.0009095357552695e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703728658657426E-004) differ by less than 4E-4 (3.0009745224379714e-06) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793486988396928E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793486977281547E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 8.3413s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5676s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.7737s for 90112 events => throughput is 1.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 8.3753s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5229s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8525s for 90112 events => throughput is 1.54E+04 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793486988396928E-004) differ by less than 4E-4 (3.0611411687697654e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793486977281547E-004) differ by less than 4E-4 (3.0604373708609245e-06) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.563090e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.556546e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.584923e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.565832e+04 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703736267486325E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.8348s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8135s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8334s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8120s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703736267486325E-004) differ by less than 4E-4 (3.197566737389579e-06) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703736267486325E-004) differ by less than 4E-4 (3.1975667371675343e-06) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,56 +547,56 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.0001579 [1.5793489323670813E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 2.7662s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5323s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2339s for 90112 events => throughput is 3.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.7017s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4654s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2363s for 90112 events => throughput is 3.81E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793489323670813E-004) differ by less than 4E-4 (3.2090047175081793e-06) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793489323670813E-004) differ by less than 4E-4 (3.20900471706409e-06) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.598573e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.592263e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.958409e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.940482e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.495046e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.499807e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.667956e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.638317e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.492157e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.497540e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.729344e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.635301e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.485281e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.483569e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.532672e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.518477e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index a9f0e03001..4a1ef98d00 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,4 +1,4 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg CUDACPP_BUILDDIR='.' @@ -10,33 +10,33 @@ make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-02-05_22:29:50 +DATE: 2024-03-01_03:47:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102345E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 62 events (found 950 events) - [COUNTERS] PROGRAM TOTAL : 4.4716s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3356s - [COUNTERS] Fortran MEs ( 1 ) : 4.1360s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4720s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3418s + [COUNTERS] Fortran MEs ( 1 ) : 4.1302s for 8192 events => throughput is 1.98E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612510102345E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.000387 [3.8703612510102356E-004] fbridge_mode=0 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.4821s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3396s - [COUNTERS] Fortran MEs ( 1 ) : 4.1425s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4586s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3357s + [COUNTERS] Fortran MEs ( 1 ) : 4.1229s for 8192 events => throughput is 1.99E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642451704E-004] fbridge_mode=0 + [XSECTION] Cross section = 0.0001579 [1.5793438642451712E-004] fbridge_mode=0 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 48.1896s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0584s - [COUNTERS] Fortran MEs ( 1 ) : 46.1312s for 90112 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 47.6222s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0494s + [COUNTERS] Fortran MEs ( 1 ) : 45.5728s for 90112 events => throughput is 1.98E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612659176647E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612659176674E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 9.0456s - [COUNTERS] Fortran Overhead ( 0 ) : 4.6365s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4091s for 8192 events => throughput is 1.86E+03 events/s + [COUNTERS] PROGRAM TOTAL : 8.7912s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5114s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2799s for 8192 events => throughput is 1.91E+03 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612659176647E-004) differ by less than 2E-4 (3.851689633904698e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612659176674E-004) differ by less than 2E-4 (3.851690077993908e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438704534937E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438704534934E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 54.9541s - [COUNTERS] Fortran Overhead ( 0 ) : 6.4100s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.5441s for 90112 events => throughput is 1.86E+03 events/s + [COUNTERS] PROGRAM TOTAL : 53.4090s + [COUNTERS] Fortran Overhead ( 0 ) : 6.1734s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.2356s for 90112 events => throughput is 1.91E+03 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438704534937E-004) differ by less than 2E-4 (3.930950898123342e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438704534934E-004) differ by less than 2E-4 (3.930950231989527e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.919374e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.968066e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.902053e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.968245e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612692816692E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612692816703E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 4.7464s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5234s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2230s for 8192 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.7232s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5040s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2192s for 8192 events => throughput is 3.69E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612692816692E-004) differ by less than 2E-4 (4.720860369289426e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612692816703E-004) differ by less than 2E-4 (4.720860369289426e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438707226032E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438707226035E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 28.6832s - [COUNTERS] Fortran Overhead ( 0 ) : 4.2381s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.4451s for 90112 events => throughput is 3.69E+03 events/s + [COUNTERS] PROGRAM TOTAL : 28.6711s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1739s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.4972s for 90112 events => throughput is 3.68E+03 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438707226032E-004) differ by less than 2E-4 (4.101344153184527e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438707226035E-004) differ by less than 2E-4 (4.1013439311399225e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.801229e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.727620e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.806913e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.685802e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612675240507E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.2352s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2775s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9577s for 8192 events => throughput is 8.55E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.2625s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2738s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9887s for 8192 events => throughput is 8.29E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612675240507E-004) differ by less than 2E-4 (4.266737629876616e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438703631772E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 13.6830s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0214s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.6615s for 90112 events => throughput is 8.45E+03 events/s + [COUNTERS] PROGRAM TOTAL : 13.6031s + [COUNTERS] Fortran Overhead ( 0 ) : 2.9396s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.6635s for 90112 events => throughput is 8.45E+03 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438703631772E-004) differ by less than 2E-4 (3.873764864437135e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.694548e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.715236e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.713044e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.685374e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612675240507E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.0177s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1622s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8555s for 8192 events => throughput is 9.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.0253s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1676s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8577s for 8192 events => throughput is 9.55E+03 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612675240507E-004) differ by less than 2E-4 (4.266737629876616e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438703631772E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 12.2482s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8854s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.3628s for 90112 events => throughput is 9.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 12.2295s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8222s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4074s for 90112 events => throughput is 9.58E+03 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438703631772E-004) differ by less than 2E-4 (3.873764864437135e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.966213e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.886999e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.798303e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.910216e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.000387 [3.8703612675240507E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.000387 [3.8703612675240517E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 2.4852s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4051s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0801s for 8192 events => throughput is 7.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.4883s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4086s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0796s for 8192 events => throughput is 7.59E+03 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612675240507E-004) differ by less than 2E-4 (4.266737629876616e-09) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612675240517E-004) differ by less than 2E-4 (4.266737629876616e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438703631772E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438703631775E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 14.9006s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1273s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.7733s for 90112 events => throughput is 7.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.1764s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0860s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0904s for 90112 events => throughput is 7.45E+03 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438703631772E-004) differ by less than 2E-4 (3.873764864437135e-09) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438703631775E-004) differ by less than 2E-4 (3.873764420347925e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.689329e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.643781e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.748625e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.679757e+03 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] ChannelId = 2 [XSECTION] Cross section = 0.000387 [3.8703612512203166E-004] fbridge_mode=1 [UNWEIGHT] Wrote 121 events (found 923 events) - [COUNTERS] PROGRAM TOTAL : 0.8748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8417s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0331s for 8192 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8696s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8376s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 8192 events => throughput is 2.56E+05 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (3.8703612510102345E-004) and cpp (3.8703612512203166E-004) differ by less than 2E-4 (5.4279691852343603e-11) +OK! xsec from fortran (3.8703612510102356E-004) and cpp (3.8703612512203166E-004) differ by less than 2E-4 (5.427946980773868e-11) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -545,58 +545,58 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 2 - [XSECTION] Cross section = 0.0001579 [1.5793438642387715E-004] fbridge_mode=1 + [XSECTION] Cross section = 0.0001579 [1.5793438642387717E-004] fbridge_mode=1 [UNWEIGHT] Wrote 1361 events (found 1881 events) - [COUNTERS] PROGRAM TOTAL : 2.9385s - [COUNTERS] Fortran Overhead ( 0 ) : 2.5754s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3630s for 90112 events => throughput is 2.48E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.8559s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5071s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3489s for 90112 events => throughput is 2.58E+05 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.5793438642451704E-004) and cpp (1.5793438642387715E-004) differ by less than 2E-4 (4.051647906067046e-12) +OK! xsec from fortran (1.5793438642451712E-004) and cpp (1.5793438642387717E-004) differ by less than 2E-4 (4.051980972974434e-12) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.285322e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.289596e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.527417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.528638e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.111025e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.112086e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.154637e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.149032e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.103369e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.114551e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.159233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.167728e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.097040e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.109912e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.433644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.430504e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 91a0e957d1..6ba33cd625 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=sse4 -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-02-05_22:35:39 +DATE: 2024-03-01_03:53:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939176E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 95.1303s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4967s - [COUNTERS] Fortran MEs ( 1 ) : 94.6335s for 8192 events => throughput is 8.66E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.0689s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5400s + [COUNTERS] Fortran MEs ( 1 ) : 95.5289s for 8192 events => throughput is 8.58E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939176E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 94.7777s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4926s - [COUNTERS] Fortran MEs ( 1 ) : 94.2850s for 8192 events => throughput is 8.69E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.2818s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4823s + [COUNTERS] Fortran MEs ( 1 ) : 95.7994s for 8192 events => throughput is 8.55E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086655972E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1046.4939s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3422s - [COUNTERS] Fortran MEs ( 1 ) : 1042.1517s for 90112 events => throughput is 8.65E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1058.3505s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1547s + [COUNTERS] Fortran MEs ( 1 ) : 1054.1958s for 90112 events => throughput is 8.55E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,13 +134,13 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 210.0765s - [COUNTERS] Fortran Overhead ( 0 ) : 97.1032s - [COUNTERS] CudaCpp MEs ( 2 ) : 112.9733s for 8192 events => throughput is 7.25E+01 events/s + [COUNTERS] PROGRAM TOTAL : 212.3366s + [COUNTERS] Fortran Overhead ( 0 ) : 99.0477s + [COUNTERS] CudaCpp MEs ( 2 ) : 113.2889s for 8192 events => throughput is 7.23E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985227939193E-006) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939193E-006) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1343.2987s - [COUNTERS] Fortran Overhead ( 0 ) : 100.2236s - [COUNTERS] CudaCpp MEs ( 2 ) : 1243.0751s for 90112 events => throughput is 7.25E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1356.0370s + [COUNTERS] Fortran Overhead ( 0 ) : 104.1787s + [COUNTERS] CudaCpp MEs ( 2 ) : 1251.8583s for 90112 events => throughput is 7.20E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993086656006E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.092994e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.154156e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.445665e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.197434e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 106.7093s - [COUNTERS] Fortran Overhead ( 0 ) : 49.4816s - [COUNTERS] CudaCpp MEs ( 2 ) : 57.2277s for 8192 events => throughput is 1.43E+02 events/s + [COUNTERS] PROGRAM TOTAL : 107.3498s + [COUNTERS] Fortran Overhead ( 0 ) : 49.5738s + [COUNTERS] CudaCpp MEs ( 2 ) : 57.7759s for 8192 events => throughput is 1.42E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985227939195E-006) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939197E-006) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 686.0903s - [COUNTERS] Fortran Overhead ( 0 ) : 53.2968s - [COUNTERS] CudaCpp MEs ( 2 ) : 632.7935s for 90112 events => throughput is 1.42E+02 events/s + [COUNTERS] PROGRAM TOTAL : 690.9132s + [COUNTERS] Fortran Overhead ( 0 ) : 53.4647s + [COUNTERS] CudaCpp MEs ( 2 ) : 637.4485s for 90112 events => throughput is 1.41E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656017E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.657733e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.672791e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.659779e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.670748e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,13 +286,13 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 50.2187s - [COUNTERS] Fortran Overhead ( 0 ) : 23.1972s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.0215s for 8192 events => throughput is 3.03E+02 events/s + [COUNTERS] PROGRAM TOTAL : 49.9431s + [COUNTERS] Fortran Overhead ( 0 ) : 23.2154s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.7277s for 8192 events => throughput is 3.06E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.1102230246251565e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656009E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 322.8676s - [COUNTERS] Fortran Overhead ( 0 ) : 26.8768s - [COUNTERS] CudaCpp MEs ( 2 ) : 295.9908s for 90112 events => throughput is 3.04E+02 events/s + [COUNTERS] PROGRAM TOTAL : 318.2044s + [COUNTERS] Fortran Overhead ( 0 ) : 26.8024s + [COUNTERS] CudaCpp MEs ( 2 ) : 291.4019s for 90112 events => throughput is 3.09E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993086656009E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.617077e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.618074e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.626995e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.618894e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,13 +362,13 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 44.2033s - [COUNTERS] Fortran Overhead ( 0 ) : 20.0984s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.1049s for 8192 events => throughput is 3.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 44.2064s + [COUNTERS] Fortran Overhead ( 0 ) : 20.3467s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.8597s for 8192 events => throughput is 3.43E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.1102230246251565e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656009E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 287.4746s - [COUNTERS] Fortran Overhead ( 0 ) : 23.9380s - [COUNTERS] CudaCpp MEs ( 2 ) : 263.5366s for 90112 events => throughput is 3.42E+02 events/s + [COUNTERS] PROGRAM TOTAL : 291.1048s + [COUNTERS] Fortran Overhead ( 0 ) : 24.2318s + [COUNTERS] CudaCpp MEs ( 2 ) : 266.8729s for 90112 events => throughput is 3.38E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993086656009E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.163294e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.097914e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.178912e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.125731e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,13 +438,13 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 45.5276s - [COUNTERS] Fortran Overhead ( 0 ) : 22.2470s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.2806s for 8192 events => throughput is 3.52E+02 events/s + [COUNTERS] PROGRAM TOTAL : 45.8566s + [COUNTERS] Fortran Overhead ( 0 ) : 22.2857s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.5710s for 8192 events => throughput is 3.48E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.1102230246251565e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656009E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 280.9380s - [COUNTERS] Fortran Overhead ( 0 ) : 26.0371s - [COUNTERS] CudaCpp MEs ( 2 ) : 254.9009s for 90112 events => throughput is 3.54E+02 events/s + [COUNTERS] PROGRAM TOTAL : 285.1342s + [COUNTERS] Fortran Overhead ( 0 ) : 26.2120s + [COUNTERS] CudaCpp MEs ( 2 ) : 258.9222s for 90112 events => throughput is 3.48E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993086656009E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.775442e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.725410e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.798228e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.772387e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 4.2469s - [COUNTERS] Fortran Overhead ( 0 ) : 3.1621s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0848s for 8192 events => throughput is 7.55E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.2510s + [COUNTERS] Fortran Overhead ( 0 ) : 3.1660s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0850s for 8192 events => throughput is 7.55E+03 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985227939195E-006) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,56 +547,56 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 18.9307s - [COUNTERS] Fortran Overhead ( 0 ) : 7.0189s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9118s for 90112 events => throughput is 7.56E+03 events/s + [COUNTERS] PROGRAM TOTAL : 18.8198s + [COUNTERS] Fortran Overhead ( 0 ) : 6.9183s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9015s for 90112 events => throughput is 7.57E+03 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993086656006E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656006E-007) differ by less than 3E-14 (1.7763568394002505e-15) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.516506e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.527080e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.276640e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.239391e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.254877e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.271267e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.570497e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.600243e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.248946e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.245889e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.448571e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.476521e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.255913e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.229131e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.238725e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.234312e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 6e7885c855..2b7ca2c190 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=sse4 +make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-02-06_00:00:45 +DATE: 2024-03-01_05:18:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939176E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 95.1870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4906s - [COUNTERS] Fortran MEs ( 1 ) : 94.6964s for 8192 events => throughput is 8.65E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.8320s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4739s + [COUNTERS] Fortran MEs ( 1 ) : 96.3581s for 8192 events => throughput is 8.50E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939176E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 94.9685s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4953s - [COUNTERS] Fortran MEs ( 1 ) : 94.4732s for 8192 events => throughput is 8.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.1294s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4800s + [COUNTERS] Fortran MEs ( 1 ) : 95.6494s for 8192 events => throughput is 8.56E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086655972E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1045.4531s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3853s - [COUNTERS] Fortran MEs ( 1 ) : 1041.0679s for 90112 events => throughput is 8.66E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1058.3011s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1783s + [COUNTERS] Fortran MEs ( 1 ) : 1054.1228s for 90112 events => throughput is 8.55E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405719498009764E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405719957040752E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 191.0542s - [COUNTERS] Fortran Overhead ( 0 ) : 88.4952s - [COUNTERS] CudaCpp MEs ( 2 ) : 102.5590s for 8192 events => throughput is 7.99E+01 events/s + [COUNTERS] PROGRAM TOTAL : 197.7089s + [COUNTERS] Fortran Overhead ( 0 ) : 90.3714s + [COUNTERS] CudaCpp MEs ( 2 ) : 107.3375s for 8192 events => throughput is 7.63E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2405719498009764E-006) differ by less than 4E-4 (0.00013981555433351112) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719957040752E-006) differ by less than 4E-4 (0.00013985256106807675) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326289850060011E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326290771198648E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1218.1212s - [COUNTERS] Fortran Overhead ( 0 ) : 92.3855s - [COUNTERS] CudaCpp MEs ( 2 ) : 1125.7357s for 90112 events => throughput is 8.00E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1274.0074s + [COUNTERS] Fortran Overhead ( 0 ) : 94.0944s + [COUNTERS] CudaCpp MEs ( 2 ) : 1179.9131s for 90112 events => throughput is 7.64E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3326289850060011E-007) differ by less than 4E-4 (0.00014135250101854346) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326290771198648E-007) differ by less than 4E-4 (0.00014139199589124907) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.234084e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.108865e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.340763e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.128078e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405716133562926E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405717007921116E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 49.0072s - [COUNTERS] Fortran Overhead ( 0 ) : 23.3751s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.6321s for 8192 events => throughput is 3.20E+02 events/s + [COUNTERS] PROGRAM TOTAL : 49.6519s + [COUNTERS] Fortran Overhead ( 0 ) : 23.3946s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.2573s for 8192 events => throughput is 3.12E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2405716133562926E-006) differ by less than 4E-4 (0.0001395443151488429) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405717007921116E-006) differ by less than 4E-4 (0.00013961480525170877) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326283773234128E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326284900828787E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 310.5028s - [COUNTERS] Fortran Overhead ( 0 ) : 26.9907s - [COUNTERS] CudaCpp MEs ( 2 ) : 283.5121s for 90112 events => throughput is 3.18E+02 events/s + [COUNTERS] PROGRAM TOTAL : 315.8806s + [COUNTERS] Fortran Overhead ( 0 ) : 27.1593s + [COUNTERS] CudaCpp MEs ( 2 ) : 288.7213s for 90112 events => throughput is 3.12E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3326283773234128E-007) differ by less than 4E-4 (0.00014109195015965525) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326284900828787E-007) differ by less than 4E-4 (0.00014114029707035236) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.609202e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.581780e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.624327e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.565199e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405715853898719E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 25.1152s - [COUNTERS] Fortran Overhead ( 0 ) : 11.8289s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.2863s for 8192 events => throughput is 6.17E+02 events/s + [COUNTERS] PROGRAM TOTAL : 25.4788s + [COUNTERS] Fortran Overhead ( 0 ) : 11.8981s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.5807s for 8192 events => throughput is 6.03E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2405715853898719E-006) differ by less than 4E-4 (0.00013952176883003098) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716659252656E-006) differ by less than 4E-4 (0.00013958669586155992) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326275792962891E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 162.6923s - [COUNTERS] Fortran Overhead ( 0 ) : 15.8974s - [COUNTERS] CudaCpp MEs ( 2 ) : 146.7949s for 90112 events => throughput is 6.14E+02 events/s + [COUNTERS] PROGRAM TOTAL : 165.7549s + [COUNTERS] Fortran Overhead ( 0 ) : 15.4780s + [COUNTERS] CudaCpp MEs ( 2 ) : 150.2769s for 90112 events => throughput is 6.00E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3326275792962891E-007) differ by less than 4E-4 (0.00014074978690437057) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007) differ by less than 4E-4 (0.00014080311959907554) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.205911e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.259920e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.215076e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.259066e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405715853898719E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405716659252656E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 22.6386s - [COUNTERS] Fortran Overhead ( 0 ) : 10.6852s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9534s for 8192 events => throughput is 6.85E+02 events/s + [COUNTERS] PROGRAM TOTAL : 22.3180s + [COUNTERS] Fortran Overhead ( 0 ) : 10.3786s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9393s for 8192 events => throughput is 6.86E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2405715853898719E-006) differ by less than 4E-4 (0.00013952176883003098) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716659252656E-006) differ by less than 4E-4 (0.00013958669586155992) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326275792962891E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326277036840957E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 145.3504s - [COUNTERS] Fortran Overhead ( 0 ) : 14.3703s - [COUNTERS] CudaCpp MEs ( 2 ) : 130.9800s for 90112 events => throughput is 6.88E+02 events/s + [COUNTERS] PROGRAM TOTAL : 145.4310s + [COUNTERS] Fortran Overhead ( 0 ) : 14.1732s + [COUNTERS] CudaCpp MEs ( 2 ) : 131.2578s for 90112 events => throughput is 6.87E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3326275792962891E-007) differ by less than 4E-4 (0.00014074978690437057) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277036840957E-007) differ by less than 4E-4 (0.00014080311959907554) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.145261e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.296906e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.089522e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.301383e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -436,15 +436,15 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405719423038986E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405719306052570E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 22.6291s - [COUNTERS] Fortran Overhead ( 0 ) : 11.3109s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.3183s for 8192 events => throughput is 7.24E+02 events/s + [COUNTERS] PROGRAM TOTAL : 23.0558s + [COUNTERS] Fortran Overhead ( 0 ) : 11.3644s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.6914s for 8192 events => throughput is 7.01E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2405719423038986E-006) differ by less than 4E-4 (0.00013980951024539223) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719306052570E-006) differ by less than 4E-4 (0.00013980007888836354) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326283662420285E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.333e-07 [2.3326283660088769E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 140.4388s - [COUNTERS] Fortran Overhead ( 0 ) : 15.1102s - [COUNTERS] CudaCpp MEs ( 2 ) : 125.3286s for 90112 events => throughput is 7.19E+02 events/s + [COUNTERS] PROGRAM TOTAL : 144.1559s + [COUNTERS] Fortran Overhead ( 0 ) : 15.2893s + [COUNTERS] CudaCpp MEs ( 2 ) : 128.8666s for 90112 events => throughput is 6.99E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3326283662420285E-007) differ by less than 4E-4 (0.00014108719888938914) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326283660088769E-007) differ by less than 4E-4 (0.00014108709892313165) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.551293e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.554413e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.502865e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.557969e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -512,15 +512,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405722175509506E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.241e-06 [1.2405722175509512E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 2.5073s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0137s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4936s for 8192 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 2.4934s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9950s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4985s for 8192 events => throughput is 1.64E+04 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2405722175509506E-006) differ by less than 4E-4 (0.00014003141235763295) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405722175509512E-006) differ by less than 4E-4 (0.00014003141235829908) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,56 +547,56 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.333e-07 [2.3326296967941821E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 11.2651s - [COUNTERS] Fortran Overhead ( 0 ) : 5.8307s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4344s for 90112 events => throughput is 1.66E+04 events/s + [COUNTERS] PROGRAM TOTAL : 11.1120s + [COUNTERS] Fortran Overhead ( 0 ) : 5.7089s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4031s for 90112 events => throughput is 1.67E+04 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3326296967941821E-007) differ by less than 4E-4 (0.00014165768834106807) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326296967941821E-007) differ by less than 4E-4 (0.0001416576883412901) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.641774e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.650610e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.628707e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.632591e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.317424e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.339184e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.380209e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.373598e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.351711e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.323596e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.398349e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.361104e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.351253e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.325481e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.434374e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.425348e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 5311267b6e..99d7cfbcd5 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,8 +1,8 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 @@ -10,33 +10,33 @@ make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-02-06_01:04:55 +DATE: 2024-03-01_06:24:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -57,11 +57,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939176E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 95.2652s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4947s - [COUNTERS] Fortran MEs ( 1 ) : 94.7705s for 8192 events => throughput is 8.64E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.2156s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4799s + [COUNTERS] Fortran MEs ( 1 ) : 95.7357s for 8192 events => throughput is 8.56E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -82,11 +82,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939176E-006] fbridge_mode=0 + [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 94.9824s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4952s - [COUNTERS] Fortran MEs ( 1 ) : 94.4872s for 8192 events => throughput is 8.67E+01 events/s + [COUNTERS] PROGRAM TOTAL : 96.1318s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4799s + [COUNTERS] Fortran MEs ( 1 ) : 95.6519s for 8192 events => throughput is 8.56E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086655972E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1049.9382s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3738s - [COUNTERS] Fortran MEs ( 1 ) : 1045.5645s for 90112 events => throughput is 8.62E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1057.5728s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1537s + [COUNTERS] Fortran MEs ( 1 ) : 1053.4191s for 90112 events => throughput is 8.55E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985299359846E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 210.2766s - [COUNTERS] Fortran Overhead ( 0 ) : 96.9104s - [COUNTERS] CudaCpp MEs ( 2 ) : 113.3662s for 8192 events => throughput is 7.23E+01 events/s + [COUNTERS] PROGRAM TOTAL : 220.4361s + [COUNTERS] Fortran Overhead ( 0 ) : 102.4490s + [COUNTERS] CudaCpp MEs ( 2 ) : 117.9870s for 8192 events => throughput is 6.94E+01 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985299359846E-006) differ by less than 2E-4 (5.7578810608305275e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985299359844E-006) differ by less than 2E-4 (5.7578810608305275e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -167,27 +167,27 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1355.4366s - [COUNTERS] Fortran Overhead ( 0 ) : 101.1078s - [COUNTERS] CudaCpp MEs ( 2 ) : 1254.3289s for 90112 events => throughput is 7.18E+01 events/s + [COUNTERS] PROGRAM TOTAL : 1422.8276s + [COUNTERS] Fortran Overhead ( 0 ) : 106.0198s + [COUNTERS] CudaCpp MEs ( 2 ) : 1316.8079s for 90112 events => throughput is 6.84E+01 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993212353001E-007) differ by less than 2E-4 (5.389403812117166e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993212353001E-007) differ by less than 2E-4 (5.389404034161771e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.512949e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.035940e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.521314e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.018960e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985295828473E-006] fbridge_mode=1 + [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 109.5011s - [COUNTERS] Fortran Overhead ( 0 ) : 50.7884s - [COUNTERS] CudaCpp MEs ( 2 ) : 58.7127s for 8192 events => throughput is 1.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 110.5022s + [COUNTERS] Fortran Overhead ( 0 ) : 50.8167s + [COUNTERS] CudaCpp MEs ( 2 ) : 59.6855s for 8192 events => throughput is 1.37E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985295828473E-006) differ by less than 2E-4 (5.473184350179849e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985295828471E-006) differ by less than 2E-4 (5.473184350179849e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993222645648E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 699.7664s - [COUNTERS] Fortran Overhead ( 0 ) : 54.5245s - [COUNTERS] CudaCpp MEs ( 2 ) : 645.2418s for 90112 events => throughput is 1.40E+02 events/s + [COUNTERS] PROGRAM TOTAL : 715.3882s + [COUNTERS] Fortran Overhead ( 0 ) : 54.5501s + [COUNTERS] CudaCpp MEs ( 2 ) : 660.8381s for 90112 events => throughput is 1.36E+02 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993222645648E-007) differ by less than 2E-4 (5.8307128014689624e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222645653E-007) differ by less than 2E-4 (5.830713245558172e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.643172e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.628879e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.637966e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.636164e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,13 +286,13 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 47.5676s - [COUNTERS] Fortran Overhead ( 0 ) : 22.0164s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.5512s for 8192 events => throughput is 3.21E+02 events/s + [COUNTERS] PROGRAM TOTAL : 48.5744s + [COUNTERS] Fortran Overhead ( 0 ) : 22.1801s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.3943s for 8192 events => throughput is 3.10E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,27 +319,27 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 305.3712s - [COUNTERS] Fortran Overhead ( 0 ) : 25.6316s - [COUNTERS] CudaCpp MEs ( 2 ) : 279.7396s for 90112 events => throughput is 3.22E+02 events/s + [COUNTERS] PROGRAM TOTAL : 319.2663s + [COUNTERS] Fortran Overhead ( 0 ) : 26.0078s + [COUNTERS] CudaCpp MEs ( 2 ) : 293.2585s for 90112 events => throughput is 3.07E+02 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.82220427425284e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.853680e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.764546e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.839043e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.773101e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,13 +362,13 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 41.7281s - [COUNTERS] Fortran Overhead ( 0 ) : 19.1520s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.5761s for 8192 events => throughput is 3.63E+02 events/s + [COUNTERS] PROGRAM TOTAL : 42.4540s + [COUNTERS] Fortran Overhead ( 0 ) : 19.2743s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.1797s for 8192 events => throughput is 3.53E+02 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -395,27 +395,27 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 271.0485s - [COUNTERS] Fortran Overhead ( 0 ) : 23.0363s - [COUNTERS] CudaCpp MEs ( 2 ) : 248.0121s for 90112 events => throughput is 3.63E+02 events/s + [COUNTERS] PROGRAM TOTAL : 277.3470s + [COUNTERS] Fortran Overhead ( 0 ) : 22.9193s + [COUNTERS] CudaCpp MEs ( 2 ) : 254.4277s for 90112 events => throughput is 3.54E+02 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.82220427425284e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.395944e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.384820e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.405946e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.391539e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,13 +438,13 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 44.0281s - [COUNTERS] Fortran Overhead ( 0 ) : 21.6063s - [COUNTERS] CudaCpp MEs ( 2 ) : 22.4218s for 8192 events => throughput is 3.65E+02 events/s + [COUNTERS] PROGRAM TOTAL : 45.2143s + [COUNTERS] Fortran Overhead ( 0 ) : 21.9553s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.2589s for 8192 events => throughput is 3.52E+02 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,27 +471,27 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 270.4138s - [COUNTERS] Fortran Overhead ( 0 ) : 25.3779s - [COUNTERS] CudaCpp MEs ( 2 ) : 245.0359s for 90112 events => throughput is 3.68E+02 events/s + [COUNTERS] PROGRAM TOTAL : 278.0679s + [COUNTERS] Fortran Overhead ( 0 ) : 25.4000s + [COUNTERS] CudaCpp MEs ( 2 ) : 252.6680s for 90112 events => throughput is 3.57E+02 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.82220427425284e-09) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.938341e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.828727e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.926685e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.858416e+02 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,13 +514,13 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] ChannelId = 1 [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 3.5865s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7235s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8630s for 8192 events => throughput is 9.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 3.5884s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7239s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8645s for 8192 events => throughput is 9.48E+03 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939176E-006) and cpp (1.2403985217419736E-006) differ by less than 2E-4 (8.480693924894922e-10) +OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985217419736E-006) differ by less than 2E-4 (8.480691704448873e-10) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -545,58 +545,58 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993078576736E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 16.0732s - [COUNTERS] Fortran Overhead ( 0 ) : 6.5735s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4997s for 90112 events => throughput is 9.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.9902s + [COUNTERS] Fortran Overhead ( 0 ) : 6.4881s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5020s for 90112 events => throughput is 9.48E+03 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655972E-007) and cpp (2.3322993078576736E-007) differ by less than 2E-4 (3.4640645907302314e-10) +OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993078576733E-007) differ by less than 2E-4 (3.464063480507207e-10) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.407818e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.411937e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.084506e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.083264e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.114502e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112113e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.160968e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.161038e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107573e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.111465e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.113991e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.105445e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111189e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.112837e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.632319e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.656493e+03 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 3e31dbe95a..8e9ad5ba7a 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu CUDACPP_BUILDDIR='.' - make USEBUILDDIR=1 AVX=none - make USEBUILDDIR=1 AVX=sse4 + + make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' CUDACPP_BUILDDIR='build.none_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' CUDACPP_BUILDDIR='build.sse4_d_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_d_inl0_hrd0' +CUDACPP_BUILDDIR='build.512z_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512y_d_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-02-05_22:34:08 +DATE: 2024-03-01_03:51:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4576s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3875s - [COUNTERS] Fortran MEs ( 1 ) : 0.0701s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4944s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4247s + [COUNTERS] Fortran MEs ( 1 ) : 0.0697s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3144s - [COUNTERS] Fortran MEs ( 1 ) : 0.0699s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3864s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3169s + [COUNTERS] Fortran MEs ( 1 ) : 0.0696s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561290] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.2820s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5165s - [COUNTERS] Fortran MEs ( 1 ) : 0.7655s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2522s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4897s + [COUNTERS] Fortran MEs ( 1 ) : 0.7625s for 90112 events => throughput is 1.18E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,9 +134,9 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3960s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4681s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3922s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0759s for 8192 events => throughput is 1.08E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -165,29 +165,29 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561287] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686556561293] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4597s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6104s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8493s for 90112 events => throughput is 1.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3698s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5419s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8279s for 90112 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686556561287) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561293) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.071916e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.084897e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.066839e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103096e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351262536] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539351262530] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3995s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3576s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0419s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4004s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3592s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351262536) differ by less than 3E-14 (2.930988785010413e-14) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351262530) differ by less than 3E-14 (2.9531932455029164e-14) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561290] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0138s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5775s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4364s for 90112 events => throughput is 2.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9658s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5172s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4486s for 90112 events => throughput is 2.01E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686556561290) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561281) differ by less than 3E-14 (6.661338147750939e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.048379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019219e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.053491e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.018294e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3649s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3418s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3643s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3408s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.49E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,27 +319,27 @@ Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8252s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5671s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2581s for 90112 events => throughput is 3.49E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7585s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4994s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2591s for 90112 events => throughput is 3.48E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686556561295) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.435366e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.297018e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.453652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.427747e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3613s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3408s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0205s for 8192 events => throughput is 3.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3623s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0225s for 8192 events => throughput is 3.65E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -395,27 +395,27 @@ Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7846s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5597s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2249s for 90112 events => throughput is 4.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8132s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5645s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2486s for 90112 events => throughput is 3.62E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686556561295) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.968141e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.905513e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.963827e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.866043e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3772s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3473s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0299s for 8192 events => throughput is 2.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3815s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3495s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0319s for 8192 events => throughput is 2.56E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -471,27 +471,27 @@ Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8834s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5621s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3214s for 90112 events => throughput is 2.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.8893s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5364s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3529s for 90112 events => throughput is 2.55E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686556561295) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.713679e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.640953e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.652984e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.543334e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -512,15 +512,15 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263352] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7431s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7425s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7465s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7458s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.18E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263352) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263363) differ by less than 3E-14 (1.3322676295501878e-15) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -545,58 +545,58 @@ Executing ' ./build.none_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561298] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9618s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9543s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 90112 events => throughput is 1.21E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.9068s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8993s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 90112 events => throughput is 1.20E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686556561298) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561304) differ by less than 3E-14 (4.440892098500626e-16) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.577372e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.589846e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168278e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.058801e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.398056e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.383441e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.505593e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.512285e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.390610e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.382616e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.767356e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.771039e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.391773e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.376307e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.780303e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.776386e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 9909d81694..63166c80e0 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu CUDACPP_BUILDDIR='.' -make USEBUILDDIR=1 AVX=none +make USEBUILDDIR=1 AVX=none make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.none_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' +CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.sse4_f_inl0_hrd0' CUDACPP_BUILDDIR='build.512z_f_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.512y_f_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-02-05_22:34:38 +DATE: 2024-03-01_03:52:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4477s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3778s - [COUNTERS] Fortran MEs ( 1 ) : 0.0699s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4536s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3837s + [COUNTERS] Fortran MEs ( 1 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3823s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3126s - [COUNTERS] Fortran MEs ( 1 ) : 0.0698s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3907s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3210s + [COUNTERS] Fortran MEs ( 1 ) : 0.0696s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561290] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.2711s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5092s - [COUNTERS] Fortran MEs ( 1 ) : 0.7619s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2714s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5019s + [COUNTERS] Fortran MEs ( 1 ) : 0.7695s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110461852325612] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110463093540638] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4797s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4071s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0727s for 8192 events => throughput is 1.13E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4586s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3882s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110461852325612) differ by less than 4E-4 (2.8586276618058903e-06) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110463093540638) differ by less than 4E-4 (2.812844174915341e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510685241079500] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686273216112] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3864s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6235s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7628s for 90112 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.3150s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5373s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7777s for 90112 events => throughput is 1.16E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510685241079500) differ by less than 4E-4 (6.11548025553077e-08) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686273216112) differ by less than 4E-4 (1.3172298474195543e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.215484e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.170698e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.198326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.161745e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,15 +208,15 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110456793177945] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110459152958460] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3638s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3404s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3657s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3405s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0252s for 8192 events => throughput is 3.25E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110456793177945) differ by less than 4E-4 (3.0452395031188573e-06) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110459152958460) differ by less than 4E-4 (2.9581965829139634e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510681375304044] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510683016166510] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8134s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5557s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2577s for 90112 events => throughput is 3.50E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7697s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4943s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2753s for 90112 events => throughput is 3.27E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510681375304044) differ by less than 4E-4 (2.408689854238588e-07) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510683016166510) differ by less than 4E-4 (1.6458771667782202e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.485750e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.219045e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.433033e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.229652e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,15 +284,15 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110458350871136] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3444s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3319s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3421s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3299s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0122s for 8192 events => throughput is 6.71E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110458350871136) differ by less than 4E-4 (2.987782395047489e-06) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460595003461) differ by less than 4E-4 (2.9050052766654844e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510680866622453] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6853s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5504s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1349s for 90112 events => throughput is 6.68E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6208s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4844s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1364s for 90112 events => throughput is 6.61E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510680866622453) differ by less than 4E-4 (2.6451684009831666e-07) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ by less than 4E-4 (1.8848637739488083e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.565170e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.431027e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.672553e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.412727e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -360,15 +360,15 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110458350871136] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110460595003461] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3399s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3287s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.35E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3415s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3300s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.08E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110458350871136) differ by less than 4E-4 (2.987782395047489e-06) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460595003461) differ by less than 4E-4 (2.9050052766654844e-06) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510680866622453] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510682502089912] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7807s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6487s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1319s for 90112 events => throughput is 6.83E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6084s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4831s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1253s for 90112 events => throughput is 7.19E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510680866622453) differ by less than 4E-4 (2.6451684009831666e-07) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682502089912) differ by less than 4E-4 (1.8848637739488083e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.899301e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.891581e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.086975e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.928440e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110464176080312] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3525s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3371s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.29E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3483s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3329s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510685411522340] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510685411522326] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7160s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5505s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1654s for 90112 events => throughput is 5.45E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.6561s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4840s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1721s for 90112 events => throughput is 5.24E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510685411522340) differ by less than 4E-4 (5.3231167029821336e-08) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685411522326) differ by less than 4E-4 (5.3231167917999755e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.309335e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.988554e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.077737e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.962392e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -514,9 +514,9 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110478167944563] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7440s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7434s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.55E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7423s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7418s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0005s for 8192 events => throughput is 1.58E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -545,58 +545,58 @@ Executing ' ./build.none_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510689885789416] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510689885789414] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9615s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9556s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 90112 events => throughput is 1.54E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8968s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8910s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 90112 events => throughput is 1.53E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510689885789416) differ by less than 4E-4 (1.547708909921397e-07) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510689885789414) differ by less than 4E-4 (1.547708907700951e-07) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.681067e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.824058e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.455476e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.473484e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.793086e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.891145e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.723997e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.706092e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.841062e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.798334e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.798906e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.787777e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.394007e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.356687e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.992718e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.028611e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index d0f21b96dd..eb4ca92d13 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,42 +1,42 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu CUDACPP_BUILDDIR='.' +make USEBUILDDIR=1 AVX=none - -make USEBUILDDIR=1 AVX=none -make USEBUILDDIR=1 AVX=sse4 make USEBUILDDIR=1 AVX=avx2 +make USEBUILDDIR=1 AVX=sse4 + make USEBUILDDIR=1 AVX=512y make USEBUILDDIR=1 AVX=512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.512y_m_inl0_hrd0' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' CUDACPP_BUILDDIR='build.512z_m_inl0_hrd0' -CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +CUDACPP_BUILDDIR='build.sse4_m_inl0_hrd0' CUDACPP_BUILDDIR='build.none_m_inl0_hrd0' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +CUDACPP_BUILDDIR='build.avx2_m_inl0_hrd0' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-02-05_22:35:08 +DATE: 2024-03-01_03:52:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -59,9 +59,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4458s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s - [COUNTERS] Fortran MEs ( 1 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4522s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3828s + [COUNTERS] Fortran MEs ( 1 ) : 0.0694s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -84,9 +84,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3835s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3139s - [COUNTERS] Fortran MEs ( 1 ) : 0.0696s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3858s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3164s + [COUNTERS] Fortran MEs ( 1 ) : 0.0694s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -107,11 +107,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561290] fbridge_mode=0 + [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.2916s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5236s - [COUNTERS] Fortran MEs ( 1 ) : 0.7681s for 90112 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.2499s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4895s + [COUNTERS] Fortran MEs ( 1 ) : 0.7604s for 90112 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,15 +132,15 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539348915991] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4710s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3945s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3943s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0751s for 8192 events => throughput is 1.09E+05 events/s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539348915991) differ by less than 2E-4 (8.658396222216425e-11) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539348916002) differ by less than 2E-4 (8.658362915525686e-11) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -165,29 +165,29 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686560794334] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686560794337] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.4876s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6294s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8583s for 90112 events => throughput is 1.05E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.4601s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6015s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8586s for 90112 events => throughput is 1.05E+05 events/s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686560794334) differ by less than 2E-4 (1.967879192932287e-10) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794337) differ by less than 2E-4 (1.967879192932287e-10) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.023469e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100770e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.065490e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.090853e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -210,9 +210,9 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539348916002] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3962s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3571s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0391s for 8192 events => throughput is 2.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3940s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3546s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0394s for 8192 events => throughput is 2.08E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -241,29 +241,29 @@ Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686560794337] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686560794334] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0102s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5782s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4320s for 90112 events => throughput is 2.09E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9359s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5057s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4302s for 90112 events => throughput is 2.09E+05 events/s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686560794337) differ by less than 2E-4 (1.9678814133783362e-10) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560794334) differ by less than 2E-4 (1.9678769724862377e-10) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.046626e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.020468e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.000567e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027641e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,9 +286,9 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3500s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.51E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3636s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3406s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -317,29 +317,29 @@ Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686558551750] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8128s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5602s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2526s for 90112 events => throughput is 3.57E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7468s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4923s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2545s for 90112 events => throughput is 3.54E+05 events/s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686558551750) differ by less than 2E-4 (9.2533536388828e-11) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.546873e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.536848e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.489419e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.536744e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -362,9 +362,9 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3598s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3573s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3372s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 8192 events => throughput is 4.08E+05 events/s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -393,29 +393,29 @@ Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686558551750] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7728s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5560s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2168s for 90112 events => throughput is 4.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7304s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5047s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2257s for 90112 events => throughput is 3.99E+05 events/s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686558551750) differ by less than 2E-4 (9.2533536388828e-11) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.091661e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.887668e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.136379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.834847e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -438,9 +438,9 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2711 [0.27110539330272815] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3802s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3498s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0304s for 8192 events => throughput is 2.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4046s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3689s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0357s for 8192 events => throughput is 2.30E+05 events/s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -469,29 +469,29 @@ Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686558551750] fbridge_mode=1 + [XSECTION] Cross section = 0.2151 [0.21510686558551748] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9114s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5781s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3333s for 90112 events => throughput is 2.70E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9542s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5763s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3779s for 90112 events => throughput is 2.38E+05 events/s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686558551750) differ by less than 2E-4 (9.2533536388828e-11) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686558551748) differ by less than 2E-4 (9.253309229961815e-11) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.656196e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.510568e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 12.1.0] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.642304e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.320811e+05 ) sec^-1 *** (3) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -512,15 +512,15 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539343558532] fbridge_mode=1 + [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7437s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.23E+07 events/s + [COUNTERS] PROGRAM TOTAL : 0.7473s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7466s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s *** (3) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539343558532) differ by less than 2E-4 (2.8419933073564607e-10) +OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539343558537) differ by less than 2E-4 (2.8419910869104115e-10) *** (3) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -547,56 +547,56 @@ Executing ' ./build.none_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9687s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9611s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.19E+07 events/s + [COUNTERS] PROGRAM TOTAL : 1.8944s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8868s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 90112 events => throughput is 1.19E+07 events/s *** (3) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561290) and cpp (0.21510686553631395) differ by less than 2E-4 (1.3620649053081024e-10) +OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686553631395) differ by less than 2E-4 (1.3620671257541517e-10) *** (3) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.582708e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.579519e+07 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.992203e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.134868e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.392597e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391789e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.507161e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.511629e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.373790e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.394001e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.765106e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.800973e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.396492e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.396936e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 12.1.0)] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.772367e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.776316e+07 ) sec^-1 TEST COMPLETED