#--------------------------------------------------- # PALLAS MPI Benchmark Suite V2.1, MPI-1 part #--------------------------------------------------- # Date : Tue Mar 7 12:28:25 2000 # Machine : CRAY T3E# System : sn6713 # Release : 2.0.5.24 # Version : unicosmk # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 13.97 0.00 1 1000 14.26 0.07 2 1000 14.32 0.13 4 1000 14.41 0.26 8 1000 14.65 0.52 16 1000 14.85 1.03 32 1000 17.69 1.73 64 1000 19.69 3.10 128 1000 17.64 6.92 256 1000 19.78 12.34 512 1000 23.59 20.70 1024 1000 28.01 34.86 2048 1000 38.21 51.12 4096 1000 52.90 73.84 8192 1000 76.82 101.69 16384 1000 124.54 125.46 32768 1000 213.85 146.13 65536 640 395.83 157.90 131072 320 758.85 164.72 262144 160 1482.83 168.60 524288 80 2928.93 170.71 1048576 40 5840.72 171.21 2097152 20 11619.72 172.12 4194304 10 23170.59 172.63 #--------------------------------------------------- # Benchmarking PingPing # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 30.15 0.00 1 1000 30.36 0.03 2 1000 30.36 0.06 4 1000 30.59 0.12 8 1000 30.68 0.25 16 1000 30.53 0.50 32 1000 33.67 0.91 64 1000 34.73 1.76 128 1000 33.71 3.62 256 1000 34.81 7.01 512 1000 36.66 13.32 1024 1000 42.08 23.21 2048 1000 54.88 35.59 4096 1000 73.53 53.12 8192 1000 98.05 79.68 16384 1000 149.34 104.63 32768 1000 250.11 124.95 65536 640 453.82 137.72 131072 320 856.84 145.89 262144 160 1668.67 149.82 524288 80 3275.95 152.63 1048576 40 6489.32 154.10 2097152 20 12867.78 155.43 4194304 10 26600.47 150.37 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 16.62 16.62 16.62 0.00 1 1000 17.01 17.01 17.01 0.11 2 1000 17.01 17.01 17.01 0.22 4 1000 16.77 16.77 16.77 0.45 8 1000 16.67 16.67 16.67 0.92 16 1000 16.90 16.90 16.90 1.81 32 1000 20.32 20.32 20.32 3.00 64 1000 21.75 21.75 21.75 5.61 128 1000 20.87 20.88 20.88 11.69 256 1000 21.81 21.81 21.81 22.39 512 1000 23.66 23.67 23.67 41.26 1024 1000 28.17 28.17 28.17 69.32 2048 1000 39.32 39.32 39.32 99.33 4096 1000 55.81 55.81 55.81 139.97 8192 1000 83.84 83.84 83.84 186.37 16384 1000 133.44 133.44 133.44 234.19 32768 1000 237.08 237.08 237.08 263.63 65536 640 439.54 439.59 439.56 284.36 131072 320 843.94 843.97 843.95 296.22 262144 160 1652.81 1652.89 1652.85 302.50 524288 80 3261.37 3261.59 3261.48 306.60 1048576 40 6467.57 6468.62 6468.10 309.19 2097152 20 12815.32 12817.57 12816.45 312.07 4194304 10 26141.31 26145.72 26143.52 305.98 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 16.62 16.63 16.63 0.00 1 1000 17.19 17.20 17.20 0.11 2 1000 17.08 17.09 17.08 0.22 4 1000 16.76 16.77 16.76 0.46 8 1000 17.19 17.20 17.19 0.89 16 1000 17.02 17.02 17.02 1.79 32 1000 20.52 20.53 20.52 2.97 64 1000 22.30 22.30 22.30 5.47 128 1000 21.50 21.51 21.50 11.35 256 1000 22.90 22.91 22.91 21.31 512 1000 24.64 24.65 24.64 39.62 1024 1000 28.79 28.80 28.79 67.82 2048 1000 40.76 40.76 40.76 95.83 4096 1000 58.01 58.01 58.01 134.66 8192 1000 85.12 85.14 85.13 183.53 16384 1000 135.38 135.42 135.41 230.76 32768 1000 240.38 240.40 240.39 259.99 65536 640 444.66 444.69 444.68 281.09 131072 320 883.50 883.72 883.64 282.90 262144 160 1795.13 1798.50 1797.06 278.01 524288 80 3301.69 3303.88 3302.94 302.67 1048576 40 6619.81 6648.64 6635.43 300.81 2097152 20 13422.93 13611.98 13530.12 293.86 4194304 10 27136.81 27217.67 27181.71 293.93 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 8 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 16.49 16.50 16.50 0.00 1 1000 17.42 17.43 17.43 0.11 2 1000 18.48 18.50 18.49 0.21 4 1000 17.25 17.26 17.25 0.44 8 1000 17.30 17.31 17.30 0.88 16 1000 18.76 18.77 18.76 1.63 32 1000 20.76 20.77 20.76 2.94 64 1000 22.83 22.86 22.85 5.34 128 1000 22.23 22.26 22.25 10.97 256 1000 22.80 22.81 22.80 21.41 512 1000 25.82 25.83 25.82 37.80 1024 1000 31.03 31.04 31.04 62.91 2048 1000 44.76 44.79 44.78 87.22 4096 1000 62.65 62.72 62.69 124.55 8192 1000 84.07 84.09 84.09 185.81 16384 1000 144.38 144.45 144.41 216.34 32768 1000 249.19 249.24 249.21 250.76 65536 640 459.21 459.28 459.24 272.17 131072 320 881.83 882.13 881.99 283.41 262144 160 1714.16 1715.13 1714.68 291.52 524288 80 3362.67 3374.17 3368.62 296.37 1048576 40 6656.31 6709.00 6683.46 298.11 2097152 20 13124.63 13230.81 13175.24 302.32 4194304 10 27305.28 27561.02 27431.39 290.27 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 58.75 58.76 58.75 0.00 1 1000 65.37 65.38 65.38 0.06 2 1000 64.43 64.44 64.44 0.12 4 1000 63.80 63.80 63.80 0.24 8 1000 64.55 64.56 64.55 0.47 16 1000 65.75 65.76 65.76 0.93 32 1000 75.10 75.10 75.10 1.63 64 1000 71.69 71.69 71.69 3.41 128 1000 68.62 68.62 68.62 7.12 256 1000 71.00 71.00 71.00 13.75 512 1000 74.25 74.25 74.25 26.31 1024 1000 83.07 83.07 83.07 47.02 2048 1000 102.19 102.20 102.19 76.45 4096 1000 143.47 143.48 143.48 108.90 8192 1000 213.22 213.25 213.24 146.54 16384 1000 311.72 311.77 311.75 200.47 32768 1000 526.60 526.70 526.65 237.33 65536 640 961.68 961.90 961.79 259.90 131072 320 1849.91 1850.34 1850.13 270.22 262144 160 3428.32 3430.20 3429.26 291.53 524288 80 6503.05 6507.65 6505.35 307.33 1048576 40 12800.04 12809.25 12804.65 312.27 2097152 20 25488.87 25507.13 25498.00 313.64 4194304 10 50707.40 50727.69 50717.54 315.41 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 60.27 60.28 60.28 0.00 1 1000 60.85 60.86 60.85 0.06 2 1000 61.03 61.04 61.03 0.12 4 1000 60.19 60.20 60.20 0.25 8 1000 61.30 61.31 61.31 0.50 16 1000 61.68 61.69 61.68 0.99 32 1000 74.31 74.32 74.32 1.64 64 1000 79.22 79.25 79.23 3.08 128 1000 73.81 73.81 73.81 6.62 256 1000 78.71 78.72 78.72 12.41 512 1000 84.35 84.37 84.36 23.15 1024 1000 93.02 93.05 93.03 41.98 2048 1000 121.52 121.55 121.54 64.27 4096 1000 184.26 184.29 184.28 84.79 8192 1000 218.51 218.61 218.56 142.95 16384 1000 336.30 336.43 336.37 185.77 32768 1000 584.05 584.26 584.15 213.95 65536 640 1083.87 1084.33 1084.09 230.56 131072 320 2078.07 2080.57 2079.51 240.32 262144 160 4072.83 4083.21 4078.69 244.91 524288 80 7905.19 7928.69 7917.76 252.25 1048576 40 15693.13 15810.58 15747.68 253.00 2097152 20 31165.98 31543.39 31347.87 253.62 4194304 10 57881.57 60967.14 59623.22 262.44 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 8 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 65.35 65.40 65.37 0.00 1 1000 65.37 65.40 65.39 0.06 2 1000 64.71 64.74 64.72 0.12 4 1000 64.94 64.96 64.95 0.23 8 1000 64.51 64.54 64.53 0.47 16 1000 66.07 66.09 66.08 0.92 32 1000 76.79 76.82 76.80 1.59 64 1000 81.74 81.76 81.75 2.99 128 1000 76.12 76.14 76.14 6.41 256 1000 81.05 81.08 81.07 12.04 512 1000 87.93 87.95 87.94 22.21 1024 1000 99.78 99.84 99.81 39.13 2048 1000 129.76 129.86 129.82 60.16 4096 1000 189.86 189.92 189.88 82.27 8192 1000 216.65 216.80 216.73 144.14 16384 1000 335.26 335.41 335.34 186.34 32768 1000 589.00 589.37 589.17 212.09 65536 640 1095.71 1096.39 1096.05 228.02 131072 320 2143.86 2148.25 2145.90 232.75 262144 160 4196.75 4220.79 4208.66 236.92 524288 80 8271.90 8349.81 8309.16 239.53 1048576 40 15428.00 15695.91 15562.36 254.84 2097152 20 29575.46 30162.01 29834.77 265.23 4194304 10 55384.06 58524.67 56682.12 273.39 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 10.11 10.11 10.11 4 1000 13.55 13.55 13.55 8 1000 13.88 13.88 13.88 16 1000 14.29 14.30 14.29 32 1000 15.20 15.20 15.20 64 1000 21.40 21.41 21.41 128 1000 21.38 21.39 21.39 256 1000 25.75 25.75 25.75 512 1000 32.73 32.73 32.73 1024 1000 41.95 41.95 41.95 2048 1000 67.52 67.52 67.52 4096 1000 120.13 120.13 120.13 8192 1000 231.43 231.44 231.43 16384 1000 446.92 446.92 446.92 32768 1000 893.50 893.52 893.51 65536 640 1709.49 1709.56 1709.53 131072 320 3280.89 3281.04 3280.97 262144 160 6367.87 6368.19 6368.03 524288 80 12505.96 12507.61 12506.79 1048576 40 24792.55 24794.33 24793.44 2097152 20 49363.17 49367.91 49365.54 4194304 10 98464.75 98471.12 98467.93 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 16.98 16.99 16.98 4 1000 23.44 23.45 23.45 8 1000 24.02 24.03 24.02 16 1000 24.86 24.87 24.86 32 1000 26.59 26.59 26.59 64 1000 31.55 31.56 31.56 128 1000 32.58 32.59 32.59 256 1000 39.31 39.32 39.32 512 1000 52.35 52.36 52.35 1024 1000 77.55 77.56 77.56 2048 1000 129.15 129.16 129.16 4096 1000 225.41 225.42 225.42 8192 1000 409.72 409.74 409.73 16384 1000 812.58 812.62 812.61 32768 1000 1816.60 1816.89 1816.75 65536 640 3467.09 3467.98 3467.55 131072 320 6885.90 6889.23 6887.60 262144 160 13754.55 13769.91 13762.27 524288 80 27139.62 27208.49 27174.22 1048576 40 53610.19 53876.34 53744.43 2097152 20 105844.21 106884.54 106363.56 4194304 10 209034.14 213122.52 211092.69 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 23.29 23.30 23.29 4 1000 33.25 33.26 33.25 8 1000 33.76 33.76 33.76 16 1000 35.40 35.41 35.41 32 1000 37.96 37.97 37.96 64 1000 46.10 46.10 46.10 128 1000 44.52 44.52 44.52 256 1000 55.29 55.29 55.29 512 1000 75.73 75.74 75.73 1024 1000 110.55 110.55 110.55 2048 1000 180.59 180.60 180.59 4096 1000 316.47 316.48 316.47 8192 1000 582.95 582.96 582.95 16384 1000 1155.58 1155.59 1155.58 32768 1000 2609.15 2609.58 2609.38 65536 640 4993.30 4994.61 4993.97 131072 320 9939.18 9945.09 9942.03 262144 160 19817.24 19842.13 19830.00 524288 80 39191.99 39293.11 39240.82 1048576 40 77326.22 77705.69 77521.61 2097152 20 152563.68 154135.23 153350.16 4194304 10 301786.38 307808.47 304780.65 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 10.41 10.41 10.41 4 1000 14.33 14.33 14.33 8 1000 14.95 14.95 14.95 16 1000 15.27 15.27 15.27 32 1000 16.19 16.19 16.19 64 1000 19.32 19.32 19.32 128 1000 18.99 18.99 18.99 256 1000 22.51 22.51 22.51 512 1000 29.13 29.13 29.13 1024 1000 40.76 40.76 40.76 2048 1000 64.36 64.36 64.36 4096 1000 104.95 104.95 104.95 8192 1000 192.83 192.83 192.83 16384 1000 380.01 380.01 380.01 32768 1000 746.31 746.32 746.32 65536 640 1408.69 1408.71 1408.70 131072 320 2673.59 2673.62 2673.60 262144 160 5213.07 5213.13 5213.10 524288 80 10307.45 10307.60 10307.53 1048576 40 20501.86 20502.14 20502.00 2097152 20 40860.56 40861.17 40860.86 4194304 10 81549.17 81550.36 81549.76 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 19.58 19.59 19.59 4 1000 27.81 27.83 27.82 8 1000 28.64 28.66 28.65 16 1000 29.48 29.50 29.49 32 1000 31.09 31.12 31.11 64 1000 36.80 36.83 36.82 128 1000 35.75 35.78 35.77 256 1000 42.20 42.23 42.22 512 1000 53.57 53.62 53.60 1024 1000 73.64 73.70 73.68 2048 1000 118.19 118.30 118.26 4096 1000 192.19 192.36 192.29 8192 1000 359.15 359.47 359.35 16384 1000 698.84 699.45 699.22 32768 1000 1481.59 1482.91 1482.40 65536 640 2862.62 2866.66 2865.09 131072 320 5567.72 5584.00 5577.64 262144 160 10947.31 11009.94 10985.42 524288 80 21483.01 21733.00 21635.18 1048576 40 42163.81 43159.17 42769.34 2097152 20 81815.70 85806.58 84244.45 4194304 10 146117.21 170915.62 162486.23 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 26.83 26.87 26.86 4 1000 38.43 38.48 38.46 8 1000 39.44 39.50 39.47 16 1000 40.62 40.67 40.65 32 1000 43.16 43.22 43.19 64 1000 49.68 49.75 49.72 128 1000 47.73 47.80 47.77 256 1000 56.31 56.39 56.36 512 1000 73.62 73.73 73.68 1024 1000 102.79 102.94 102.87 2048 1000 160.25 160.50 160.39 4096 1000 270.69 271.10 270.93 8192 1000 508.04 508.82 508.49 16384 1000 995.64 997.15 996.51 32768 1000 2076.78 2080.05 2078.68 65536 640 4052.83 4062.86 4058.66 131072 320 7874.22 7913.69 7897.20 262144 160 15288.73 15445.64 15380.06 524288 80 29809.42 30428.16 30169.84 1048576 40 58219.40 60647.63 59629.07 2097152 20 109134.95 120549.04 116055.42 4194304 10 201721.76 240557.30 221435.56 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 16.84 16.84 16.84 4 1000 21.08 21.08 21.08 8 1000 24.14 24.15 24.14 16 1000 24.73 24.74 24.73 32 1000 25.85 25.85 25.85 64 1000 29.63 29.63 29.63 128 1000 31.82 31.82 31.82 256 1000 31.57 31.57 31.57 512 1000 38.29 38.30 38.30 1024 1000 49.47 49.47 49.47 2048 1000 72.88 72.88 72.88 4096 1000 116.66 116.67 116.66 8192 1000 203.98 203.98 203.98 16384 1000 391.03 391.04 391.03 32768 1000 764.15 764.15 764.15 65536 640 1417.60 1417.61 1417.60 131072 320 2689.75 2689.79 2689.77 262144 160 5245.13 5245.21 5245.17 524288 80 10379.05 10379.24 10379.14 1048576 40 20635.61 20635.97 20635.79 2097152 20 41201.60 41202.31 41201.96 4194304 10 82252.17 82253.28 82252.72 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 31.57 31.57 31.57 4 1000 39.34 39.34 39.34 8 1000 42.31 42.32 42.31 16 1000 46.05 46.06 46.05 32 1000 47.69 47.70 47.69 64 1000 53.79 53.80 53.80 128 1000 55.28 55.29 55.28 256 1000 65.50 65.52 65.51 512 1000 69.92 69.93 69.93 1024 1000 91.29 91.30 91.29 2048 1000 134.86 134.87 134.87 4096 1000 209.51 209.53 209.52 8192 1000 384.33 384.36 384.35 16384 1000 724.08 724.12 724.10 32768 1000 1512.99 1513.06 1513.03 65536 640 2881.19 2881.38 2881.31 131072 320 5608.92 5609.58 5609.33 262144 160 11010.35 11012.85 11011.91 524288 80 21794.51 21805.19 21801.28 1048576 40 43222.84 43261.67 43247.13 2097152 20 85891.88 86048.88 85989.77 4194304 10 170973.58 171603.47 171367.59 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 49.54 49.56 49.55 4 1000 59.86 59.88 59.87 8 1000 63.45 63.47 63.46 16 1000 67.72 67.74 67.73 32 1000 74.40 74.42 74.41 64 1000 82.41 82.44 82.42 128 1000 84.01 84.04 84.03 256 1000 95.65 95.68 95.66 512 1000 119.95 119.98 119.97 1024 1000 138.67 138.70 138.69 2048 1000 193.73 193.76 193.75 4096 1000 305.74 305.78 305.76 8192 1000 547.57 547.63 547.60 16384 1000 1054.88 1054.96 1054.92 32768 1000 2128.17 2128.29 2128.24 65536 640 4106.10 4106.41 4106.27 131072 320 7947.68 7948.69 7948.25 262144 160 15534.78 15538.61 15536.94 524288 80 30561.41 30576.16 30569.71 1048576 40 60674.84 60733.50 60707.87 2097152 20 120483.42 120718.50 120615.56 4194304 10 239918.52 240867.14 240451.29 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 12.27 12.27 12.27 1 1000 20.50 20.50 20.50 2 1000 20.49 20.50 20.50 4 1000 15.24 15.25 15.24 8 1000 15.48 15.48 15.48 16 1000 15.72 15.72 15.72 32 1000 16.08 16.08 16.08 64 1000 16.99 16.99 16.99 128 1000 16.36 16.37 16.37 256 1000 17.44 17.44 17.44 512 1000 18.78 18.78 18.78 1024 1000 21.87 21.87 21.87 2048 1000 30.14 30.14 30.14 4096 1000 44.22 44.22 44.22 8192 1000 73.68 73.68 73.68 16384 1000 132.45 132.46 132.46 32768 1000 253.90 253.90 253.90 65536 640 493.07 493.07 493.07 131072 320 971.31 971.32 971.31 262144 160 1926.31 1926.33 1926.32 524288 80 3836.36 3836.38 3836.37 1048576 40 7674.12 7674.21 7674.17 2097152 20 15289.09 15289.10 15289.10 4194304 10 30523.56 30523.78 30523.67 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 24.93 24.94 24.94 1 1000 37.87 37.88 37.88 2 1000 37.89 37.89 37.89 4 1000 30.45 30.45 30.45 8 1000 30.87 30.87 30.87 16 1000 31.55 31.55 31.55 32 1000 32.46 32.47 32.46 64 1000 34.62 34.63 34.63 128 1000 32.95 32.95 32.95 256 1000 35.11 35.12 35.11 512 1000 37.98 37.98 37.98 1024 1000 44.91 44.91 44.91 2048 1000 62.47 62.48 62.48 4096 1000 90.89 90.89 90.89 8192 1000 152.75 152.76 152.76 16384 1000 285.57 285.57 285.57 32768 1000 538.57 538.58 538.57 65536 640 1088.96 1088.97 1088.96 131072 320 2121.54 2121.57 2121.55 262144 160 4762.00 4765.62 4763.97 524288 80 8449.35 8452.97 8451.55 1048576 40 16450.89 16456.02 16454.29 2097152 20 32432.13 32476.80 32462.00 4194304 10 64692.03 64765.97 64734.91 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 48.28 48.28 48.28 1 1000 71.59 71.60 71.59 2 1000 72.25 72.25 72.25 4 1000 59.25 59.26 59.25 8 1000 60.10 60.11 60.10 16 1000 61.41 61.42 61.42 32 1000 64.49 64.49 64.49 64 1000 70.02 70.04 70.03 128 1000 64.61 64.62 64.61 256 1000 68.99 69.00 68.99 512 1000 76.64 76.65 76.64 1024 1000 97.73 97.74 97.73 2048 1000 149.93 149.94 149.94 4096 1000 231.76 231.80 231.78 8192 1000 422.66 422.74 422.69 16384 1000 816.80 816.93 816.87 32768 1000 1606.93 1607.15 1607.06 65536 640 3270.45 3270.79 3270.65 131072 320 6358.56 6361.87 6360.85 262144 160 12523.87 12541.85 12532.38 524288 80 24972.20 25004.03 24989.61 1048576 40 50121.63 50398.75 50241.56 2097152 20 99646.53 100020.68 99876.78 4194304 10 198973.31 201809.88 200212.64 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 12.15 12.16 12.15 1 1000 18.92 18.93 18.92 2 1000 18.90 18.90 18.90 4 1000 15.01 15.01 15.01 8 1000 15.68 15.68 15.68 16 1000 15.49 15.49 15.49 32 1000 15.83 15.84 15.84 64 1000 16.70 16.70 16.70 128 1000 16.04 16.04 16.04 256 1000 17.06 17.06 17.06 512 1000 18.45 18.46 18.45 1024 1000 21.60 21.60 21.60 2048 1000 29.82 29.82 29.82 4096 1000 43.81 43.81 43.81 8192 1000 73.16 73.16 73.16 16384 1000 133.19 133.20 133.19 32768 1000 254.51 254.51 254.51 65536 640 494.75 494.75 494.75 131072 320 974.84 974.86 974.85 262144 160 1931.12 1931.12 1931.12 524288 80 3838.00 3838.04 3838.02 1048576 40 7634.31 7634.40 7634.36 2097152 20 15226.29 15226.53 15226.41 4194304 10 30323.09 30323.71 30323.40 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 24.64 24.65 24.64 1 1000 36.66 36.67 36.66 2 1000 36.93 36.95 36.94 4 1000 30.53 30.53 30.53 8 1000 30.72 30.73 30.72 16 1000 31.34 31.35 31.34 32 1000 32.31 32.32 32.31 64 1000 34.55 34.55 34.55 128 1000 32.49 32.49 32.49 256 1000 35.40 35.41 35.41 512 1000 37.80 37.80 37.80 1024 1000 44.60 44.60 44.60 2048 1000 62.93 62.93 62.93 4096 1000 90.25 90.26 90.25 8192 1000 153.84 153.85 153.84 16384 1000 286.60 286.61 286.61 32768 1000 554.96 555.02 555.00 65536 640 1112.04 1112.07 1112.06 131072 320 2164.34 2164.66 2164.56 262144 160 4654.36 4655.72 4655.37 524288 80 8418.48 8427.81 8424.20 1048576 40 17083.72 17117.68 17104.72 2097152 20 32226.84 32230.45 32229.02 4194304 10 65391.30 65607.33 65535.12 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 47.77 47.78 47.77 1 1000 70.79 70.81 70.80 2 1000 71.27 71.30 71.29 4 1000 58.98 58.99 58.98 8 1000 59.94 59.96 59.95 16 1000 61.34 61.35 61.34 32 1000 63.44 63.46 63.45 64 1000 70.83 70.84 70.83 128 1000 63.44 63.46 63.45 256 1000 68.69 68.71 68.70 512 1000 76.12 76.12 76.12 1024 1000 97.10 97.11 97.11 2048 1000 150.27 150.29 150.28 4096 1000 234.97 235.03 235.02 8192 1000 425.55 425.60 425.58 16384 1000 820.37 820.43 820.39 32768 1000 1604.96 1605.26 1605.10 65536 640 3284.44 3285.28 3284.85 131072 320 6401.70 6403.32 6402.32 262144 160 12666.41 12671.42 12668.98 524288 80 25167.67 25287.57 25224.84 1048576 40 51057.31 51204.97 51137.97 2097152 20 103660.15 104340.97 104026.23 4194304 10 196954.81 199363.49 197831.81 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 12.52 12.52 12.52 1 1000 21.64 21.65 21.65 2 1000 21.65 21.65 21.65 4 1000 15.42 15.43 15.43 8 1000 15.63 15.64 15.63 16 1000 15.88 15.89 15.88 32 1000 16.31 16.32 16.32 64 1000 17.09 17.09 17.09 128 1000 16.65 16.66 16.66 256 1000 17.67 17.67 17.67 512 1000 19.06 19.07 19.07 1024 1000 22.19 22.19 22.19 2048 1000 30.46 30.46 30.46 4096 1000 44.90 44.90 44.90 8192 1000 74.60 74.60 74.60 16384 1000 134.95 134.95 134.95 32768 1000 255.42 255.42 255.42 65536 640 494.85 494.86 494.86 131072 320 973.38 973.39 973.39 262144 160 1928.99 1928.99 1928.99 524288 80 3849.12 3849.18 3849.15 1048576 40 7668.66 7668.75 7668.71 2097152 20 15220.84 15221.04 15220.94 4194304 10 30276.72 30277.42 30277.07 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 25.12 25.12 25.12 1 1000 44.46 44.47 44.47 2 1000 43.17 43.18 43.17 4 1000 30.78 30.79 30.78 8 1000 31.42 31.42 31.42 16 1000 31.79 31.80 31.80 32 1000 33.00 33.01 33.00 64 1000 34.91 34.92 34.91 128 1000 33.02 33.03 33.03 256 1000 35.34 35.35 35.35 512 1000 38.83 38.83 38.83 1024 1000 45.04 45.05 45.04 2048 1000 63.06 63.06 63.06 4096 1000 91.84 91.85 91.85 8192 1000 153.09 153.09 153.09 16384 1000 287.16 287.16 287.16 32768 1000 553.00 553.00 553.00 65536 640 1099.28 1099.30 1099.29 131072 320 2190.23 2190.60 2190.46 262144 160 4408.19 4412.13 4410.54 524288 80 8600.80 8604.99 8603.23 1048576 40 16215.99 16226.99 16222.92 2097152 20 32777.33 32825.80 32812.62 4194304 10 64200.93 64381.73 64313.85 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 48.49 48.51 48.50 1 1000 86.24 86.25 86.24 2 1000 83.84 83.86 83.85 4 1000 60.08 60.09 60.08 8 1000 61.11 61.12 61.11 16 1000 62.05 62.07 62.06 32 1000 64.61 64.63 64.62 64 1000 70.38 70.40 70.39 128 1000 64.49 64.50 64.50 256 1000 69.62 69.63 69.62 512 1000 78.01 78.02 78.02 1024 1000 98.36 98.38 98.37 2048 1000 150.31 150.36 150.34 4096 1000 233.28 233.33 233.31 8192 1000 423.77 423.89 423.84 16384 1000 822.52 822.70 822.64 32768 1000 1603.33 1603.61 1603.46 65536 640 3272.45 3273.09 3272.85 131072 320 6374.31 6376.59 6375.69 262144 160 12604.89 12618.84 12612.00 524288 80 25081.26 25149.47 25120.29 1048576 40 50769.73 51157.10 50941.70 2097152 20 100213.72 100727.00 100536.21 4194304 10 200655.65 208636.50 205699.54 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 5.66 5.67 5.67 1 1000 9.70 9.71 9.71 2 1000 10.14 10.14 10.14 4 1000 7.89 7.90 7.89 8 1000 8.18 8.18 8.18 16 1000 8.29 8.30 8.30 32 1000 8.54 8.55 8.54 64 1000 9.00 9.01 9.00 128 1000 8.60 8.60 8.60 256 1000 9.31 9.31 9.31 512 1000 10.24 10.24 10.24 1024 1000 11.58 11.59 11.59 2048 1000 15.78 15.78 15.78 4096 1000 20.55 20.56 20.55 8192 1000 32.60 32.60 32.60 16384 1000 56.54 56.54 56.54 32768 1000 105.46 105.47 105.46 65536 640 202.49 202.49 202.49 131072 320 395.49 395.51 395.50 262144 160 780.49 780.52 780.51 524288 80 1558.31 1558.39 1558.35 1048576 40 3094.66 3094.77 3094.71 2097152 20 6172.66 6173.01 6172.84 4194304 10 12394.36 12394.69 12394.52 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 10.16 10.17 10.17 1 1000 16.56 16.57 16.56 2 1000 16.60 16.61 16.61 4 1000 13.62 13.63 13.62 8 1000 13.86 13.87 13.87 16 1000 14.08 14.09 14.09 32 1000 14.49 14.49 14.49 64 1000 15.31 15.32 15.31 128 1000 14.45 14.46 14.46 256 1000 15.57 15.58 15.57 512 1000 17.13 17.14 17.14 1024 1000 20.41 20.42 20.42 2048 1000 28.77 28.78 28.77 4096 1000 38.49 38.50 38.50 8192 1000 62.50 62.51 62.50 16384 1000 110.65 110.66 110.66 32768 1000 207.76 207.77 207.77 65536 640 400.84 400.86 400.85 131072 320 790.72 790.75 790.74 262144 160 1559.58 1559.63 1559.61 524288 80 3122.61 3122.85 3122.74 1048576 40 6209.13 6209.60 6209.39 2097152 20 12397.94 12400.50 12399.23 4194304 10 24783.38 24792.18 24787.58 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 12.22 12.23 12.22 1 1000 20.85 20.86 20.86 2 1000 20.78 20.80 20.79 4 1000 17.19 17.21 17.20 8 1000 17.68 17.69 17.69 16 1000 18.17 18.18 18.18 32 1000 19.21 19.22 19.22 64 1000 20.65 20.66 20.65 128 1000 19.26 19.28 19.27 256 1000 20.91 20.92 20.92 512 1000 24.12 24.13 24.13 1024 1000 29.40 29.41 29.41 2048 1000 40.92 40.93 40.93 4096 1000 58.78 58.79 58.79 8192 1000 98.52 98.54 98.53 16384 1000 177.96 177.97 177.97 32768 1000 337.80 337.81 337.81 65536 640 656.94 656.95 656.95 131072 320 1298.47 1298.51 1298.50 262144 160 2580.31 2580.39 2580.37 524288 80 5171.55 5171.92 5171.73 1048576 40 10311.39 10312.02 10311.71 2097152 20 20507.29 20511.66 20508.79 4194304 10 41242.89 41263.88 41253.60 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 5.57 5.57 5.57 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 10.40 10.40 10.40 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 8 ) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 15.20 15.21 15.20