#--------------------------------------------------- # PALLAS MPI Benchmark Suite V2.2, MPI-1 part #--------------------------------------------------- # Date : Wed Jul 30 11:28:06 2003 # Machine : i686# System : Linux # Release : 2.4.20-8smp # Version : #1 SMP Thu Mar 13 16:43:01 EST 2003 # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 1.52 0.00 1 1000 1.58 0.60 2 1000 1.59 1.20 4 1000 1.65 2.32 8 1000 1.65 4.62 16 1000 1.68 9.11 32 1000 1.70 17.90 64 1000 1.79 34.11 128 1000 2.29 53.33 256 1000 2.94 83.01 512 1000 4.31 113.19 1024 1000 6.88 142.02 2048 1000 12.00 162.74 4096 1000 22.27 175.39 8192 1000 43.02 181.59 16384 1000 89.30 174.98 32768 1000 140.36 222.64 65536 640 255.26 244.84 131072 320 722.50 173.01 262144 160 1436.08 174.08 524288 80 2834.39 176.40 1048576 40 5497.73 181.89 2097152 20 10945.15 182.73 4194304 10 21621.70 185.00 #--------------------------------------------------- # Benchmarking PingPing # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 2.64 0.00 1 1000 3.14 0.30 2 1000 3.12 0.61 4 1000 3.17 1.20 8 1000 3.18 2.40 16 1000 3.22 4.74 32 1000 3.26 9.37 64 1000 3.33 18.32 128 1000 3.88 31.45 256 1000 4.59 53.15 512 1000 6.09 80.15 1024 1000 9.10 107.34 2048 1000 15.19 128.60 4096 1000 27.97 139.68 8192 1000 52.73 148.15 16384 1000 105.01 148.80 32768 1000 206.62 151.25 65536 640 419.36 149.04 131072 320 1372.66 91.06 262144 160 2888.60 86.55 524288 80 5737.15 87.15 1048576 40 11364.55 87.99 2097152 20 22837.30 87.58 4194304 10 45743.80 87.44 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 2.83 2.83 2.83 0.00 1 1000 2.95 2.95 2.95 0.65 2 1000 3.00 3.01 3.00 1.27 4 1000 3.05 3.05 3.05 2.50 8 1000 3.01 3.01 3.01 5.06 16 1000 3.07 3.07 3.07 9.95 32 1000 3.11 3.11 3.11 19.59 64 1000 3.16 3.16 3.16 38.64 128 1000 3.62 3.62 3.62 67.37 256 1000 4.35 4.35 4.35 112.22 512 1000 5.86 5.86 5.86 166.62 1024 1000 8.80 8.80 8.80 221.97 2048 1000 14.55 14.55 14.55 268.51 4096 1000 27.27 27.27 27.27 286.48 8192 1000 51.27 51.27 51.27 304.75 16384 1000 109.94 109.94 109.94 284.24 32768 1000 206.03 206.03 206.03 303.35 65536 640 418.92 418.95 418.94 298.36 131072 320 1383.18 1383.23 1383.21 180.74 262144 160 2888.14 2888.45 2888.29 173.10 524288 80 5726.70 5726.79 5726.74 174.62 1048576 40 11384.58 11384.63 11384.60 175.68 2097152 20 22851.85 22852.05 22851.95 175.04 4194304 10 45876.00 45878.40 45877.20 174.37 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 14.88 14.91 14.89 0.00 1 1000 14.81 14.84 14.82 0.13 2 1000 14.41 14.43 14.42 0.26 4 1000 15.13 15.17 15.15 0.50 8 1000 14.88 14.90 14.89 1.02 16 1000 15.52 15.55 15.54 1.96 32 1000 15.53 15.58 15.55 3.92 64 1000 16.07 16.10 16.08 7.58 128 1000 18.11 18.13 18.12 13.46 256 1000 19.19 19.22 19.21 25.41 512 1000 21.22 21.25 21.23 45.96 1024 1000 27.23 27.27 27.25 71.61 2048 1000 41.51 41.55 41.53 94.00 4096 1000 70.18 70.23 70.20 111.24 8192 1000 123.46 123.56 123.51 126.46 16384 1000 239.69 239.79 239.75 130.32 32768 1000 381.10 381.32 381.21 163.91 65536 640 679.62 680.21 679.92 183.77 131072 320 1484.57 1486.51 1485.57 168.18 262144 160 3133.29 3141.86 3137.89 159.14 524288 80 5693.43 5714.26 5705.43 175.00 1048576 40 11816.28 11949.95 11887.33 167.36 2097152 20 23642.15 25712.14 24789.94 155.57 4194304 10 45800.70 47884.89 46919.30 167.07 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 8 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 15.61 15.65 15.62 0.00 1 1000 15.52 15.56 15.54 0.12 2 1000 15.58 15.62 15.60 0.24 4 1000 15.87 15.93 15.91 0.48 8 1000 15.65 15.69 15.67 0.97 16 1000 15.93 15.97 15.94 1.91 32 1000 16.04 16.10 16.07 3.79 64 1000 16.13 16.17 16.15 7.55 128 1000 18.04 18.09 18.06 13.50 256 1000 28.17 28.21 28.19 17.31 512 1000 21.13 21.16 21.15 46.14 1024 1000 27.76 27.81 27.79 70.24 2048 1000 42.51 42.58 42.55 91.73 4096 1000 70.33 70.41 70.37 110.97 8192 1000 125.06 125.08 125.07 124.92 16384 1000 243.67 243.96 243.80 128.09 32768 1000 396.60 396.96 396.78 157.45 65536 640 706.19 707.60 707.00 176.65 131072 320 1499.85 1505.72 1503.09 166.03 262144 160 3110.80 3124.66 3120.62 160.02 524288 80 6192.64 6245.66 6228.76 160.11 1048576 40 12122.72 12337.60 12275.58 162.11 2097152 20 24135.50 25019.05 24763.79 159.88 4194304 10 46468.01 50512.49 48839.95 158.38 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 4.87 4.87 4.87 0.00 1 1000 5.66 5.66 5.66 0.67 2 1000 5.61 5.62 5.62 1.36 4 1000 5.73 5.74 5.73 2.66 8 1000 5.80 5.80 5.80 5.26 16 1000 5.76 5.76 5.76 10.59 32 1000 5.82 5.82 5.82 20.98 64 1000 6.02 6.02 6.02 40.57 128 1000 7.10 7.11 7.11 68.69 256 1000 8.91 8.91 8.91 109.59 512 1000 12.47 12.47 12.47 156.59 1024 1000 19.91 19.91 19.91 196.20 2048 1000 33.85 33.85 33.85 230.78 4096 1000 63.91 63.92 63.92 244.43 8192 1000 119.64 119.66 119.65 261.16 16384 1000 206.40 206.40 206.40 302.81 32768 1000 409.43 409.43 409.43 305.30 65536 640 846.98 846.98 846.98 295.16 131072 320 2753.50 2753.54 2753.52 181.58 262144 160 5780.20 5780.26 5780.23 173.00 524288 80 11445.41 11445.49 11445.45 174.74 1048576 40 22788.32 22788.55 22788.44 175.53 2097152 20 45317.25 45327.65 45322.45 176.49 4194304 10 90298.70 90304.40 90301.55 177.18 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 28.65 28.68 28.66 0.00 1 1000 29.96 29.99 29.97 0.13 2 1000 28.76 28.78 28.77 0.27 4 1000 28.78 28.79 28.78 0.53 8 1000 28.75 28.79 28.77 1.06 16 1000 29.41 29.42 29.41 2.07 32 1000 29.57 29.59 29.58 4.13 64 1000 29.76 29.78 29.77 8.20 128 1000 34.80 34.84 34.82 14.02 256 1000 36.96 36.99 36.98 26.40 512 1000 41.78 41.83 41.80 46.69 1024 1000 51.95 51.99 51.97 75.13 2048 1000 81.21 81.27 81.25 96.13 4096 1000 134.94 135.00 134.98 115.74 8192 1000 239.28 239.37 239.33 130.55 16384 1000 415.26 415.29 415.27 150.50 32768 1000 738.95 739.09 739.04 169.13 65536 640 1645.46 1646.27 1645.92 151.86 131072 320 3604.02 3606.49 3605.41 138.64 262144 160 7280.12 7291.69 7287.42 137.14 524288 80 14823.70 14861.22 14844.78 134.58 1048576 40 28538.45 28714.93 28653.78 139.30 2097152 20 56328.50 57007.85 56771.71 140.33 4194304 10 113027.00 115671.69 114741.20 138.32 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 8 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 29.00 29.04 29.02 0.00 1 1000 28.84 28.90 28.87 0.13 2 1000 28.90 28.93 28.92 0.26 4 1000 28.89 28.94 28.91 0.53 8 1000 28.86 28.89 28.87 1.06 16 1000 29.61 29.63 29.62 2.06 32 1000 29.72 29.77 29.74 4.10 64 1000 29.83 29.91 29.87 8.16 128 1000 34.21 34.26 34.24 14.25 256 1000 36.86 36.91 36.88 26.46 512 1000 41.31 41.38 41.35 47.21 1024 1000 54.18 54.23 54.20 72.03 2048 1000 81.36 81.42 81.39 95.95 4096 1000 134.20 134.29 134.24 116.35 8192 1000 248.59 248.70 248.65 125.66 16384 1000 482.35 482.60 482.48 129.51 32768 1000 860.78 861.31 861.08 145.13 65536 640 1639.84 1642.84 1641.60 152.18 131072 320 3318.89 3324.19 3321.78 150.41 262144 160 7291.84 7321.97 7308.88 136.58 524288 80 13666.20 13758.76 13720.56 145.36 1048576 40 30734.68 31437.98 31160.69 127.23 2097152 20 54565.90 56427.60 55549.13 141.77 4194304 10 106459.81 113672.79 110425.39 140.75 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 4 1000 4.64 4.64 4.64 8 1000 4.58 4.58 4.58 16 1000 4.65 4.65 4.65 32 1000 4.73 4.73 4.73 64 1000 5.25 5.25 5.25 128 1000 5.91 5.91 5.91 256 1000 6.76 6.76 6.76 512 1000 9.95 9.95 9.95 1024 1000 12.88 12.88 12.88 2048 1000 21.02 21.02 21.02 4096 1000 37.34 37.34 37.34 8192 1000 70.09 70.09 70.09 16384 1000 141.18 141.20 141.19 32768 1000 282.95 282.96 282.96 65536 640 712.36 712.39 712.38 131072 320 2697.03 2697.12 2697.07 262144 160 6245.50 6246.16 6245.83 524288 80 12606.80 12607.61 12607.21 1048576 40 25194.67 25200.33 25197.50 2097152 20 50677.15 50701.70 50689.42 4194304 10 104182.40 104186.11 104184.25 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 4 1000 31.36 31.38 31.37 8 1000 31.45 31.45 31.45 16 1000 33.01 33.04 33.03 32 1000 33.22 33.26 33.24 64 1000 33.45 33.49 33.47 128 1000 39.09 39.11 39.10 256 1000 43.28 43.30 43.29 512 1000 51.94 51.96 51.96 1024 1000 67.87 67.91 67.89 2048 1000 103.80 103.84 103.82 4096 1000 176.82 176.87 176.84 8192 1000 326.44 326.49 326.45 16384 1000 744.27 744.53 744.40 32768 1000 1339.03 1339.45 1339.26 65536 640 2754.70 2756.32 2755.48 131072 320 11674.40 11683.38 11678.88 262144 160 17924.41 17942.51 17933.43 524288 80 30369.46 30407.65 30388.49 1048576 40 56112.63 56184.72 56148.84 2097152 20 108702.90 108851.95 108776.80 4194304 10 219918.30 220375.00 220139.45 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 4 1000 61.74 61.78 61.76 8 1000 61.51 61.54 61.52 16 1000 63.11 63.13 63.12 32 1000 63.24 63.28 63.26 64 1000 64.07 64.11 64.09 128 1000 74.77 74.80 74.78 256 1000 82.24 82.29 82.26 512 1000 96.35 96.38 96.37 1024 1000 121.40 121.44 121.42 2048 1000 184.19 184.26 184.22 4096 1000 312.36 312.45 312.40 8192 1000 565.81 565.89 565.84 16384 1000 1322.46 1322.82 1322.62 32768 1000 2382.38 2382.94 2382.63 65536 640 4893.51 4897.00 4895.42 131072 320 16359.74 16371.79 16365.97 262144 160 24634.04 24667.25 24651.44 524288 80 42699.25 42790.45 42745.10 1048576 40 75027.17 75150.37 75092.63 2097152 20 147033.95 147274.00 147141.16 4194304 10 298365.29 299314.09 298857.65 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 4 1000 2.39 2.39 2.39 8 1000 2.37 2.37 2.37 16 1000 2.42 2.42 2.42 32 1000 2.43 2.43 2.43 64 1000 2.56 2.56 2.56 128 1000 3.25 3.25 3.25 256 1000 4.02 4.02 4.02 512 1000 5.45 5.45 5.45 1024 1000 8.30 8.30 8.30 2048 1000 14.11 14.11 14.11 4096 1000 25.79 25.80 25.80 8192 1000 50.32 50.35 50.34 16384 1000 105.64 105.69 105.67 32768 1000 178.29 178.37 178.33 65536 640 799.08 799.48 799.28 131072 320 2171.61 2174.07 2172.84 262144 160 4427.70 4435.97 4431.83 524288 80 8870.16 8902.71 8886.44 1048576 40 17555.70 17684.50 17620.10 2097152 20 35003.85 35509.55 35256.70 4194304 10 70899.81 72691.11 71795.46 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 4 1000 19.21 19.27 19.24 8 1000 19.32 19.36 19.35 16 1000 20.28 20.30 20.29 32 1000 20.53 20.56 20.55 64 1000 21.52 21.56 21.55 128 1000 24.47 24.52 24.48 256 1000 30.78 30.85 30.83 512 1000 39.43 39.52 39.47 1024 1000 56.30 56.39 56.34 2048 1000 86.36 86.52 86.45 4096 1000 140.99 141.22 141.10 8192 1000 238.67 239.07 238.87 16384 1000 525.84 526.34 526.12 32768 1000 905.17 905.92 905.57 65536 640 5555.57 5563.76 5559.94 131072 320 7821.01 7832.85 7826.86 262144 160 11980.18 12020.03 12000.10 524288 80 19198.71 19300.20 19246.72 1048576 40 33249.45 33724.47 33499.76 2097152 20 58334.15 60111.40 59280.39 4194304 10 112846.80 120306.41 116780.95 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 4 1000 27.97 28.03 28.00 8 1000 27.97 28.04 28.01 16 1000 29.46 29.53 29.49 32 1000 30.90 30.96 30.93 64 1000 30.19 30.28 30.24 128 1000 36.48 36.57 36.53 256 1000 49.42 49.54 49.48 512 1000 61.47 61.64 61.55 1024 1000 87.96 88.17 88.07 2048 1000 137.75 138.05 137.90 4096 1000 227.98 228.48 228.25 8192 1000 402.34 403.23 402.78 16384 1000 746.84 747.78 747.30 32768 1000 1347.32 1349.05 1348.19 65536 640 6815.82 6825.15 6820.44 131072 320 10213.59 10241.76 10227.22 262144 160 14968.47 15047.61 15010.02 524288 80 23604.11 23862.02 23726.94 1048576 40 41197.32 42151.17 41666.34 2097152 20 72192.45 76234.55 74352.81 4194304 10 142583.50 159807.19 151658.69 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 7.01 7.01 7.01 4 1000 8.36 8.36 8.36 8 1000 8.97 8.98 8.98 16 1000 9.02 9.02 9.02 32 1000 9.15 9.16 9.15 64 1000 9.37 9.37 9.37 128 1000 10.12 10.12 10.12 256 1000 10.84 10.84 10.84 512 1000 13.28 13.28 13.28 1024 1000 9.34 9.34 9.34 2048 1000 13.64 13.64 13.64 4096 1000 21.73 21.73 21.73 8192 1000 38.44 38.45 38.45 16384 1000 71.51 71.51 71.51 32768 1000 144.23 144.24 144.24 65536 640 283.72 283.72 283.72 131072 320 964.57 964.59 964.58 262144 160 2767.12 2767.19 2767.15 524288 80 6433.70 6433.75 6433.73 1048576 40 12986.03 12988.50 12987.26 2097152 20 25713.90 25718.60 25716.25 4194304 10 51275.10 51289.11 51282.10 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 32.72 32.74 32.74 4 1000 35.07 35.07 35.07 8 1000 35.45 35.48 35.46 16 1000 35.95 35.96 35.96 32 1000 37.33 37.35 37.34 64 1000 37.94 37.95 37.94 128 1000 39.14 39.17 39.15 256 1000 44.26 44.29 44.27 512 1000 50.39 50.43 50.41 1024 1000 82.63 82.65 82.64 2048 1000 97.33 97.38 97.35 4096 1000 132.97 133.02 133.00 8192 1000 194.27 194.33 194.30 16384 1000 316.69 316.79 316.74 32768 1000 564.73 564.86 564.79 65536 640 962.07 962.20 962.14 131072 320 1635.38 1635.87 1635.64 262144 160 3102.33 3103.06 3102.74 524288 80 13338.60 13378.30 13358.43 1048576 40 22041.88 22109.37 22075.54 2097152 20 39368.90 39543.20 39455.79 4194304 10 74976.99 75272.69 75123.35 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 64.88 64.92 64.90 4 1000 66.53 66.56 66.55 8 1000 66.81 66.85 66.83 16 1000 68.16 68.20 68.18 32 1000 69.35 69.38 69.37 64 1000 69.93 69.98 69.95 128 1000 73.49 73.53 73.50 256 1000 86.16 86.20 86.18 512 1000 98.13 98.18 98.15 1024 1000 215.26 215.29 215.27 2048 1000 234.80 234.85 234.83 4096 1000 261.18 261.23 261.20 8192 1000 339.01 339.06 339.03 16384 1000 512.04 512.11 512.07 32768 1000 862.92 863.01 862.97 65536 640 1519.35 1519.62 1519.48 131072 320 2685.42 2685.72 2685.55 262144 160 4480.19 4481.33 4480.87 524288 80 8301.71 8305.79 8303.72 1048576 40 24902.28 24978.58 24939.53 2097152 20 46764.10 46976.00 46866.43 4194304 10 88435.71 88913.60 88681.14 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 4.10 4.10 4.10 2 1000 4.09 4.09 4.09 4 1000 4.18 4.18 4.18 8 1000 4.16 4.17 4.17 16 1000 4.22 4.22 4.22 32 1000 4.24 4.25 4.24 64 1000 4.40 4.40 4.40 128 1000 4.97 4.97 4.97 256 1000 5.93 5.93 5.93 512 1000 7.82 7.82 7.82 1024 1000 11.80 11.80 11.80 2048 1000 19.50 19.51 19.50 4096 1000 35.02 35.02 35.02 8192 1000 66.41 66.41 66.41 16384 1000 131.31 131.31 131.31 32768 1000 258.25 258.25 258.25 65536 640 685.32 685.34 685.33 131072 320 2010.30 2010.34 2010.32 262144 160 4432.78 4432.78 4432.78 524288 80 8890.48 8890.96 8890.72 1048576 40 17815.25 17815.28 17815.26 2097152 20 35844.45 35844.75 35844.60 4194304 10 70263.20 70276.20 70269.70 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 31.28 31.32 31.29 2 1000 31.18 31.18 31.18 4 1000 31.21 31.24 31.23 8 1000 33.91 33.93 33.92 16 1000 32.82 32.86 32.83 32 1000 33.17 33.19 33.18 64 1000 38.89 38.94 38.91 128 1000 42.05 42.07 42.05 256 1000 50.66 50.68 50.67 512 1000 64.51 64.53 64.52 1024 1000 98.23 98.28 98.25 2048 1000 168.36 168.38 168.37 4096 1000 293.18 293.20 293.19 8192 1000 593.77 593.95 593.85 16384 1000 1109.75 1110.20 1110.00 32768 1000 2072.19 2072.64 2072.42 65536 640 4610.81 4611.62 4611.27 131072 320 9803.14 9803.58 9803.33 262144 160 16972.43 16973.32 16972.91 524288 80 30054.36 30056.20 30055.14 1048576 40 55009.90 55016.68 55012.98 2097152 20 107436.60 107450.70 107442.89 4194304 10 247318.40 247357.30 247338.05 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.10 0.09 1 1000 61.41 61.44 61.42 2 1000 62.38 62.42 62.40 4 1000 62.15 62.17 62.16 8 1000 62.89 62.95 62.92 16 1000 63.15 63.18 63.17 32 1000 69.39 69.44 69.41 64 1000 76.57 76.63 76.59 128 1000 87.41 87.45 87.43 256 1000 104.81 104.87 104.84 512 1000 147.05 147.07 147.06 1024 1000 235.14 235.23 235.19 2048 1000 404.30 404.39 404.35 4096 1000 796.83 797.05 796.96 8192 1000 1669.81 1670.49 1670.15 16384 1000 3098.62 3099.37 3099.04 32768 1000 6061.37 6062.11 6061.83 65536 640 12918.36 12919.66 12919.18 131072 320 20165.98 20167.06 20166.57 262144 160 37804.30 37806.49 37805.23 524288 80 67560.92 67568.00 67564.33 1048576 40 115099.13 115428.05 115265.53 2097152 20 268125.60 268178.35 268153.10 4194304 10 565999.40 574894.91 570487.86 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.11 0.11 1 1000 6.12 6.12 6.12 2 1000 6.24 6.25 6.24 4 1000 6.28 6.28 6.28 8 1000 6.29 6.29 6.29 16 1000 6.36 6.36 6.36 32 1000 6.45 6.45 6.45 64 1000 6.79 6.79 6.79 128 1000 7.64 7.64 7.64 256 1000 9.01 9.01 9.01 512 1000 11.87 11.87 11.87 1024 1000 17.67 17.67 17.67 2048 1000 29.18 29.18 29.18 4096 1000 53.24 53.24 53.24 8192 1000 97.74 97.74 97.74 16384 1000 194.59 194.59 194.59 32768 1000 379.37 379.37 379.37 65536 640 1813.09 1813.13 1813.11 131072 320 4079.41 4079.62 4079.52 262144 160 4430.56 4430.56 4430.56 524288 80 8859.10 8860.76 8859.93 1048576 40 17768.68 17768.77 17768.72 2097152 20 35364.70 35365.85 35365.27 4194304 10 71091.10 71091.70 71091.40 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.11 0.11 1 1000 33.06 33.09 33.08 2 1000 34.28 34.31 34.29 4 1000 33.11 33.12 33.12 8 1000 34.38 34.41 34.39 16 1000 34.58 34.61 34.60 32 1000 35.10 35.10 35.10 64 1000 42.59 42.62 42.61 128 1000 46.81 46.84 46.82 256 1000 57.39 57.42 57.41 512 1000 72.33 72.38 72.36 1024 1000 108.51 108.55 108.53 2048 1000 183.56 183.61 183.58 4096 1000 324.89 324.93 324.91 8192 1000 734.41 734.49 734.45 16384 1000 1275.64 1276.08 1275.88 32768 1000 8499.67 8503.33 8501.49 65536 640 12352.39 12357.54 12354.98 131072 320 5989.22 5991.42 5990.35 262144 160 10922.49 10927.75 10925.13 524288 80 20612.79 20646.22 20630.74 1048576 40 40842.83 40972.45 40912.92 2097152 20 81689.24 82220.40 81976.06 4194304 10 159476.60 161532.60 160598.85 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.12 0.12 0.12 1 1000 66.19 66.23 66.21 2 1000 66.24 66.26 66.25 4 1000 68.72 68.75 68.73 8 1000 68.33 68.38 68.35 16 1000 70.25 70.30 70.27 32 1000 77.26 77.31 77.28 64 1000 86.22 86.27 86.25 128 1000 97.92 97.97 97.93 256 1000 116.74 116.80 116.77 512 1000 162.66 162.72 162.68 1024 1000 258.88 258.97 258.92 2048 1000 445.85 445.95 445.91 4096 1000 891.75 892.01 891.93 8192 1000 1856.35 1856.60 1856.49 16384 1000 9249.23 9253.33 9251.21 32768 1000 13370.75 13374.60 13372.61 65536 640 8087.67 8090.13 8088.81 131072 320 13332.71 13336.33 13333.87 262144 160 23463.35 23471.04 23467.36 524288 80 46147.01 46200.73 46180.96 1048576 40 91916.82 92135.37 92053.71 2097152 20 181058.35 181578.15 181366.26 4194304 10 427369.40 432171.00 429605.76 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 6.16 6.16 6.16 1 1000 6.20 6.20 6.20 2 1000 6.29 6.29 6.29 4 1000 6.35 6.35 6.35 8 1000 6.39 6.39 6.39 16 1000 6.46 6.46 6.46 32 1000 6.94 6.94 6.94 64 1000 7.60 7.61 7.61 128 1000 5.05 5.05 5.05 256 1000 6.02 6.02 6.02 512 1000 7.90 7.90 7.90 1024 1000 11.91 11.91 11.91 2048 1000 19.61 19.62 19.62 4096 1000 35.50 35.51 35.51 8192 1000 66.79 66.79 66.79 16384 1000 134.79 134.79 134.79 32768 1000 261.46 261.46 261.46 65536 640 934.47 934.49 934.48 131072 320 2172.58 2172.60 2172.59 262144 160 4537.41 4537.44 4537.42 524288 80 8923.10 8923.55 8923.32 1048576 40 17906.28 17906.40 17906.34 2097152 20 35402.05 35408.20 35405.13 4194304 10 69701.90 69710.00 69705.95 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 32.27 32.29 32.28 1 1000 32.82 32.84 32.83 2 1000 33.99 34.02 34.00 4 1000 34.35 34.35 34.35 8 1000 34.67 34.70 34.69 16 1000 41.80 41.84 41.82 32 1000 45.87 45.92 45.90 64 1000 58.58 58.63 58.61 128 1000 67.64 67.69 67.67 256 1000 71.04 71.10 71.07 512 1000 78.54 78.58 78.56 1024 1000 99.48 99.49 99.49 2048 1000 150.98 151.00 150.99 4096 1000 263.54 263.59 263.56 8192 1000 477.38 477.52 477.46 16384 1000 703.58 703.76 703.70 32768 1000 1222.67 1223.00 1222.82 65536 640 2957.92 2958.20 2958.04 131072 320 5756.99 5757.46 5757.20 262144 160 11241.44 11242.85 11242.15 524288 80 21836.19 21837.94 21837.03 1048576 40 43233.87 43237.63 43235.83 2097152 20 89853.15 89862.35 89858.11 4194304 10 207180.30 207671.00 207426.00 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 65.47 65.50 65.49 1 1000 69.28 69.33 69.31 2 1000 68.71 68.75 68.73 4 1000 76.36 76.40 76.38 8 1000 84.97 85.00 84.98 16 1000 96.56 96.61 96.59 32 1000 173.01 173.08 173.05 64 1000 174.42 174.50 174.46 128 1000 198.91 199.02 198.96 256 1000 223.52 223.61 223.57 512 1000 222.80 222.91 222.85 1024 1000 299.03 299.13 299.08 2048 1000 461.76 461.89 461.81 4096 1000 823.52 823.82 823.67 8192 1000 1513.08 1513.67 1513.37 16384 1000 2366.20 2366.71 2366.45 32768 1000 4046.93 4047.31 4047.08 65536 640 7470.11 7470.81 7470.39 131072 320 14187.46 14189.07 14188.13 262144 160 27423.78 27427.44 27426.08 524288 80 52990.87 53006.69 52997.36 1048576 40 104313.47 104339.35 104329.60 2097152 20 274759.20 275138.65 274970.19 4194304 10 547220.80 548764.30 548137.15 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 1 1000 1.89 1.89 1.89 2 1000 1.77 1.78 1.78 4 1000 1.83 1.83 1.83 8 1000 1.85 1.85 1.85 16 1000 1.84 1.84 1.84 32 1000 1.88 1.88 1.88 64 1000 1.94 1.95 1.94 128 1000 2.48 2.48 2.48 256 1000 3.15 3.15 3.15 512 1000 4.66 4.66 4.66 1024 1000 7.21 7.21 7.21 2048 1000 12.44 12.44 12.44 4096 1000 22.60 22.61 22.61 8192 1000 43.39 43.41 43.40 16384 1000 93.39 93.40 93.40 32768 1000 193.57 193.57 193.57 65536 640 404.30 404.32 404.31 131072 320 1025.64 1025.82 1025.73 262144 160 2028.38 2028.46 2028.42 524288 80 4338.74 4338.95 4338.84 1048576 40 8612.65 8616.33 8614.49 2097152 20 16817.95 16819.30 16818.62 4194304 10 31763.10 31763.20 31763.15 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 1 1000 18.20 18.24 18.22 2 1000 18.37 18.41 18.39 4 1000 19.45 19.50 19.48 8 1000 18.21 18.25 18.24 16 1000 18.68 18.73 18.71 32 1000 18.90 18.96 18.94 64 1000 19.23 19.26 19.25 128 1000 20.24 20.31 20.28 256 1000 22.16 22.22 22.19 512 1000 28.66 28.73 28.70 1024 1000 38.77 38.86 38.83 2048 1000 59.58 59.70 59.65 4096 1000 100.65 100.83 100.76 8192 1000 180.17 180.50 180.38 16384 1000 484.16 484.24 484.20 32768 1000 856.17 856.40 856.29 65536 640 1478.59 1478.67 1478.63 131072 320 2667.92 2668.19 2668.09 262144 160 4980.62 4980.87 4980.74 524288 80 9063.64 9065.29 9064.53 1048576 40 16865.77 16869.28 16867.71 2097152 20 31812.35 31818.10 31815.31 4194304 10 59809.01 60707.70 60259.43 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 1 1000 29.22 29.27 29.25 2 1000 29.27 29.33 29.31 4 1000 29.31 29.36 29.34 8 1000 29.11 29.17 29.15 16 1000 29.97 30.05 30.01 32 1000 29.41 29.48 29.45 64 1000 29.81 29.85 29.83 128 1000 30.13 30.19 30.17 256 1000 32.55 32.64 32.61 512 1000 39.60 39.72 39.67 1024 1000 54.21 54.33 54.29 2048 1000 83.85 84.02 83.97 4096 1000 160.97 161.31 161.20 8192 1000 270.73 271.35 271.15 16384 1000 646.94 647.11 647.01 32768 1000 1155.73 1156.02 1155.89 65536 640 1790.43 1790.88 1790.66 131072 320 3170.16 3171.07 3170.62 262144 160 6065.22 6068.24 6066.62 524288 80 11837.09 11849.77 11843.20 1048576 40 22111.17 22161.37 22136.01 2097152 20 41262.10 41601.25 41488.46 4194304 10 77797.31 79106.20 78671.91 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 3.02 3.02 3.02 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 30.15 30.15 30.15 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 8 ) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 61.65 61.67 61.66 #===================================================== # # Thanks for using PMB2.2 # # The Pallas team kindly requests that you # give us as much feedback for PMB as possible. # # It would be very helpful when you sent the # output tables of your run(s) of PMB to # # ####################### # # # # # pmb@pallas.com # # # # # ####################### # # You might also add # # - personal information (institution, motivation # for using PMB) # - basic information about the machine you used # (number of CPUs, processor type e.t.c.) # #=====================================================