#--------------------------------------------------- # PALLAS MPI Benchmark Suite V2.2, MPI-1 part #--------------------------------------------------- # Date : Mon Apr 26 16:52:14 2004 # Machine : i686# System : Linux # Release : 2.4.22-1.2188.nptlsmp # Version : #1 SMP Wed Apr 21 20:12:56 EDT 2004 # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 67.68 0.00 1 1000 67.80 0.01 2 1000 67.92 0.03 4 1000 67.94 0.06 8 1000 68.04 0.11 16 1000 68.10 0.22 32 1000 68.92 0.44 64 1000 70.93 0.86 128 1000 73.84 1.65 256 1000 81.00 3.01 512 1000 94.75 5.15 1024 1000 121.37 8.05 2048 1000 153.22 12.75 4096 1000 220.85 17.69 8192 1000 359.10 21.76 16384 1000 639.39 24.44 32768 1000 1200.78 26.02 65536 640 3705.42 16.87 131072 320 5910.78 21.15 262144 160 9267.51 26.98 524288 80 18798.44 26.60 1048576 40 34211.10 29.23 2097152 20 68435.55 29.22 4194304 10 136016.60 29.41 #--------------------------------------------------- # Benchmarking PingPing # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 72.86 0.00 1 1000 72.76 0.01 2 1000 73.17 0.03 4 1000 73.62 0.05 8 1000 73.66 0.10 16 1000 73.34 0.21 32 1000 74.09 0.41 64 1000 76.92 0.79 128 1000 80.39 1.52 256 1000 87.89 2.78 512 1000 101.59 4.81 1024 1000 129.97 7.51 2048 1000 185.27 10.54 4096 1000 291.32 13.41 8192 1000 555.41 14.07 16384 1000 1123.89 13.90 32768 1000 2196.81 14.23 65536 640 4385.97 14.25 131072 320 8570.90 14.58 262144 160 17824.81 14.03 524288 80 32111.14 15.57 1048576 40 63706.45 15.70 2097152 20 127860.05 15.64 4194304 10 255166.20 15.68 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 73.09 73.09 73.09 0.00 1 1000 73.53 74.13 73.83 0.03 2 1000 73.43 73.48 73.45 0.05 4 1000 74.16 74.22 74.19 0.10 8 1000 73.48 73.49 73.49 0.21 16 1000 73.80 73.84 73.82 0.41 32 1000 73.95 73.99 73.97 0.82 64 1000 76.84 76.87 76.86 1.59 128 1000 81.12 81.18 81.15 3.01 256 1000 87.96 87.96 87.96 5.55 512 1000 102.05 102.16 102.11 9.56 1024 1000 130.41 130.48 130.45 14.97 2048 1000 186.15 186.23 186.19 20.98 4096 1000 292.66 292.74 292.70 26.69 8192 1000 555.19 555.30 555.25 28.14 16384 1000 1123.11 1123.52 1123.31 27.81 32768 1000 2194.29 2195.11 2194.70 28.47 65536 640 4448.28 4448.79 4448.54 28.10 131072 320 8527.31 8528.90 8528.10 29.31 262144 160 16480.24 16488.58 16484.41 30.32 524288 80 32090.79 32109.75 32100.27 31.14 1048576 40 63727.60 63760.50 63744.05 31.37 2097152 20 127699.15 127797.35 127748.25 31.30 4194304 10 254847.50 255110.10 254978.80 31.36 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1011.39 1011.64 1011.54 0.00 1 1000 1081.88 1119.82 1110.26 0.00 2 1000 788.85 788.99 788.91 0.00 4 1000 1263.92 1264.06 1264.00 0.01 8 1000 714.10 714.23 714.17 0.02 16 1000 862.15 862.36 862.30 0.04 32 1000 775.36 775.43 775.39 0.08 64 1000 81.10 81.25 81.18 1.50 128 1000 160.18 160.32 160.25 1.52 256 1000 95.69 95.79 95.72 5.10 512 1000 115.08 115.19 115.15 8.48 1024 1000 149.28 149.35 149.32 13.08 2048 1000 201.07 201.31 201.18 19.40 4096 1000 317.86 317.99 317.94 24.57 8192 1000 569.91 570.06 570.01 27.41 16384 1000 1129.13 1129.53 1129.38 27.67 32768 1000 2241.38 2241.85 2241.55 27.88 65536 640 4352.88 4358.40 4356.64 28.68 131072 320 11566.86 11584.12 11578.17 21.58 262144 160 21548.53 21648.31 21600.77 23.10 524288 80 43010.06 43348.80 43191.99 23.07 1048576 40 85107.00 86330.62 85750.74 23.17 2097152 20 171989.90 176560.60 174392.57 22.66 4194304 10 350882.50 368357.90 359949.22 21.72 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 8 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1759.95 1798.15 1783.75 0.00 1 1000 1968.92 1975.12 1974.02 0.00 2 1000 2013.16 2013.53 2013.33 0.00 4 1000 1666.41 1666.84 1666.68 0.00 8 1000 1806.36 1844.99 1835.13 0.01 16 1000 1659.99 1660.44 1660.27 0.02 32 1000 1201.87 1202.33 1202.09 0.05 64 1000 1358.23 1358.80 1358.51 0.09 128 1000 489.40 489.64 489.52 0.50 256 1000 238.16 238.42 238.31 2.05 512 1000 127.15 127.42 127.31 7.66 1024 1000 171.30 171.49 171.38 11.39 2048 1000 302.31 303.23 302.85 12.88 4096 1000 561.30 562.38 561.90 13.89 8192 1000 1111.29 1112.62 1112.04 14.04 16384 1000 2227.46 2233.97 2232.13 13.99 32768 1000 4335.78 4340.28 4338.33 14.40 65536 640 9233.09 9254.65 9244.14 13.51 131072 320 20637.03 20759.18 20711.57 12.04 262144 160 39545.14 39955.16 39767.95 12.51 524288 80 84708.55 86469.31 85745.81 11.56 1048576 40 164465.00 172956.30 169906.34 11.56 2097152 20 317265.05 337872.55 328922.12 11.84 4194304 10 602510.60 718702.50 671969.42 11.13 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 88.72 88.82 88.77 0.00 1 1000 88.68 88.69 88.68 0.04 2 1000 89.90 89.96 89.93 0.08 4 1000 89.18 89.22 89.20 0.17 8 1000 90.69 90.71 90.70 0.34 16 1000 89.66 89.68 89.67 0.68 32 1000 90.58 90.65 90.61 1.35 64 1000 93.09 93.09 93.09 2.62 128 1000 97.01 97.08 97.04 5.03 256 1000 105.23 105.25 105.24 9.28 512 1000 138.46 138.50 138.48 14.10 1024 1000 188.24 188.33 188.28 20.74 2048 1000 310.66 310.75 310.70 25.14 4096 1000 559.21 559.34 559.27 27.93 8192 1000 1112.21 1112.75 1112.48 28.08 16384 1000 2247.18 2248.26 2247.72 27.80 32768 1000 4303.29 4303.58 4303.43 29.05 65536 640 7663.41 7664.54 7663.97 32.62 131072 320 17013.63 17015.88 17014.75 29.38 262144 160 34259.05 34268.54 34263.79 29.18 524288 80 64071.10 64086.38 64078.74 31.21 1048576 40 127303.45 127330.70 127317.08 31.41 2097152 20 255601.15 255656.10 255628.63 31.29 4194304 10 509956.40 510080.50 510018.45 31.37 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 96.84 96.98 96.92 0.00 1 1000 98.08 98.16 98.11 0.04 2 1000 97.91 98.02 97.97 0.08 4 1000 99.72 99.80 99.77 0.15 8 1000 97.83 97.93 97.89 0.31 16 1000 99.01 99.13 99.07 0.62 32 1000 98.34 98.46 98.41 1.24 64 1000 102.28 102.39 102.33 2.38 128 1000 107.59 107.72 107.65 4.53 256 1000 118.63 118.76 118.71 8.22 512 1000 149.66 149.77 149.71 13.04 1024 1000 211.99 212.12 212.05 18.42 2048 1000 335.77 335.82 335.80 23.26 4096 1000 562.09 562.28 562.21 27.79 8192 1000 1104.77 1105.30 1105.03 28.27 16384 1000 2205.67 2206.85 2206.34 28.32 32768 1000 4617.06 4618.06 4617.67 27.07 65536 640 8919.11 8923.84 8921.66 28.01 131072 320 25333.72 25349.84 25339.79 19.72 262144 160 49489.01 49543.72 49510.43 20.18 524288 80 90935.24 91207.29 91099.57 21.93 1048576 40 197365.70 198563.42 198162.84 20.14 2097152 20 403467.75 408720.75 407000.34 19.57 4194304 10 823315.20 845775.80 838452.73 18.92 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 8 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 118.04 118.25 118.12 0.00 1 1000 118.34 118.49 118.41 0.03 2 1000 119.52 119.78 119.65 0.06 4 1000 119.18 119.37 119.28 0.13 8 1000 119.06 119.28 119.16 0.26 16 1000 119.67 119.83 119.77 0.51 32 1000 119.85 120.07 119.99 1.02 64 1000 125.42 125.52 125.46 1.95 128 1000 133.46 133.72 133.58 3.65 256 1000 152.18 152.43 152.35 6.41 512 1000 198.19 198.59 198.43 9.83 1024 1000 297.00 297.47 297.23 13.13 2048 1000 603.23 603.81 603.58 12.94 4096 1000 1102.22 1103.27 1102.77 14.16 8192 1000 2202.40 2204.60 2203.61 14.17 16384 1000 4582.98 4586.77 4585.20 13.63 32768 1000 9546.84 9558.91 9555.58 13.08 65536 640 20066.28 20403.50 20274.12 12.25 131072 320 46854.79 46979.75 46923.56 10.64 262144 160 86197.63 86742.21 86517.39 11.53 524288 80 167642.90 169626.54 168746.58 11.79 1048576 40 322398.97 331136.85 327478.01 12.08 2097152 20 645068.30 681753.40 667078.39 11.73 4194304 10 1200879.70 1344371.70 1287006.55 11.90 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 4 1000 77.97 78.00 77.98 8 1000 77.64 77.66 77.65 16 1000 77.35 77.41 77.38 32 1000 78.95 78.99 78.97 64 1000 81.73 81.77 81.75 128 1000 86.14 86.22 86.18 256 1000 93.55 93.59 93.57 512 1000 107.64 107.65 107.65 1024 1000 135.99 136.07 136.03 2048 1000 194.30 194.38 194.34 4096 1000 301.93 301.94 301.93 8192 1000 556.35 556.47 556.41 16384 1000 1124.18 1124.61 1124.40 32768 1000 2165.36 2166.15 2165.76 65536 640 4732.78 4733.80 4733.29 131072 320 9223.84 9226.23 9225.04 262144 160 20128.44 20138.23 20133.34 524288 80 36582.27 36595.57 36588.92 1048576 40 72588.38 72612.90 72600.64 2097152 20 146408.15 146457.45 146432.80 4194304 10 289541.40 289560.60 289551.00 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 4 1000 156.66 156.76 156.71 8 1000 157.42 157.52 157.47 16 1000 156.66 156.74 156.70 32 1000 157.82 157.92 157.86 64 1000 164.97 165.02 164.99 128 1000 174.17 174.27 174.21 256 1000 191.24 191.34 191.28 512 1000 227.31 227.49 227.40 1024 1000 303.83 303.99 303.90 2048 1000 423.25 423.32 423.28 4096 1000 665.85 666.51 666.15 8192 1000 1218.20 1218.36 1218.28 16384 1000 2332.58 2333.12 2332.93 32768 1000 4552.48 4554.20 4553.47 65536 640 9246.28 9247.00 9246.66 131072 320 18110.51 18112.54 18111.62 262144 160 40031.48 40069.38 40046.21 524288 80 87019.93 87183.17 87089.33 1048576 40 180151.02 180836.15 180471.12 2097152 20 369050.10 371684.50 370323.11 4194304 10 736743.00 746436.00 741577.92 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.09 0.07 4 1000 273.10 273.32 273.20 8 1000 274.12 274.25 274.19 16 1000 275.17 275.28 275.23 32 1000 275.81 276.12 275.97 64 1000 287.83 288.01 287.94 128 1000 306.75 307.06 306.89 256 1000 342.69 342.79 342.74 512 1000 425.24 425.47 425.36 1024 1000 575.91 576.20 576.00 2048 1000 957.96 958.34 958.13 4096 1000 1697.29 1697.94 1697.62 8192 1000 3330.77 3332.05 3331.59 16384 1000 6719.36 6721.67 6720.74 32768 1000 15143.10 15147.44 15145.94 65536 640 34783.45 34804.59 34792.23 131072 320 72717.69 72758.25 72736.36 262144 160 133252.07 133451.44 133344.04 524288 80 240169.37 240950.29 240522.28 1048576 40 462593.97 465473.00 463970.55 2097152 20 918229.35 928194.10 924489.96 4194304 10 1774204.10 1818395.00 1802560.01 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.10 0.09 4 1000 69.84 69.94 69.89 8 1000 70.07 70.14 70.10 16 1000 70.34 70.43 70.38 32 1000 70.48 70.54 70.51 64 1000 72.97 73.10 73.04 128 1000 76.85 76.94 76.90 256 1000 83.62 83.68 83.65 512 1000 98.08 98.18 98.13 1024 1000 124.86 124.95 124.90 2048 1000 157.01 157.14 157.07 4096 1000 226.29 226.46 226.38 8192 1000 369.45 369.68 369.57 16384 1000 659.25 659.58 659.41 32768 1000 1304.09 1304.73 1304.41 65536 640 2734.42 2736.15 2735.29 131072 320 6833.00 6840.38 6836.69 262144 160 11396.09 11417.71 11406.90 524288 80 22047.25 22114.36 22080.81 1048576 40 43429.80 43665.90 43547.85 2097152 20 85782.65 86671.05 86226.85 4194304 10 169395.40 172846.80 171121.10 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 4 1000 824.87 825.12 825.01 8 1000 473.00 473.25 473.14 16 1000 141.65 141.85 141.77 32 1000 137.97 138.22 138.08 64 1000 144.35 144.60 144.47 128 1000 153.14 153.43 153.27 256 1000 169.49 169.79 169.64 512 1000 203.90 204.21 204.06 1024 1000 269.85 270.19 270.01 2048 1000 338.73 339.13 338.94 4096 1000 525.78 526.53 526.13 8192 1000 871.74 872.92 872.27 16384 1000 1575.15 1577.18 1576.04 32768 1000 3095.61 3099.53 3097.37 65536 640 8712.09 8723.88 8717.29 131072 320 12898.05 12945.04 12918.75 262144 160 24704.30 24868.42 24779.74 524288 80 48103.46 48704.51 48409.56 1048576 40 94430.47 96725.78 95656.94 2097152 20 184589.45 193685.40 189578.93 4194304 10 361179.60 390843.30 376551.02 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.15 0.11 4 1000 1667.73 1668.01 1667.90 8 1000 1214.98 1215.47 1215.26 16 1000 910.37 910.90 910.63 32 1000 626.30 626.56 626.41 64 1000 230.79 231.26 231.02 128 1000 245.51 245.95 245.76 256 1000 271.12 271.65 271.40 512 1000 321.91 322.64 322.31 1024 1000 421.25 422.11 421.68 2048 1000 535.99 536.86 536.43 4096 1000 844.25 845.83 845.00 8192 1000 1429.33 1431.84 1430.51 16384 1000 2685.46 2690.00 2687.58 32768 1000 5545.95 5555.18 5550.12 65536 640 12608.89 12638.59 12622.39 131072 320 27253.15 27379.67 27318.65 262144 160 44134.05 44573.87 44366.77 524288 80 89428.08 91093.26 90376.60 1048576 40 163971.38 170360.95 167761.47 2097152 20 319983.35 342650.40 334160.01 4194304 10 614117.00 681806.40 644377.78 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 82.43 82.45 82.44 4 1000 85.33 85.40 85.36 8 1000 87.99 88.02 88.00 16 1000 87.69 87.72 87.71 32 1000 88.74 88.83 88.79 64 1000 89.64 89.65 89.65 128 1000 92.56 92.57 92.56 256 1000 96.59 96.67 96.63 512 1000 104.69 104.69 104.69 1024 1000 109.59 109.64 109.62 2048 1000 137.56 137.61 137.58 4096 1000 193.79 193.84 193.82 8192 1000 302.58 302.63 302.61 16384 1000 556.94 557.12 557.03 32768 1000 1122.36 1122.89 1122.62 65536 640 2175.86 2177.38 2176.62 131072 320 4782.66 4783.25 4782.95 262144 160 9186.41 9187.64 9187.03 524288 80 18650.71 18660.93 18655.82 1048576 40 36677.12 36704.65 36690.89 2097152 20 72589.15 72669.70 72629.42 4194304 10 145286.30 145453.40 145369.85 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 170.41 170.54 170.48 4 1000 176.12 176.14 176.13 8 1000 176.03 176.14 176.09 16 1000 177.58 177.79 177.68 32 1000 181.04 181.28 181.16 64 1000 183.61 183.72 183.68 128 1000 189.05 189.13 189.10 256 1000 201.36 201.56 201.46 512 1000 223.72 223.81 223.76 1024 1000 296.40 296.50 296.46 2048 1000 354.53 354.61 354.59 4096 1000 464.02 464.18 464.11 8192 1000 637.48 637.69 637.60 16384 1000 1019.64 1019.95 1019.81 32768 1000 1818.96 1819.36 1819.21 65536 640 3469.12 3469.85 3469.50 131072 320 6835.21 6839.46 6837.88 262144 160 14983.89 14990.28 14986.72 524288 80 39906.48 39946.46 39927.19 1048576 40 78179.67 78345.98 78278.03 2097152 20 151729.20 153538.20 152616.31 4194304 10 317982.20 323242.40 320811.55 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 280.62 280.96 280.78 4 1000 289.79 290.03 289.92 8 1000 291.70 292.02 291.85 16 1000 294.36 294.59 294.47 32 1000 295.68 295.76 295.71 64 1000 303.36 303.66 303.51 128 1000 316.91 317.17 317.02 256 1000 343.64 343.83 343.74 512 1000 398.12 398.53 398.34 1024 1000 678.64 678.78 678.73 2048 1000 737.82 738.01 737.93 4096 1000 886.62 886.90 886.77 8192 1000 1173.56 1173.88 1173.73 16384 1000 1959.99 1960.28 1960.12 32768 1000 3427.76 3428.31 3428.00 65536 640 6768.42 6769.91 6768.96 131072 320 17349.03 17355.79 17352.24 262144 160 32451.94 32477.22 32468.97 524288 80 63331.44 63435.25 63378.40 1048576 40 137629.88 143015.82 140916.43 2097152 20 261386.95 263021.75 262393.41 4194304 10 521129.20 527417.40 525652.31 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 77.90 77.98 77.94 2 1000 77.53 77.56 77.54 4 1000 77.75 77.79 77.77 8 1000 77.96 78.01 77.98 16 1000 78.14 78.21 78.18 32 1000 79.25 79.34 79.30 64 1000 80.98 81.02 81.00 128 1000 84.87 84.91 84.89 256 1000 92.36 92.43 92.39 512 1000 106.37 106.45 106.41 1024 1000 134.18 134.27 134.22 2048 1000 190.48 190.49 190.49 4096 1000 299.55 299.69 299.62 8192 1000 554.93 555.18 555.05 16384 1000 1122.91 1123.47 1123.19 32768 1000 2218.65 2219.68 2219.17 65536 640 4687.93 4688.08 4688.00 131072 320 9132.65 9133.70 9133.17 262144 160 17533.96 17540.14 17537.05 524288 80 34068.17 34073.75 34070.96 1048576 40 67575.97 67588.85 67582.41 2097152 20 134764.65 134770.45 134767.55 4194304 10 269109.60 269162.30 269135.95 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 165.80 165.95 165.87 2 1000 165.99 166.08 166.03 4 1000 165.88 165.99 165.95 8 1000 166.79 166.92 166.85 16 1000 166.78 166.82 166.80 32 1000 170.14 170.31 170.22 64 1000 179.09 179.25 179.17 128 1000 190.91 191.03 190.97 256 1000 221.33 221.49 221.41 512 1000 277.23 277.45 277.33 1024 1000 360.74 360.84 360.81 2048 1000 545.87 546.00 545.92 4096 1000 936.82 937.62 937.26 8192 1000 1769.82 1770.31 1770.12 16384 1000 3460.77 3462.07 3461.60 32768 1000 6876.72 6877.09 6876.84 65536 640 13467.05 13468.97 13467.65 131072 320 28011.92 28020.06 28014.68 262144 160 61199.92 61257.41 61218.47 524288 80 125382.91 125663.31 125507.22 1048576 40 261962.65 263168.40 262519.87 2097152 20 522994.90 527937.70 525398.50 4194304 10 1040112.10 1060698.00 1050241.35 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.12 0.11 1 1000 274.53 274.68 274.61 2 1000 275.81 276.05 275.91 4 1000 274.73 274.80 274.76 8 1000 277.64 277.81 277.72 16 1000 282.84 283.01 282.92 32 1000 292.69 292.81 292.76 64 1000 315.57 315.86 315.72 128 1000 362.75 362.96 362.83 256 1000 451.50 451.85 451.65 512 1000 653.24 653.63 653.43 1024 1000 1108.96 1109.77 1109.34 2048 1000 2015.52 2016.62 2016.15 4096 1000 3961.61 3963.91 3962.72 8192 1000 8737.02 8739.43 8738.10 16384 1000 18936.31 18944.58 18940.54 32768 1000 43925.96 43946.51 43935.04 65536 640 77959.96 78011.96 77982.90 131072 320 145584.64 145735.41 145646.02 262144 160 284972.20 285682.03 285320.78 524288 80 544311.71 546921.84 545808.56 1048576 40 1064406.85 1075988.35 1070618.03 2097152 20 2076354.30 2109422.70 2094827.56 4194304 10 3988659.60 4120680.30 4072423.85 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.13 0.12 1 1000 82.52 82.54 82.53 2 1000 83.01 83.10 83.06 4 1000 83.62 83.67 83.64 8 1000 82.15 82.17 82.16 16 1000 82.57 82.61 82.59 32 1000 83.80 83.90 83.85 64 1000 86.60 86.65 86.62 128 1000 91.70 91.77 91.74 256 1000 98.06 98.10 98.08 512 1000 113.12 113.15 113.13 1024 1000 139.52 139.64 139.58 2048 1000 198.77 198.83 198.80 4096 1000 311.97 312.12 312.04 8192 1000 559.25 559.41 559.33 16384 1000 1117.46 1117.86 1117.66 32768 1000 2320.70 2321.30 2321.00 65536 640 5407.41 5408.80 5408.11 131072 320 10249.83 10250.80 10250.31 262144 160 18820.08 18825.98 18823.03 524288 80 34333.00 34346.14 34339.57 1048576 40 68209.10 68230.70 68219.90 2097152 20 136182.45 136233.10 136207.77 4194304 10 272324.70 272463.00 272393.85 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.13 0.12 1 1000 171.30 171.49 171.40 2 1000 172.65 172.70 172.68 4 1000 172.39 172.53 172.46 8 1000 172.89 172.97 172.93 16 1000 173.88 173.95 173.91 32 1000 176.75 176.79 176.76 64 1000 184.51 184.57 184.54 128 1000 198.14 198.20 198.18 256 1000 229.50 229.66 229.57 512 1000 292.52 292.69 292.61 1024 1000 370.85 370.93 370.90 2048 1000 557.65 557.97 557.80 4096 1000 953.29 953.48 953.40 8192 1000 1810.62 1811.19 1811.00 16384 1000 3677.17 3678.31 3677.98 32768 1000 7590.64 7593.67 7592.61 65536 640 15229.08 15230.29 15229.51 131072 320 35613.11 35645.45 35630.82 262144 160 66657.65 66763.17 66713.20 524288 80 134494.94 134915.05 134700.40 1048576 40 265027.65 266307.17 265713.60 2097152 20 536493.90 541285.20 539025.17 4194304 10 1103969.70 1122147.60 1113306.62 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.15 0.13 1 1000 284.65 284.90 284.74 2 1000 286.55 286.81 286.70 4 1000 285.11 285.37 285.24 8 1000 286.61 286.81 286.70 16 1000 292.40 292.51 292.45 32 1000 303.13 303.27 303.18 64 1000 326.05 326.14 326.10 128 1000 374.62 374.93 374.77 256 1000 466.18 466.51 466.34 512 1000 664.94 665.36 665.10 1024 1000 1115.81 1116.51 1116.09 2048 1000 2036.34 2037.61 2036.95 4096 1000 4035.08 4037.19 4036.05 8192 1000 7938.84 7943.24 7940.57 16384 1000 18669.36 18678.74 18673.45 32768 1000 38236.45 38253.14 38244.24 65536 640 64222.22 64248.43 64234.90 131072 320 147657.44 147778.75 147733.24 262144 160 275290.74 275791.59 275585.18 524288 80 607347.06 609214.03 608539.22 1048576 40 1161386.07 1167183.47 1164997.78 2097152 20 2149594.10 2166766.40 2159047.67 4194304 10 4166258.40 4249073.90 4209397.52 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 81.58 81.59 81.58 1 1000 82.43 82.49 82.46 2 1000 82.64 82.68 82.66 4 1000 82.92 82.95 82.93 8 1000 82.42 82.50 82.46 16 1000 83.54 83.55 83.55 32 1000 86.35 86.35 86.35 64 1000 89.77 89.85 89.81 128 1000 82.69 82.73 82.71 256 1000 90.08 90.15 90.11 512 1000 105.77 105.84 105.80 1024 1000 132.89 132.93 132.91 2048 1000 189.35 189.38 189.36 4096 1000 296.93 296.97 296.95 8192 1000 554.57 554.80 554.69 16384 1000 1118.62 1119.07 1118.84 32768 1000 2129.21 2130.17 2129.69 65536 640 4192.56 4192.91 4192.74 131072 320 9046.32 9047.18 9046.75 262144 160 17387.01 17393.92 17390.47 524288 80 34188.45 34202.67 34195.56 1048576 40 68130.28 68161.08 68145.68 2097152 20 135482.25 135544.55 135513.40 4194304 10 270638.50 270751.30 270694.90 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 160.92 160.94 160.93 1 1000 162.20 162.36 162.27 2 1000 162.94 163.01 162.98 4 1000 162.98 163.06 163.01 8 1000 166.36 166.49 166.40 16 1000 174.64 174.77 174.71 32 1000 187.48 187.51 187.49 64 1000 120.64 120.69 120.67 128 1000 129.91 130.02 129.97 256 1000 149.96 150.04 150.00 512 1000 197.40 197.46 197.44 1024 1000 287.08 287.18 287.13 2048 1000 480.83 480.92 480.87 4096 1000 837.31 837.51 837.41 8192 1000 1665.98 1666.39 1666.14 16384 1000 3223.02 3224.02 3223.58 32768 1000 6141.37 6142.36 6141.83 65536 640 13570.79 13572.07 13571.44 131072 320 38675.20 38706.29 38687.40 262144 160 77645.31 77761.69 77693.11 524288 80 147377.72 147820.79 147593.64 1048576 40 294534.52 295806.98 295209.11 2097152 20 592645.85 599634.85 596620.70 4194304 10 1187632.10 1214953.00 1203484.98 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 283.26 283.44 283.37 1 1000 286.70 286.98 286.88 2 1000 293.66 293.83 293.76 4 1000 300.43 300.52 300.47 8 1000 324.70 324.86 324.77 16 1000 374.57 374.89 374.73 32 1000 314.40 314.65 314.48 64 1000 319.70 319.81 319.73 128 1000 338.18 338.38 338.26 256 1000 396.33 396.51 396.44 512 1000 568.70 569.01 568.83 1024 1000 895.95 896.29 896.11 2048 1000 1806.65 1807.47 1807.02 4096 1000 3285.67 3287.10 3286.44 8192 1000 6102.76 6105.50 6104.23 16384 1000 14551.80 14557.42 14554.97 32768 1000 32907.39 32910.96 32909.44 65536 640 68980.55 68990.74 68986.40 131072 320 152592.43 152673.72 152642.23 262144 160 260328.80 260609.22 260485.70 524288 80 516811.42 517731.21 517378.55 1048576 40 996075.27 999066.25 997804.96 2097152 20 2052121.50 2065594.95 2060715.48 4194304 10 3933852.20 3985191.20 3961218.61 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 1 1000 69.25 69.28 69.27 2 1000 68.71 68.75 68.73 4 1000 69.88 69.94 69.91 8 1000 68.94 69.01 68.98 16 1000 69.46 69.51 69.48 32 1000 69.62 69.69 69.65 64 1000 71.85 71.90 71.87 128 1000 75.47 75.54 75.50 256 1000 81.93 82.00 81.97 512 1000 96.07 96.16 96.12 1024 1000 123.20 123.32 123.26 2048 1000 153.77 153.91 153.84 4096 1000 223.35 223.61 223.48 8192 1000 361.60 362.01 361.80 16384 1000 889.17 889.38 889.27 32768 1000 1737.08 1737.43 1737.25 65536 640 5115.68 5117.27 5116.48 131072 320 6396.34 6397.58 6396.96 262144 160 17578.66 17584.36 17581.51 524288 80 28895.36 28907.81 28901.59 1048576 40 51103.72 51131.70 51117.71 2097152 20 100709.40 100763.60 100736.50 4194304 10 220498.20 220665.50 220581.85 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.09 0.08 1 1000 824.15 824.30 824.20 2 1000 966.32 966.46 966.37 4 1000 827.47 827.61 827.52 8 1000 956.94 957.13 957.01 16 1000 360.47 360.68 360.54 32 1000 79.08 79.26 79.14 64 1000 83.79 83.91 83.83 128 1000 90.64 90.79 90.69 256 1000 103.69 103.83 103.73 512 1000 129.88 130.05 129.94 1024 1000 179.64 179.92 179.76 2048 1000 253.73 254.05 253.88 4096 1000 422.31 422.90 422.62 8192 1000 756.16 757.15 756.71 16384 1000 1565.19 1565.65 1565.41 32768 1000 2996.69 2997.55 2997.18 65536 640 5789.49 5791.74 5790.96 131072 320 13160.88 13162.91 13161.99 262144 160 28252.59 28305.69 28278.68 524288 80 55805.10 55925.44 55851.23 1048576 40 93976.30 94391.00 94145.56 2097152 20 185677.45 187286.30 186427.54 4194304 10 366708.90 372735.80 369622.80 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.09 0.08 1 1000 1582.83 1583.16 1582.94 2 1000 1220.50 1220.89 1220.68 4 1000 963.65 963.97 963.84 8 1000 840.85 841.08 841.01 16 1000 1129.06 1129.49 1129.25 32 1000 732.03 732.38 732.22 64 1000 107.02 107.39 107.16 128 1000 116.42 116.71 116.54 256 1000 131.17 131.45 131.34 512 1000 167.61 167.97 167.76 1024 1000 241.06 241.49 241.26 2048 1000 387.63 388.15 387.92 4096 1000 701.41 702.48 701.84 8192 1000 1373.04 1374.86 1373.76 16384 1000 2748.31 2749.44 2749.00 32768 1000 5338.16 5340.11 5339.07 65536 640 12165.93 12171.31 12168.73 131072 320 25047.39 25074.46 25061.47 262144 160 55003.90 55075.90 55037.51 524288 80 91435.85 91756.27 91574.49 1048576 40 176487.50 178179.25 177500.08 2097152 20 329177.25 335287.00 332979.71 4194304 10 636744.60 651508.60 645719.33 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 75.20 75.24 75.22 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 161.19 161.37 161.29 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 8 ) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 269.26 269.51 269.42 #===================================================== # # Thanks for using PMB2.2 # # The Pallas team kindly requests that you # give us as much feedback for PMB as possible. # # It would be very helpful when you sent the # output tables of your run(s) of PMB to # # ####################### # # # # # pmb@pallas.com # # # # # ####################### # # You might also add # # - personal information (institution, motivation # for using PMB) # - basic information about the machine you used # (number of CPUs, processor type e.t.c.) # #=====================================================