#--------------------------------------------------- # PALLAS MPI Benchmark Suite V2.2, MPI-1 part #--------------------------------------------------- # Date : Thu May 22 16:47:03 2003 # Machine : i686# System : Linux # Release : 2.4.20-8smp # Version : #1 SMP Thu Mar 13 16:43:01 EST 2003 # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 1.55 0.00 1 1000 1.81 0.53 2 1000 1.83 1.04 4 1000 1.88 2.03 8 1000 1.92 3.97 16 1000 1.87 8.16 32 1000 1.87 16.31 64 1000 1.93 31.62 128 1000 2.37 51.48 256 1000 3.02 80.72 512 1000 4.33 112.74 1024 1000 6.93 140.88 2048 1000 12.06 162.00 4096 1000 22.17 176.19 8192 1000 42.76 182.71 16384 1000 87.46 178.66 32768 1000 139.94 223.31 65536 640 297.59 210.02 131072 320 612.15 204.20 262144 160 1208.69 206.83 524288 80 2353.48 212.45 1048576 40 4655.98 214.78 2097152 20 9307.02 214.89 4194304 10 18471.05 216.56 #--------------------------------------------------- # Benchmarking PingPing # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 2.56 0.00 1 1000 2.98 0.32 2 1000 2.99 0.64 4 1000 3.06 1.24 8 1000 3.04 2.51 16 1000 3.11 4.91 32 1000 3.09 9.88 64 1000 3.17 19.27 128 1000 3.78 32.30 256 1000 4.49 54.33 512 1000 5.94 82.24 1024 1000 8.91 109.64 2048 1000 14.79 132.05 4096 1000 26.95 144.96 8192 1000 51.84 150.71 16384 1000 99.04 157.76 32768 1000 193.08 161.85 65536 640 528.92 118.16 131072 320 1197.15 104.41 262144 160 2420.89 103.27 524288 80 4826.93 103.59 1048576 40 9620.50 103.94 2097152 20 19248.45 103.90 4194304 10 38550.60 103.76 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 2.77 2.77 2.77 0.00 1 1000 2.93 2.93 2.93 0.65 2 1000 2.99 2.99 2.99 1.28 4 1000 3.00 3.00 3.00 2.54 8 1000 3.02 3.02 3.02 5.05 16 1000 3.03 3.03 3.03 10.07 32 1000 3.03 3.03 3.03 20.14 64 1000 3.11 3.11 3.11 39.19 128 1000 3.58 3.58 3.58 68.21 256 1000 4.32 4.32 4.32 112.98 512 1000 5.73 5.73 5.73 170.40 1024 1000 8.58 8.58 8.58 227.50 2048 1000 14.21 14.21 14.21 274.80 4096 1000 25.90 25.90 25.90 301.59 8192 1000 49.03 49.04 49.04 318.65 16384 1000 99.17 99.17 99.17 315.12 32768 1000 193.35 193.36 193.36 323.23 65536 640 530.88 530.92 530.90 235.44 131072 320 1199.50 1199.65 1199.58 208.39 262144 160 2418.55 2418.56 2418.56 206.73 524288 80 4843.96 4844.06 4844.01 206.44 1048576 40 9619.18 9619.25 9619.21 207.92 2097152 20 19251.75 19251.90 19251.83 207.77 4194304 10 38571.89 38572.70 38572.30 207.40 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 16.47 16.51 16.49 0.00 1 1000 16.33 16.36 16.34 0.12 2 1000 16.56 16.58 16.56 0.23 4 1000 16.48 16.52 16.51 0.46 8 1000 17.52 17.54 17.53 0.87 16 1000 16.61 16.65 16.63 1.83 32 1000 16.69 16.72 16.71 3.65 64 1000 16.92 16.95 16.94 7.20 128 1000 18.90 18.94 18.93 12.89 256 1000 19.38 19.41 19.40 25.15 512 1000 21.55 21.60 21.57 45.22 1024 1000 27.67 27.70 27.68 70.50 2048 1000 40.36 40.42 40.39 96.65 4096 1000 65.79 65.87 65.83 118.60 8192 1000 118.12 118.19 118.15 132.20 16384 1000 265.98 266.13 266.06 117.42 32768 1000 421.94 422.15 422.05 148.05 65536 640 792.48 793.13 792.83 157.60 131072 320 1599.49 1601.86 1600.73 156.07 262144 160 3575.66 3584.84 3580.26 139.48 524288 80 6094.32 6114.76 6104.60 163.54 1048576 40 13649.78 13798.08 13724.95 144.95 2097152 20 27632.95 28196.20 27915.74 141.86 4194304 10 51640.81 53890.20 52772.25 148.45 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 8 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 15.88 15.94 15.91 0.00 1 1000 15.90 15.95 15.92 0.12 2 1000 15.98 16.02 16.00 0.24 4 1000 15.96 16.03 16.00 0.48 8 1000 15.88 15.92 15.90 0.96 16 1000 16.14 16.19 16.16 1.88 32 1000 16.30 16.34 16.31 3.74 64 1000 16.24 16.28 16.26 7.50 128 1000 18.23 18.30 18.26 13.34 256 1000 18.82 18.85 18.84 25.90 512 1000 21.20 21.23 21.21 46.01 1024 1000 27.65 27.69 27.68 70.53 2048 1000 40.93 41.00 40.97 95.29 4096 1000 69.35 69.48 69.43 112.44 8192 1000 121.45 121.61 121.53 128.48 16384 1000 257.53 257.82 257.67 121.21 32768 1000 471.46 472.11 471.82 132.38 65536 640 806.64 808.70 807.88 154.57 131072 320 1677.25 1683.20 1680.92 148.53 262144 160 3367.79 3376.63 3372.86 148.08 524288 80 6564.19 6641.81 6602.22 150.56 1048576 40 12567.08 12800.98 12711.64 156.24 2097152 20 27053.65 28161.70 27680.78 142.04 4194304 10 45826.79 54753.20 51347.25 146.11 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 4.80 4.80 4.80 0.00 1 1000 5.47 5.47 5.47 0.70 2 1000 5.86 5.86 5.86 1.30 4 1000 5.55 5.55 5.55 2.75 8 1000 5.55 5.56 5.55 5.49 16 1000 5.59 5.60 5.60 10.91 32 1000 5.68 5.68 5.68 21.47 64 1000 5.82 5.82 5.82 41.94 128 1000 7.10 7.10 7.10 68.76 256 1000 8.96 8.96 8.96 109.02 512 1000 12.48 12.48 12.48 156.50 1024 1000 19.38 19.38 19.38 201.59 2048 1000 33.24 33.25 33.25 234.96 4096 1000 60.92 60.93 60.93 256.43 8192 1000 117.46 117.48 117.47 266.01 16384 1000 195.57 195.57 195.57 319.57 32768 1000 384.52 384.53 384.53 325.07 65536 640 1062.87 1062.90 1062.88 235.21 131072 320 2382.92 2382.97 2382.95 209.82 262144 160 4823.27 4823.32 4823.30 207.33 524288 80 9654.36 9654.45 9654.41 207.16 1048576 40 19224.13 19224.15 19224.14 208.07 2097152 20 38432.85 38438.90 38435.88 208.12 4194304 10 76894.50 76896.00 76895.25 208.07 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 29.69 29.72 29.71 0.00 1 1000 29.82 29.85 29.83 0.13 2 1000 29.75 29.79 29.77 0.26 4 1000 29.77 29.79 29.78 0.51 8 1000 29.75 29.76 29.76 1.03 16 1000 30.23 30.25 30.24 2.02 32 1000 30.41 30.45 30.43 4.01 64 1000 30.70 30.75 30.73 7.94 128 1000 35.55 35.59 35.57 13.72 256 1000 37.51 37.55 37.53 26.01 512 1000 41.58 41.62 41.60 46.93 1024 1000 52.48 52.53 52.51 74.36 2048 1000 78.39 78.46 78.43 99.57 4096 1000 126.91 127.01 126.96 123.02 8192 1000 237.20 237.30 237.26 131.69 16384 1000 473.74 473.87 473.83 131.89 32768 1000 814.50 814.68 814.61 153.43 65536 640 1771.24 1772.24 1771.84 141.06 131072 320 3902.79 3905.95 3904.67 128.01 262144 160 7210.08 7220.81 7216.88 138.49 524288 80 14346.90 14395.71 14376.11 138.93 1048576 40 28188.75 28362.88 28305.04 141.03 2097152 20 56054.60 56731.65 56507.38 141.01 4194304 10 112639.09 115359.70 114464.02 138.70 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 8 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 29.71 29.76 29.73 0.00 1 1000 30.69 30.73 30.71 0.12 2 1000 29.59 29.64 29.62 0.26 4 1000 29.64 29.72 29.68 0.51 8 1000 29.72 29.78 29.76 1.02 16 1000 30.12 30.18 30.15 2.02 32 1000 30.22 30.27 30.25 4.03 64 1000 30.48 30.54 30.51 7.99 128 1000 34.91 34.96 34.93 13.97 256 1000 37.14 37.19 37.16 26.26 512 1000 41.55 41.61 41.59 46.94 1024 1000 53.37 53.41 53.39 73.14 2048 1000 79.09 79.19 79.13 98.65 4096 1000 135.61 135.75 135.69 115.10 8192 1000 253.64 253.98 253.81 123.04 16384 1000 502.83 503.18 503.05 124.21 32768 1000 1010.85 1011.73 1011.31 123.55 65536 640 1770.57 1772.44 1771.71 141.05 131072 320 3415.14 3422.77 3419.70 146.08 262144 160 7199.27 7221.33 7212.39 138.48 524288 80 13778.60 13878.57 13836.05 144.11 1048576 40 27192.15 27583.50 27413.94 145.01 2097152 20 55319.70 56825.55 55990.50 140.78 4194304 10 105671.60 114626.00 110858.45 139.58 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 4 1000 4.51 4.51 4.51 8 1000 4.49 4.49 4.49 16 1000 4.58 4.58 4.58 32 1000 4.68 4.68 4.68 64 1000 4.75 4.75 4.75 128 1000 5.80 5.80 5.80 256 1000 6.59 6.59 6.59 512 1000 8.69 8.69 8.69 1024 1000 12.54 12.54 12.54 2048 1000 20.44 20.45 20.45 4096 1000 36.10 36.10 36.10 8192 1000 67.98 67.98 67.98 16384 1000 136.20 136.20 136.20 32768 1000 272.57 272.58 272.57 65536 640 760.84 760.88 760.86 131072 320 2693.79 2693.98 2693.89 262144 160 5351.55 5351.76 5351.65 524288 80 10982.65 10983.75 10983.20 1048576 40 21896.90 21901.20 21899.05 2097152 20 43909.95 43925.50 43917.72 4194304 10 89924.30 89924.80 89924.55 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 4 1000 34.30 34.32 34.31 8 1000 33.21 33.24 33.23 16 1000 33.91 33.92 33.91 32 1000 34.36 34.40 34.38 64 1000 34.74 34.77 34.75 128 1000 41.12 41.14 41.13 256 1000 44.02 44.05 44.04 512 1000 51.85 51.89 51.87 1024 1000 65.19 65.20 65.20 2048 1000 100.93 100.96 100.95 4096 1000 174.17 174.21 174.19 8192 1000 328.97 329.06 329.02 16384 1000 736.51 736.75 736.63 32768 1000 1339.98 1340.28 1340.14 65536 640 2665.48 2665.80 2665.67 131072 320 11591.16 11600.27 11595.73 262144 160 18137.24 18160.18 18148.69 524288 80 29042.25 29081.25 29061.68 1048576 40 53432.05 53499.73 53465.57 2097152 20 105497.75 105690.50 105593.92 4194304 10 232849.10 233558.20 233207.88 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.05 0.05 0.05 4 1000 62.73 62.78 62.76 8 1000 62.66 62.68 62.68 16 1000 63.92 63.97 63.94 32 1000 64.46 64.50 64.49 64 1000 64.96 65.02 65.00 128 1000 75.97 75.99 75.98 256 1000 83.91 83.96 83.94 512 1000 94.75 94.80 94.78 1024 1000 120.62 120.66 120.64 2048 1000 180.96 181.03 181.00 4096 1000 306.44 306.52 306.48 8192 1000 563.27 563.39 563.33 16384 1000 1659.88 1660.28 1660.06 32768 1000 3131.90 3132.61 3132.29 65536 640 6351.33 6352.62 6351.99 131072 320 16723.73 16736.82 16731.17 262144 160 25196.01 25223.78 25208.38 524288 80 41363.58 41419.39 41390.89 1048576 40 76214.82 76387.57 76301.63 2097152 20 150619.20 151264.24 150947.62 4194304 10 318144.70 320264.60 319171.21 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 4 1000 2.37 2.37 2.37 8 1000 2.48 2.48 2.48 16 1000 2.42 2.42 2.42 32 1000 2.44 2.44 2.44 64 1000 2.55 2.56 2.55 128 1000 3.20 3.20 3.20 256 1000 3.95 3.95 3.95 512 1000 5.40 5.41 5.40 1024 1000 8.29 8.30 8.29 2048 1000 13.99 14.00 13.99 4096 1000 25.41 25.42 25.41 8192 1000 49.75 49.78 49.76 16384 1000 103.22 103.28 103.25 32768 1000 176.01 176.09 176.05 65536 640 873.78 874.25 874.01 131072 320 1975.07 1977.18 1976.12 262144 160 4000.82 4008.83 4004.82 524288 80 7890.19 7921.67 7905.93 1048576 40 15578.12 15700.65 15639.39 2097152 20 30880.40 31364.30 31122.35 4194304 10 65243.21 66895.31 66069.26 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 4 1000 20.46 20.51 20.49 8 1000 20.56 20.60 20.59 16 1000 21.59 21.64 21.61 32 1000 21.44 21.49 21.47 64 1000 21.91 21.96 21.93 128 1000 25.70 25.75 25.72 256 1000 31.62 31.66 31.64 512 1000 39.27 39.34 39.30 1024 1000 54.41 54.49 54.45 2048 1000 85.79 85.93 85.86 4096 1000 133.02 133.25 133.14 8192 1000 222.75 223.12 222.93 16384 1000 468.49 468.90 468.73 32768 1000 819.31 820.01 819.70 65536 640 5486.61 5490.72 5487.91 131072 320 7774.35 7786.70 7780.53 262144 160 11481.44 11510.91 11494.92 524288 80 18730.32 18827.99 18775.50 1048576 40 33770.98 34272.93 34042.38 2097152 20 59492.36 61365.05 60496.43 4194304 10 115002.11 123233.90 119488.50 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 4 1000 28.82 28.90 28.86 8 1000 28.97 29.03 29.00 16 1000 30.09 30.14 30.12 32 1000 30.53 30.58 30.56 64 1000 31.02 31.10 31.06 128 1000 38.07 38.16 38.11 256 1000 51.94 52.07 52.00 512 1000 62.53 62.64 62.58 1024 1000 87.47 87.64 87.56 2048 1000 136.64 136.94 136.79 4096 1000 215.56 216.04 215.80 8192 1000 380.77 381.63 381.18 16384 1000 718.33 719.44 718.93 32768 1000 1289.65 1291.68 1290.77 65536 640 6681.53 6691.37 6686.82 131072 320 9919.59 9945.50 9933.30 262144 160 14765.31 14860.26 14818.88 524288 80 23881.00 24155.65 24020.02 1048576 40 42489.02 43516.17 43022.86 2097152 20 75364.00 79439.55 77417.73 4194304 10 148737.10 166400.00 157945.67 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 6.88 6.88 6.88 4 1000 8.02 8.02 8.02 8 1000 8.93 8.93 8.93 16 1000 8.90 8.90 8.90 32 1000 8.98 8.98 8.98 64 1000 9.15 9.15 9.15 128 1000 9.91 9.91 9.91 256 1000 11.12 11.12 11.12 512 1000 13.32 13.32 13.32 1024 1000 9.16 9.16 9.16 2048 1000 13.05 13.05 13.05 4096 1000 21.22 21.22 21.22 8192 1000 37.07 37.08 37.07 16384 1000 69.82 69.83 69.82 32768 1000 141.23 141.23 141.23 65536 640 285.62 285.63 285.62 131072 320 934.32 934.33 934.33 262144 160 2648.51 2648.56 2648.54 524288 80 5527.91 5528.10 5528.01 1048576 40 11146.32 11146.45 11146.39 2097152 20 22335.20 22341.65 22338.43 4194304 10 44650.50 44650.70 44650.60 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 34.59 34.62 34.60 4 1000 36.03 36.06 36.05 8 1000 36.32 36.36 36.35 16 1000 37.03 37.06 37.05 32 1000 37.93 37.94 37.93 64 1000 38.63 38.66 38.65 128 1000 40.28 40.29 40.28 256 1000 45.96 46.00 45.98 512 1000 51.72 51.75 51.74 1024 1000 84.98 85.03 85.01 2048 1000 95.75 95.79 95.77 4096 1000 124.06 124.11 124.09 8192 1000 186.54 186.61 186.57 16384 1000 304.68 304.79 304.73 32768 1000 552.29 552.47 552.37 65536 640 1095.78 1096.00 1095.91 131072 320 1853.00 1853.67 1853.37 262144 160 3671.19 3672.19 3671.57 524288 80 12684.07 12718.02 12701.02 1048576 40 21869.15 21938.98 21903.99 2097152 20 38792.25 38936.15 38863.66 4194304 10 73018.30 73284.51 73154.80 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 64.77 64.80 64.79 4 1000 68.35 68.39 68.37 8 1000 67.58 67.62 67.60 16 1000 68.59 68.63 68.61 32 1000 70.36 70.39 70.38 64 1000 70.70 70.74 70.72 128 1000 75.08 75.12 75.10 256 1000 87.40 87.44 87.42 512 1000 97.12 97.18 97.15 1024 1000 220.53 220.58 220.56 2048 1000 237.15 237.20 237.17 4096 1000 261.84 261.89 261.86 8192 1000 336.13 336.17 336.15 16384 1000 501.75 501.81 501.78 32768 1000 846.97 847.08 847.02 65536 640 1546.53 1546.78 1546.65 131072 320 3025.03 3025.85 3025.46 262144 160 5226.34 5229.42 5227.98 524288 80 9721.70 9729.85 9726.63 1048576 40 25459.33 25551.60 25506.34 2097152 20 47776.35 47954.10 47858.69 4194304 10 90894.20 91297.20 91091.32 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 3.94 3.95 3.95 2 1000 5.15 5.15 5.15 4 1000 4.01 4.01 4.01 8 1000 4.04 4.04 4.04 16 1000 4.07 4.07 4.07 32 1000 4.15 4.15 4.15 64 1000 4.29 4.29 4.29 128 1000 4.85 4.85 4.85 256 1000 5.96 5.96 5.96 512 1000 7.79 7.80 7.80 1024 1000 11.63 11.63 11.63 2048 1000 19.39 19.39 19.39 4096 1000 34.30 34.30 34.30 8192 1000 64.75 64.75 64.75 16384 1000 129.25 129.25 129.25 32768 1000 249.62 249.62 249.62 65536 640 633.88 633.90 633.89 131072 320 1695.05 1695.08 1695.06 262144 160 3687.63 3687.75 3687.69 524288 80 7389.77 7390.06 7389.92 1048576 40 14795.95 14796.30 14796.13 2097152 20 29583.65 29587.10 29585.38 4194304 10 59111.01 59111.30 59111.15 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.10 0.09 1 1000 33.05 33.08 33.07 2 1000 33.18 33.22 33.21 4 1000 33.00 33.03 33.02 8 1000 33.61 33.63 33.61 16 1000 33.96 34.01 33.99 32 1000 34.24 34.26 34.25 64 1000 40.83 40.85 40.84 128 1000 42.99 43.03 43.00 256 1000 50.86 50.89 50.88 512 1000 61.28 61.30 61.29 1024 1000 91.76 91.79 91.77 2048 1000 157.05 157.11 157.08 4096 1000 286.06 286.09 286.08 8192 1000 603.51 603.59 603.55 16384 1000 1121.76 1122.15 1121.95 32768 1000 2063.36 2063.80 2063.59 65536 640 4181.10 4182.97 4182.05 131072 320 8537.72 8538.25 8538.02 262144 160 16631.33 16632.37 16631.93 524288 80 31674.88 31677.33 31676.29 1048576 40 61626.48 61633.35 61630.01 2097152 20 112737.40 113895.40 113316.48 4194304 10 281895.40 281923.10 281909.75 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 1 1000 62.91 62.93 62.92 2 1000 62.88 62.91 62.89 4 1000 63.48 63.51 63.49 8 1000 64.57 64.62 64.60 16 1000 64.70 64.74 64.72 32 1000 71.28 71.32 71.30 64 1000 79.45 79.47 79.46 128 1000 88.26 88.30 88.27 256 1000 106.73 106.78 106.75 512 1000 150.56 150.65 150.60 1024 1000 233.00 233.09 233.04 2048 1000 402.47 402.60 402.54 4096 1000 803.71 803.96 803.85 8192 1000 1704.90 1705.18 1705.03 16384 1000 3455.00 3455.60 3455.29 32768 1000 6853.22 6853.97 6853.54 65536 640 13295.84 13297.24 13296.43 131072 320 22511.00 22511.99 22511.46 262144 160 39097.41 39121.89 39109.46 524288 80 76512.89 76622.33 76568.71 1048576 40 150322.30 150783.52 150552.98 2097152 20 323737.85 325651.95 324712.03 4194304 10 678441.20 686751.50 682590.61 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.10 0.11 0.10 1 1000 5.57 5.57 5.57 2 1000 5.71 5.71 5.71 4 1000 5.73 5.73 5.73 8 1000 5.76 5.76 5.76 16 1000 5.84 5.84 5.84 32 1000 5.97 5.97 5.97 64 1000 6.34 6.35 6.35 128 1000 7.16 7.16 7.16 256 1000 8.53 8.54 8.53 512 1000 11.30 11.30 11.30 1024 1000 16.93 16.93 16.93 2048 1000 28.19 28.19 28.19 4096 1000 50.86 50.86 50.86 8192 1000 96.72 96.72 96.72 16384 1000 190.57 190.57 190.57 32768 1000 384.95 384.96 384.96 65536 640 1644.97 1645.01 1644.99 131072 320 3534.70 3535.31 3535.01 262144 160 3679.03 3679.08 3679.05 524288 80 7404.36 7404.53 7404.44 1048576 40 14725.55 14725.80 14725.68 2097152 20 29465.35 29465.50 29465.43 4194304 10 58995.60 59010.40 59003.00 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.11 0.11 1 1000 34.47 34.50 34.49 2 1000 34.49 34.51 34.50 4 1000 34.54 34.57 34.55 8 1000 35.06 35.09 35.07 16 1000 35.62 35.66 35.64 32 1000 36.18 36.21 36.20 64 1000 43.62 43.66 43.65 128 1000 48.68 48.71 48.69 256 1000 54.91 54.94 54.93 512 1000 71.35 71.43 71.39 1024 1000 103.57 103.60 103.58 2048 1000 178.36 178.43 178.39 4096 1000 323.42 323.46 323.44 8192 1000 692.24 692.31 692.26 16384 1000 1295.66 1296.09 1295.89 32768 1000 8121.78 8124.45 8123.11 65536 640 11383.49 11388.41 11385.96 131072 320 5505.36 5508.34 5506.85 262144 160 10980.96 10986.56 10984.08 524288 80 20626.95 20665.14 20646.02 1048576 40 41407.40 41499.13 41453.31 2097152 20 84467.05 84817.15 84642.21 4194304 10 168807.90 170161.60 169488.53 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.12 0.12 1 1000 67.58 67.60 67.59 2 1000 66.46 66.50 66.47 4 1000 67.65 67.68 67.66 8 1000 68.52 68.56 68.54 16 1000 69.07 69.09 69.08 32 1000 77.45 77.48 77.47 64 1000 87.28 87.34 87.31 128 1000 97.00 97.06 97.03 256 1000 116.33 116.40 116.36 512 1000 172.90 172.99 172.94 1024 1000 254.32 254.45 254.38 2048 1000 442.99 443.11 443.07 4096 1000 902.05 902.28 902.18 8192 1000 1871.04 1871.53 1871.33 16384 1000 8715.24 8718.11 8716.66 32768 1000 12999.59 13002.72 13001.18 65536 640 8043.66 8046.12 8044.99 131072 320 13126.15 13131.57 13129.12 262144 160 24699.54 24720.01 24711.80 524288 80 48057.02 48141.01 48109.94 1048576 40 96156.40 96415.08 96298.70 2097152 20 187293.80 187951.20 187630.58 4194304 10 484367.10 490443.10 487873.71 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 5.53 5.53 5.53 1 1000 5.90 5.90 5.90 2 1000 6.05 6.05 6.05 4 1000 5.83 5.83 5.83 8 1000 5.88 5.88 5.88 16 1000 5.89 5.89 5.89 32 1000 6.50 6.50 6.50 64 1000 7.06 7.07 7.06 128 1000 4.85 4.86 4.85 256 1000 5.84 5.84 5.84 512 1000 8.86 8.86 8.86 1024 1000 11.90 11.91 11.91 2048 1000 19.13 19.14 19.14 4096 1000 34.24 34.25 34.24 8192 1000 64.40 64.40 64.40 16384 1000 129.74 129.74 129.74 32768 1000 262.93 262.93 262.93 65536 640 850.64 850.64 850.64 131072 320 1806.37 1806.39 1806.38 262144 160 3701.12 3701.15 3701.13 524288 80 7434.71 7434.75 7434.73 1048576 40 14755.45 14755.68 14755.56 2097152 20 29551.45 29551.55 29551.50 4194304 10 59253.80 59260.09 59256.95 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 34.12 34.14 34.13 1 1000 35.72 35.76 35.74 2 1000 35.25 35.27 35.26 4 1000 35.58 35.61 35.60 8 1000 35.93 35.97 35.96 16 1000 42.97 42.98 42.98 32 1000 46.34 46.37 46.35 64 1000 60.48 60.52 60.50 128 1000 69.65 69.69 69.68 256 1000 72.97 73.00 72.98 512 1000 77.95 77.98 77.97 1024 1000 98.28 98.33 98.31 2048 1000 148.87 148.92 148.90 4096 1000 261.32 261.42 261.37 8192 1000 472.31 472.48 472.41 16384 1000 855.87 855.96 855.93 32768 1000 1495.69 1495.91 1495.81 65536 640 3335.22 3335.61 3335.43 131072 320 6552.33 6552.82 6552.61 262144 160 13072.64 13073.03 13072.89 524288 80 25049.83 25050.42 25050.13 1048576 40 50434.48 50435.90 50435.33 2097152 20 105799.06 105802.75 105800.31 4194304 10 213121.81 213239.19 213180.00 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 66.20 66.24 66.22 1 1000 68.13 68.18 68.16 2 1000 69.86 69.90 69.88 4 1000 77.50 77.53 77.51 8 1000 85.83 85.86 85.84 16 1000 95.44 95.48 95.46 32 1000 178.92 178.97 178.95 64 1000 178.83 178.94 178.88 128 1000 207.88 207.95 207.92 256 1000 213.16 213.23 213.20 512 1000 225.37 225.44 225.42 1024 1000 299.01 299.12 299.08 2048 1000 454.73 454.88 454.82 4096 1000 818.93 819.17 819.05 8192 1000 1497.92 1498.36 1498.18 16384 1000 2637.08 2637.39 2637.31 32768 1000 4843.32 4843.52 4843.40 65536 640 9027.44 9027.95 9027.78 131072 320 17225.92 17227.75 17227.22 262144 160 33946.34 33951.21 33949.57 524288 80 66524.76 66540.39 66535.00 1048576 40 132303.95 132363.55 132338.90 2097152 20 331270.00 332678.45 332091.42 4194304 10 653667.89 659281.19 656810.47 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 1 1000 2.05 2.06 2.06 2 1000 2.02 2.02 2.02 4 1000 2.08 2.08 2.08 8 1000 2.05 2.06 2.06 16 1000 2.12 2.12 2.12 32 1000 2.10 2.10 2.10 64 1000 2.15 2.15 2.15 128 1000 2.69 2.69 2.69 256 1000 3.35 3.35 3.35 512 1000 4.65 4.65 4.65 1024 1000 7.28 7.28 7.28 2048 1000 12.45 12.45 12.45 4096 1000 22.51 22.52 22.52 8192 1000 43.05 43.07 43.06 16384 1000 91.82 91.82 91.82 32768 1000 186.44 186.44 186.44 65536 640 369.25 369.27 369.26 131072 320 876.01 876.19 876.10 262144 160 1749.81 1750.17 1749.99 524288 80 3633.89 3634.00 3633.94 1048576 40 7189.68 7190.78 7190.23 2097152 20 14357.70 14357.95 14357.83 4194304 10 28684.20 28684.40 28684.30 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 1 1000 18.76 18.81 18.79 2 1000 18.82 18.85 18.83 4 1000 18.81 18.85 18.83 8 1000 18.84 18.87 18.86 16 1000 19.01 19.05 19.03 32 1000 19.28 19.32 19.30 64 1000 19.50 19.55 19.54 128 1000 21.09 21.14 21.12 256 1000 23.08 23.12 23.11 512 1000 27.94 27.99 27.97 1024 1000 37.08 37.15 37.12 2048 1000 54.76 54.87 54.82 4096 1000 91.14 91.30 91.24 8192 1000 157.61 157.89 157.79 16384 1000 448.57 448.69 448.64 32768 1000 869.64 869.82 869.74 65536 640 1575.21 1575.26 1575.24 131072 320 2831.28 2831.48 2831.37 262144 160 5451.23 5455.87 5453.62 524288 80 10657.37 10658.52 10657.98 1048576 40 19648.57 19651.90 19650.50 2097152 20 37982.60 37988.75 37985.34 4194304 10 74870.20 75971.09 75422.25 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 1 1000 29.15 29.21 29.18 2 1000 29.26 29.33 29.30 4 1000 29.11 29.18 29.14 8 1000 28.88 28.95 28.92 16 1000 29.43 29.49 29.47 32 1000 29.55 29.61 29.59 64 1000 29.78 29.85 29.83 128 1000 30.64 30.74 30.71 256 1000 32.76 32.85 32.81 512 1000 38.88 38.98 38.94 1024 1000 52.61 52.73 52.70 2048 1000 80.95 81.16 81.09 4096 1000 139.57 139.89 139.79 8192 1000 253.58 254.14 253.97 16384 1000 636.06 636.26 636.15 32768 1000 1175.74 1176.14 1175.95 65536 640 1829.30 1829.75 1829.61 131072 320 3525.12 3527.09 3526.00 262144 160 6639.05 6646.07 6642.41 524288 80 12923.09 12952.21 12937.70 1048576 40 24414.65 24529.47 24471.60 2097152 20 45620.65 45693.30 45656.37 4194304 10 89398.80 91121.30 90249.30 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 2.85 2.85 2.85 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 33.42 33.45 33.43 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 8 ) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 61.93 61.96 61.95 #===================================================== # # Thanks for using PMB2.2 # # The Pallas team kindly requests that you # give us as much feedback for PMB as possible. # # It would be very helpful when you sent the # output tables of your run(s) of PMB to # # ####################### # # # # # pmb@pallas.com # # # # # ####################### # # You might also add # # - personal information (institution, motivation # for using PMB) # - basic information about the machine you used # (number of CPUs, processor type e.t.c.) # #=====================================================