#--------------------------------------------------- # PALLAS MPI Benchmark Suite V2.2, MPI-1 part #--------------------------------------------------- # Date : Thu Apr 22 15:13:05 2004 # Machine : i686# System : Linux # Release : 2.4.22-1.2174.nptlsmp # Version : #1 SMP Wed Feb 18 16:22:03 EST 2004 # # Minimum message length in bytes: 0 # Maximum message length in bytes: 4194304 # # MPI_Datatype : MPI_BYTE # MPI_Datatype for reductions : MPI_FLOAT # MPI_Op : MPI_SUM # # # List of Benchmarks to run: # PingPong # PingPing # Sendrecv # Exchange # Allreduce # Reduce # Reduce_scatter # Allgather # Allgatherv # Alltoall # Bcast # Barrier #--------------------------------------------------- # Benchmarking PingPong # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 60.09 0.00 1 1000 60.53 0.02 2 1000 60.15 0.03 4 1000 60.35 0.06 8 1000 60.29 0.13 16 1000 60.45 0.25 32 1000 60.91 0.50 64 1000 61.82 0.99 128 1000 63.90 1.91 256 1000 67.20 3.63 512 1000 75.47 6.47 1024 1000 91.05 10.73 2048 1000 113.66 17.18 4096 1000 126.19 30.96 8192 1000 188.86 41.37 16384 1000 284.17 54.99 32768 1000 482.00 64.83 65536 640 962.13 64.96 131072 320 2480.96 50.38 262144 160 3986.78 62.71 524288 80 7401.14 67.56 1048576 40 14879.62 67.21 2097152 20 29041.18 68.87 4194304 10 57777.15 69.23 #--------------------------------------------------- # Benchmarking PingPing # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #bytes #repetitions t[usec] Mbytes/sec 0 1000 64.69 0.00 1 1000 64.33 0.01 2 1000 63.78 0.03 4 1000 64.15 0.06 8 1000 64.18 0.12 16 1000 64.57 0.24 32 1000 65.11 0.47 64 1000 66.79 0.91 128 1000 68.30 1.79 256 1000 71.31 3.42 512 1000 83.51 5.85 1024 1000 99.40 9.82 2048 1000 126.10 15.49 4096 1000 148.85 26.24 8192 1000 248.29 31.47 16384 1000 443.35 35.24 32768 1000 739.23 42.27 65536 640 1613.52 38.74 131072 320 3905.41 32.01 262144 160 7804.84 32.03 524288 80 15165.51 32.97 1048576 40 26801.18 37.31 2097152 20 52537.10 38.07 4194304 10 161540.90 24.76 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 610.48 611.01 610.75 0.00 1 1000 610.84 610.94 610.89 0.00 2 1000 611.23 611.40 611.31 0.01 4 1000 610.65 610.78 610.72 0.01 8 1000 611.29 611.43 611.36 0.02 16 1000 611.40 611.51 611.46 0.05 32 1000 611.46 611.62 611.54 0.10 64 1000 610.99 611.11 611.05 0.20 128 1000 613.61 613.69 613.65 0.40 256 1000 612.89 612.98 612.94 0.80 512 1000 617.79 617.91 617.85 1.58 1024 1000 622.33 622.50 622.41 3.14 2048 1000 639.19 639.29 639.24 6.11 4096 1000 663.00 664.14 663.57 11.76 8192 1000 715.02 716.21 715.61 21.82 16384 1000 827.07 827.56 827.32 37.76 32768 1000 1334.43 1334.99 1334.71 46.82 65536 640 2393.33 2395.30 2394.32 52.19 131072 320 7909.09 7910.67 7909.88 31.60 262144 160 16803.62 16824.01 16813.82 29.72 524288 80 30562.45 30605.91 30584.18 32.67 1048576 40 53393.67 53480.87 53437.27 37.40 2097152 20 96974.65 97342.35 97158.50 41.09 4194304 10 196076.90 197566.40 196821.65 40.49 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 308.15 309.22 308.71 0.00 1 1000 308.58 309.70 309.01 0.01 2 1000 308.45 309.57 308.90 0.01 4 1000 308.47 309.56 308.90 0.02 8 1000 308.40 309.47 308.82 0.05 16 1000 784.52 784.61 784.56 0.04 32 1000 308.69 309.78 309.16 0.20 64 1000 309.07 310.19 309.53 0.39 128 1000 404.54 405.58 405.01 0.60 256 1000 407.77 409.37 408.43 1.19 512 1000 315.53 316.61 315.95 3.08 1024 1000 320.67 321.72 321.07 6.07 2048 1000 335.34 335.94 335.75 11.63 4096 1000 360.10 361.31 360.64 21.62 8192 1000 430.67 432.06 431.22 36.16 16384 1000 686.74 688.44 687.65 45.39 32768 1000 969.99 971.95 971.05 64.30 65536 640 2324.90 2329.28 2327.24 53.66 131072 320 6185.41 6205.29 6195.76 40.29 262144 160 17149.26 17190.04 17168.30 29.09 524288 80 23312.12 23486.04 23408.00 42.58 1048576 40 42145.63 42852.00 42538.63 46.67 2097152 20 88972.05 92549.55 91036.55 43.22 4194304 10 224824.60 235933.00 232271.20 33.91 #----------------------------------------------------------------------------- # Benchmarking Sendrecv # ( #processes = 8 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 1770.72 1808.77 1785.53 0.00 1 1000 1822.95 1826.15 1824.66 0.00 2 1000 952.19 954.12 953.32 0.00 4 1000 1268.01 1270.19 1269.18 0.01 8 1000 1294.31 1297.14 1295.77 0.01 16 1000 1238.65 1240.93 1239.51 0.02 32 1000 1261.11 1262.41 1261.72 0.05 64 1000 838.36 840.43 839.18 0.15 128 1000 274.22 276.05 275.19 0.88 256 1000 179.64 181.19 180.30 2.69 512 1000 201.64 203.76 202.89 4.79 1024 1000 269.04 270.95 270.11 7.21 2048 1000 322.60 325.41 324.31 12.00 4096 1000 407.26 410.15 408.91 19.05 8192 1000 593.12 595.41 594.39 26.24 16384 1000 853.96 856.37 855.21 36.49 32768 1000 1585.53 1590.96 1588.84 39.28 65536 640 4809.99 4845.72 4830.31 25.80 131072 320 7558.02 7577.38 7570.22 32.99 262144 160 11862.49 11932.34 11902.12 41.90 524288 80 22871.00 23278.77 23106.52 42.96 1048576 40 46158.80 47354.45 46921.66 42.23 2097152 20 90843.90 96796.65 94114.32 41.32 4194304 10 183931.20 205149.10 195070.36 39.00 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 78.66 78.73 78.69 0.00 1 1000 78.34 78.42 78.38 0.05 2 1000 79.32 79.36 79.34 0.10 4 1000 79.01 79.03 79.02 0.19 8 1000 78.87 78.92 78.90 0.39 16 1000 80.31 80.34 80.33 0.76 32 1000 81.98 82.04 82.01 1.49 64 1000 81.73 81.79 81.76 2.98 128 1000 84.50 84.50 84.50 5.78 256 1000 88.46 88.50 88.48 11.03 512 1000 105.04 105.09 105.07 18.58 1024 1000 124.86 124.88 124.87 31.28 2048 1000 166.72 166.74 166.73 46.85 4096 1000 262.95 263.02 262.98 59.41 8192 1000 379.43 379.59 379.51 82.33 16384 1000 720.75 720.85 720.80 86.70 32768 1000 1463.27 1463.31 1463.29 85.42 65536 640 3238.92 3240.45 3239.69 77.15 131072 320 7728.53 7729.59 7729.06 64.69 262144 160 15994.99 15997.11 15996.05 62.51 524288 80 28642.51 28644.49 28643.50 69.82 1048576 40 53597.12 53598.67 53597.90 74.63 2097152 20 109389.00 109408.70 109398.85 73.12 4194304 10 213712.20 213824.30 213768.25 74.83 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 78.61 78.70 78.64 0.00 1 1000 80.13 80.22 80.16 0.05 2 1000 80.26 80.41 80.31 0.09 4 1000 80.58 80.70 80.62 0.19 8 1000 80.26 80.39 80.33 0.38 16 1000 80.93 81.00 80.98 0.75 32 1000 81.16 81.29 81.25 1.50 64 1000 82.99 83.09 83.06 2.94 128 1000 84.65 84.75 84.72 5.76 256 1000 89.49 89.62 89.58 10.90 512 1000 102.65 102.72 102.69 19.01 1024 1000 121.82 121.99 121.94 32.02 2048 1000 165.97 166.10 166.06 47.03 4096 1000 223.79 223.98 223.91 69.76 8192 1000 441.68 442.20 441.91 70.67 16384 1000 744.54 745.17 744.81 83.87 32768 1000 1674.93 1675.95 1675.38 74.58 65536 640 4308.56 4312.73 4310.85 57.97 131072 320 8175.23 8185.26 8179.96 61.09 262144 160 16863.81 16893.41 16880.70 59.19 524288 80 33073.58 33275.74 33185.28 60.10 1048576 40 61970.85 62599.75 62319.09 63.90 2097152 20 128545.90 131826.15 130520.99 60.69 4194304 10 242547.30 254700.00 249935.05 62.82 #----------------------------------------------------------------------------- # Benchmarking Exchange # ( #processes = 8 ) #----------------------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] Mbytes/sec 0 1000 94.52 94.73 94.65 0.00 1 1000 95.20 95.34 95.26 0.04 2 1000 94.85 94.97 94.93 0.08 4 1000 95.60 95.83 95.71 0.16 8 1000 95.39 95.53 95.46 0.32 16 1000 95.65 95.71 95.69 0.64 32 1000 95.91 96.14 96.05 1.27 64 1000 97.47 97.64 97.56 2.50 128 1000 99.13 99.27 99.21 4.92 256 1000 103.29 103.42 103.35 9.44 512 1000 118.06 118.25 118.16 16.52 1024 1000 138.54 138.86 138.67 28.13 2048 1000 204.36 204.66 204.53 38.17 4096 1000 296.12 296.52 296.33 52.70 8192 1000 583.97 584.67 584.34 53.45 16384 1000 1272.70 1274.02 1273.41 49.06 32768 1000 7313.31 7399.74 7357.28 16.89 65536 640 85265.10 85537.59 85398.29 2.92 131072 320 145579.67 146052.38 145890.88 3.42 262144 160 346835.08 353136.49 350781.95 2.83 524288 80 583991.63 601001.50 592293.13 3.33 1048576 40 1311817.50 1339205.28 1330845.85 2.99 2097152 20 3222245.00 3498070.05 3403568.63 2.29 4194304 10 929159.00 990897.20 967217.36 16.15 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 4 1000 611.19 611.41 611.30 8 1000 611.35 611.47 611.41 16 1000 611.65 612.73 612.19 32 1000 611.70 611.77 611.73 64 1000 610.48 610.59 610.53 128 1000 612.67 612.78 612.72 256 1000 613.97 614.08 614.03 512 1000 617.76 617.84 617.80 1024 1000 623.20 623.29 623.25 2048 1000 639.16 639.18 639.17 4096 1000 663.57 664.72 664.15 8192 1000 718.13 718.29 718.21 16384 1000 837.41 837.82 837.61 32768 1000 1640.48 1641.30 1640.89 65536 640 3685.26 3686.48 3685.87 131072 320 8055.36 8057.93 8056.65 262144 160 16626.14 16640.14 16633.14 524288 80 32830.84 32887.10 32858.97 1048576 40 64795.30 65019.50 64907.40 2097152 20 136817.80 137663.65 137240.72 4194304 10 273378.90 276705.00 275041.95 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.06 0.06 4 1000 818.71 819.83 819.52 8 1000 818.94 820.06 819.73 16 1000 818.85 819.99 819.65 32 1000 819.02 820.15 819.79 64 1000 819.28 820.41 820.06 128 1000 820.62 821.73 821.40 256 1000 823.60 824.71 824.40 512 1000 830.61 831.70 831.36 1024 1000 841.86 843.02 842.43 2048 1000 875.71 876.73 876.47 4096 1000 941.78 943.20 942.31 8192 1000 1026.74 1027.28 1027.07 16384 1000 1631.29 1632.20 1631.88 32768 1000 2890.96 2891.32 2891.13 65536 640 5683.88 5688.56 5686.72 131072 320 8018.85 8023.20 8021.27 262144 160 19552.16 19555.02 19553.97 524288 80 34215.50 34230.46 34219.84 1048576 40 66048.05 66071.65 66064.10 2097152 20 133836.60 133916.10 133868.48 4194304 10 253790.70 254801.80 254316.45 #---------------------------------------------------------------- # Benchmarking Allreduce # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.06 0.07 0.06 4 1000 230.74 230.90 230.85 8 1000 229.79 229.93 229.86 16 1000 230.32 230.46 230.38 32 1000 232.54 232.69 232.62 64 1000 236.45 236.59 236.54 128 1000 242.88 243.03 242.97 256 1000 256.40 256.67 256.51 512 1000 289.31 289.64 289.45 1024 1000 347.26 347.55 347.41 2048 1000 463.82 464.08 463.95 4096 1000 624.71 625.03 624.85 8192 1000 1130.73 1131.05 1130.90 16384 1000 2632.23 2632.59 2632.44 32768 1000 5341.18 5342.42 5341.74 65536 640 10530.35 10534.72 10532.91 131072 320 30025.13 30058.20 30042.41 262144 160 62436.83 62572.62 62497.55 524288 80 124390.34 124805.29 124632.72 1048576 40 246293.90 247735.02 246989.34 2097152 20 350416.25 351613.05 351031.56 4194304 10 4619756.30 5000156.70 4848255.41 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.11 0.10 4 1000 61.41 61.47 61.44 8 1000 61.90 61.96 61.93 16 1000 61.96 62.02 61.99 32 1000 62.22 62.29 62.26 64 1000 63.42 63.51 63.47 128 1000 65.63 65.70 65.66 256 1000 69.34 69.41 69.37 512 1000 77.85 77.95 77.90 1024 1000 93.56 93.67 93.62 2048 1000 116.75 116.87 116.81 4096 1000 132.03 132.13 132.08 8192 1000 199.86 199.97 199.91 16384 1000 297.04 297.16 297.10 32768 1000 596.21 596.48 596.34 65536 640 1334.97 1335.78 1335.37 131072 320 3305.07 3308.31 3306.69 262144 160 6789.95 6806.97 6798.46 524288 80 12808.11 12866.94 12837.52 1048576 40 24691.42 24906.85 24799.14 2097152 20 48872.30 49749.85 49311.08 4194304 10 94360.30 97845.20 96102.75 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.09 0.09 4 1000 1370.85 1371.49 1371.23 8 1000 482.14 483.26 482.93 16 1000 441.61 442.17 441.97 32 1000 433.33 433.91 433.69 64 1000 444.46 445.02 444.82 128 1000 499.52 500.54 500.13 256 1000 538.57 539.73 539.36 512 1000 635.65 636.87 636.29 1024 1000 663.51 665.14 664.49 2048 1000 1050.61 1052.98 1052.26 4096 1000 4370.46 4373.13 4372.17 8192 1000 1758.86 1763.09 1761.81 16384 1000 3252.61 3259.73 3257.59 32768 1000 5950.97 5960.07 5956.82 65536 640 12336.00 12369.12 12356.39 131072 320 32511.45 32614.40 32567.56 262144 160 64231.28 64587.52 64440.82 524288 80 150400.36 151762.07 151238.45 1048576 40 304361.97 309950.05 308020.43 2097152 20 491373.15 513244.00 506045.00 4194304 10 921958.60 1007552.30 977042.23 #---------------------------------------------------------------- # Benchmarking Reduce # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.10 0.09 4 1000 4538.60 4543.60 4541.81 8 1000 4525.54 4531.24 4528.53 16 1000 2950.39 2956.57 2954.38 32 1000 2797.29 2805.89 2801.65 64 1000 3573.84 3581.98 3578.59 128 1000 3916.74 3924.92 3920.28 256 1000 3803.59 3809.24 3806.95 512 1000 2805.06 2814.89 2810.87 1024 1000 2800.47 2808.67 2805.36 2048 1000 3623.46 3633.30 3629.60 4096 1000 3880.74 3896.29 3889.99 8192 1000 7889.09 7914.58 7902.14 16384 1000 10251.99 10282.90 10270.12 32768 1000 18563.46 18618.26 18595.94 65536 640 32793.96 32937.77 32875.85 131072 320 126829.05 127499.03 127286.66 262144 160 164102.88 165732.40 165156.33 524288 80 450904.24 470008.47 463014.27 1048576 40 1110371.52 1134560.17 1127312.17 2097152 20 1311786.95 1495543.85 1410009.04 4194304 10 3609637.80 4621573.60 4053122.62 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 611.53 611.54 611.54 4 1000 611.86 611.94 611.90 8 1000 611.71 611.85 611.78 16 1000 611.46 611.60 611.53 32 1000 611.41 611.54 611.47 64 1000 611.51 611.64 611.57 128 1000 611.31 611.46 611.38 256 1000 611.76 611.83 611.79 512 1000 614.09 614.16 614.13 1024 1000 618.00 618.06 618.03 2048 1000 622.54 622.62 622.58 4096 1000 639.13 640.23 639.68 8192 1000 663.58 663.64 663.61 16384 1000 715.82 717.05 716.44 32768 1000 825.25 826.76 826.01 65536 640 1645.95 1647.08 1646.52 131072 320 3341.69 3344.31 3343.00 262144 160 7090.24 7099.93 7095.08 524288 80 16776.55 16804.92 16790.74 1048576 40 32895.77 33002.72 32949.25 2097152 20 64664.35 65085.50 64874.92 4194304 10 139898.00 141619.30 140758.65 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 146.26 146.29 146.27 4 1000 150.16 150.27 150.22 8 1000 151.44 151.55 151.48 16 1000 151.77 151.86 151.81 32 1000 154.66 154.76 154.71 64 1000 156.57 156.73 156.66 128 1000 159.41 159.56 159.47 256 1000 165.23 165.30 165.26 512 1000 176.75 176.81 176.79 1024 1000 231.73 231.81 231.77 2048 1000 256.98 257.12 257.07 4096 1000 307.93 308.05 307.99 8192 1000 384.21 384.38 384.31 16384 1000 473.57 473.65 473.62 32768 1000 778.01 778.10 778.07 65536 640 1653.33 1653.76 1653.53 131072 320 3487.17 3488.62 3487.94 262144 160 6970.79 6978.79 6975.48 524288 80 13156.55 13178.39 13166.97 1048576 40 27676.78 27724.78 27708.35 2097152 20 55374.60 55619.25 55495.28 4194304 10 105825.00 107238.00 106665.97 #---------------------------------------------------------------- # Benchmarking Reduce_scatter # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 246.04 246.23 246.14 4 1000 249.09 249.23 249.17 8 1000 249.03 249.16 249.09 16 1000 251.02 251.28 251.17 32 1000 253.34 253.57 253.46 64 1000 255.79 256.03 255.91 128 1000 261.84 261.99 261.92 256 1000 272.28 272.47 272.36 512 1000 300.63 300.73 300.67 1024 1000 557.07 557.14 557.11 2048 1000 581.59 581.68 581.63 4096 1000 644.16 644.31 644.24 8192 1000 771.07 771.25 771.20 16384 1000 1038.02 1038.31 1038.17 32768 1000 1373.44 1373.72 1373.55 65536 640 2646.32 2646.89 2646.56 131072 320 6141.77 6143.23 6142.44 262144 160 11748.11 11751.90 11749.60 524288 80 26074.50 26126.70 26101.92 1048576 40 44617.87 44709.65 44672.22 2097152 20 94008.35 94444.85 94221.53 4194304 10 189423.80 190163.10 189818.90 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.11 0.10 1 1000 68.77 68.83 68.80 2 1000 68.37 68.43 68.40 4 1000 69.04 69.04 69.04 8 1000 68.70 68.76 68.73 16 1000 68.38 68.43 68.40 32 1000 68.43 68.47 68.45 64 1000 70.17 70.21 70.19 128 1000 72.43 72.47 72.45 256 1000 76.73 76.78 76.75 512 1000 89.39 89.45 89.42 1024 1000 105.44 105.49 105.46 2048 1000 132.80 132.87 132.83 4096 1000 154.91 154.98 154.95 8192 1000 242.61 242.71 242.66 16384 1000 457.50 457.55 457.52 32768 1000 982.63 982.79 982.71 65536 640 2139.52 2139.70 2139.61 131072 320 4249.27 4249.52 4249.40 262144 160 8946.77 8948.96 8947.87 524288 80 16648.27 16648.90 16648.59 1048576 40 31886.08 31889.65 31887.86 2097152 20 63136.70 63159.55 63148.13 4194304 10 123489.80 123588.20 123539.00 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.10 0.09 1 1000 139.98 140.11 140.04 2 1000 139.65 139.67 139.67 4 1000 139.65 139.74 139.70 8 1000 140.27 140.34 140.31 16 1000 141.81 141.87 141.84 32 1000 143.33 143.45 143.39 64 1000 146.05 146.14 146.10 128 1000 152.73 152.84 152.76 256 1000 163.95 164.02 163.98 512 1000 191.26 191.35 191.29 1024 1000 237.05 237.23 237.14 2048 1000 303.30 303.49 303.39 4096 1000 431.15 431.38 431.27 8192 1000 692.61 692.89 692.79 16384 1000 1354.26 1354.66 1354.48 32768 1000 2795.82 2796.30 2796.03 65536 640 5689.15 5690.29 5689.94 131072 320 11382.31 11382.96 11382.66 262144 160 21977.51 21987.33 21982.95 524288 80 42208.06 42234.60 42222.23 1048576 40 141867.08 143247.08 142504.70 2097152 20 299044.20 304098.85 301396.23 4194304 10 542434.80 560369.00 550714.85 #---------------------------------------------------------------- # Benchmarking Allgather # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.09 0.12 0.10 1 1000 945.39 945.98 945.81 2 1000 949.43 950.67 950.18 4 1000 944.73 945.89 945.58 8 1000 945.59 946.78 945.84 16 1000 946.55 947.17 947.00 32 1000 951.97 953.30 953.02 64 1000 952.66 953.85 953.40 128 1000 963.98 964.63 964.44 256 1000 982.51 983.30 982.71 512 1000 1035.48 1035.70 1035.63 1024 1000 1121.80 1123.06 1122.17 2048 1000 1613.75 1614.99 1614.09 4096 1000 2595.38 2596.20 2595.78 8192 1000 4340.55 4341.92 4341.17 16384 1000 8994.20 8998.84 8996.43 32768 1000 29959.24 30093.08 30018.67 65536 640 338898.73 339076.10 338985.27 131072 320 474769.56 474800.71 474782.84 262144 160 87392.55 87491.77 87443.51 524288 80 172545.33 172915.89 172762.89 1048576 40 424800.42 427868.05 425888.12 2097152 20 973304.25 989851.35 979795.11 4194304 10 4749396.10 5852027.30 5237895.85 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.11 0.11 1 1000 610.59 610.91 610.75 2 1000 611.44 612.18 611.81 4 1000 611.29 611.61 611.45 8 1000 611.23 611.65 611.44 16 1000 611.05 611.48 611.26 32 1000 612.08 612.46 612.27 64 1000 611.04 611.47 611.25 128 1000 612.37 612.76 612.56 256 1000 614.53 615.28 614.91 512 1000 617.49 617.80 617.65 1024 1000 622.76 623.11 622.94 2048 1000 639.48 640.31 639.89 4096 1000 664.54 664.70 664.62 8192 1000 741.28 742.20 741.74 16384 1000 831.92 833.14 832.53 32768 1000 2251.90 2252.23 2252.06 65536 640 4560.54 4560.72 4560.63 131072 320 8683.90 8684.11 8684.00 262144 160 14202.16 14214.19 14208.18 524288 80 29361.36 29383.21 29372.29 1048576 40 56106.82 56215.85 56161.34 2097152 20 118416.90 118802.50 118609.70 4194304 10 235873.10 237382.00 236627.55 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.13 0.11 1 1000 1068.09 1069.23 1068.63 2 1000 1304.25 1304.43 1304.35 4 1000 1534.34 1535.50 1534.80 8 1000 1069.91 1070.51 1070.29 16 1000 1068.88 1070.04 1069.69 32 1000 1078.99 1080.25 1079.61 64 1000 1090.72 1091.29 1090.99 128 1000 1108.65 1109.49 1109.14 256 1000 1228.42 1229.92 1229.45 512 1000 1372.86 1374.05 1373.24 1024 1000 1726.21 1727.75 1726.70 2048 1000 2351.85 2352.96 2352.54 4096 1000 3439.06 3443.08 3441.02 8192 1000 6055.78 6061.05 6058.27 16384 1000 11455.14 11468.27 11459.32 32768 1000 28088.85 28115.04 28099.55 65536 640 53373.14 53420.22 53390.17 131072 320 118754.67 118899.22 118823.47 262144 160 252323.93 252824.81 252577.00 524288 80 484155.70 486001.15 485277.24 1048576 40 994810.00 1002455.33 999611.49 2097152 20 1475658.70 1476941.90 1476344.55 4194304 10 337038.00 341460.90 339253.05 #---------------------------------------------------------------- # Benchmarking Allgatherv # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.11 0.13 0.12 1 1000 245.68 245.93 245.81 2 1000 241.89 241.99 241.94 4 1000 244.35 244.54 244.44 8 1000 242.92 243.10 243.00 16 1000 245.78 245.91 245.86 32 1000 249.07 249.12 249.09 64 1000 258.44 258.55 258.50 128 1000 279.35 279.55 279.45 256 1000 312.56 312.80 312.66 512 1000 382.55 382.71 382.64 1024 1000 491.39 491.61 491.48 2048 1000 792.95 793.31 793.15 4096 1000 1691.23 1691.83 1691.55 8192 1000 3593.81 3594.85 3594.48 16384 1000 7199.79 7202.18 7201.05 32768 1000 14642.20 14646.00 14644.45 65536 640 23536.10 23547.09 23541.43 131072 320 72288.58 72336.22 72313.01 262144 160 208981.46 211093.40 210396.36 524288 80 2737973.28 2745668.61 2742695.36 1048576 40 4456010.65 4457580.83 4456950.15 2097152 20 674774.80 680295.15 678123.23 4194304 10 1354147.30 1375249.40 1365214.11 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 74.11 74.13 74.12 1 1000 74.35 74.44 74.40 2 1000 74.42 74.45 74.44 4 1000 74.37 74.43 74.40 8 1000 74.25 74.31 74.28 16 1000 75.62 75.68 75.65 32 1000 76.24 76.27 76.25 64 1000 78.17 78.19 78.18 128 1000 72.70 72.74 72.72 256 1000 76.33 76.34 76.34 512 1000 87.35 87.40 87.38 1024 1000 104.76 104.84 104.80 2048 1000 130.72 130.81 130.76 4096 1000 151.95 152.05 152.00 8192 1000 238.44 238.53 238.48 16384 1000 425.31 425.39 425.35 32768 1000 1041.53 1041.62 1041.57 65536 640 2250.50 2251.19 2250.85 131072 320 4387.80 4388.01 4387.90 262144 160 8779.78 8782.28 8781.03 524288 80 17141.24 17146.91 17144.08 1048576 40 31755.83 31766.27 31761.05 2097152 20 62477.30 62532.60 62504.95 4194304 10 122287.30 122443.40 122365.35 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 144.40 144.45 144.42 1 1000 146.52 146.56 146.54 2 1000 146.54 146.58 146.56 4 1000 146.57 146.65 146.61 8 1000 149.10 149.19 149.15 16 1000 152.95 153.03 152.99 32 1000 158.60 158.71 158.68 64 1000 98.99 99.05 99.02 128 1000 102.01 102.07 102.05 256 1000 106.24 106.27 106.25 512 1000 118.81 118.89 118.85 1024 1000 144.40 144.47 144.44 2048 1000 212.93 213.01 212.98 4096 1000 301.23 301.30 301.27 8192 1000 556.62 556.79 556.70 16384 1000 1508.87 1509.39 1509.08 32768 1000 3628.09 3629.36 3628.60 65536 640 9121.20 9121.72 9121.44 131072 320 26651.15 26662.07 26656.09 262144 160 57652.72 59156.81 58765.63 524288 80 109608.69 109752.86 109679.75 1048576 40 228815.97 229259.40 229039.16 2097152 20 467309.55 470325.60 469110.46 4194304 10 996409.40 1006065.50 1001713.28 #---------------------------------------------------------------- # Benchmarking Alltoall # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 1354.11 1355.31 1354.71 1 1000 1301.38 1303.30 1302.20 2 1000 1424.58 1426.86 1425.64 4 1000 1439.21 1440.76 1439.92 8 1000 1486.44 1487.47 1486.96 16 1000 1730.95 1732.21 1731.60 32 1000 1030.63 1032.44 1031.45 64 1000 982.88 984.48 983.66 128 1000 989.01 990.86 990.12 256 1000 1027.46 1028.59 1028.16 512 1000 1298.61 1300.95 1299.71 1024 1000 1392.45 1394.34 1393.64 2048 1000 1464.90 1467.14 1465.79 4096 1000 2019.08 2022.04 2021.06 8192 1000 3436.40 3440.74 3438.50 16384 1000 7442.37 7448.55 7446.52 32768 1000 118388.25 118433.51 118404.49 65536 640 305757.25 305923.65 305848.31 131072 320 215908.84 216239.51 216194.13 262144 160 86887.45 86939.95 86918.86 524288 80 170257.89 170448.96 170378.45 1048576 40 459313.85 461535.78 460510.47 2097152 20 1279813.95 1291017.20 1285504.38 4194304 10 2223055.80 2246387.60 2234376.70 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.07 0.07 1 1000 607.25 608.23 607.74 2 1000 607.21 608.26 607.73 4 1000 606.64 607.62 607.13 8 1000 607.18 608.27 607.72 16 1000 606.51 607.54 607.03 32 1000 607.25 608.30 607.78 64 1000 607.39 608.45 607.92 128 1000 607.57 608.62 608.09 256 1000 608.68 609.76 609.22 512 1000 610.68 611.75 611.22 1024 1000 613.01 614.13 613.57 2048 1000 621.75 622.85 622.30 4096 1000 634.72 635.92 635.32 8192 1000 662.96 664.23 663.59 16384 1000 786.62 786.94 786.78 32768 1000 1456.92 1457.46 1457.19 65536 640 1624.85 1626.63 1625.74 131072 320 2627.32 2628.08 2627.70 262144 160 6164.01 6165.49 6164.75 524288 80 11765.85 11768.11 11766.98 1048576 40 22979.48 22984.23 22981.85 2097152 20 42482.30 42489.50 42485.90 4194304 10 83999.30 84048.80 84024.05 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.08 0.07 1 1000 63.85 63.99 63.90 2 1000 63.11 63.21 63.14 4 1000 63.19 63.32 63.24 8 1000 63.29 63.41 63.33 16 1000 63.34 63.44 63.37 32 1000 63.63 63.79 63.70 64 1000 65.15 65.27 65.20 128 1000 67.85 68.04 67.92 256 1000 71.87 71.99 71.91 512 1000 83.34 83.49 83.40 1024 1000 100.90 101.09 100.98 2048 1000 131.12 131.35 131.22 4096 1000 165.96 166.22 166.09 8192 1000 259.26 259.63 259.47 16384 1000 639.98 640.28 640.09 32768 1000 1129.90 1130.06 1130.00 65536 640 19837.82 19871.58 19849.60 131072 320 47730.82 47816.02 47764.23 262144 160 78812.98 79034.57 78912.79 524288 80 156723.14 157135.02 156863.42 1048576 40 294893.63 297729.35 296106.63 2097152 20 588131.95 600192.65 593861.53 4194304 10 1165719.10 1213348.10 1188965.82 #---------------------------------------------------------------- # Benchmarking Bcast # ( #processes = 8 ) #---------------------------------------------------------------- #bytes #repetitions t_min[usec] t_max[usec] t_avg[usec] 0 1000 0.07 0.08 0.07 1 1000 1936.46 1944.93 1940.97 2 1000 1917.07 1927.81 1922.08 4 1000 2130.73 2140.78 2135.07 8 1000 1877.88 1887.37 1882.12 16 1000 1898.35 1910.74 1904.86 32 1000 2016.36 2034.85 2021.99 64 1000 1945.43 1950.96 1947.94 128 1000 1866.14 1874.37 1870.00 256 1000 1872.89 1884.15 1879.78 512 1000 1473.51 1482.60 1477.88 1024 1000 1649.91 1659.96 1655.05 2048 1000 2731.22 2745.83 2738.21 4096 1000 3719.18 3736.23 3727.19 8192 1000 7163.80 7194.96 7181.03 16384 1000 14281.38 14299.69 14293.28 32768 1000 23931.99 23959.59 23949.97 65536 640 48928.37 48973.86 48949.48 131072 320 90586.52 90787.92 90717.47 262144 160 196964.54 197538.24 197227.98 524288 80 374444.95 376159.80 375162.24 1048576 40 965020.77 985582.85 972216.05 2097152 20 1691953.30 1731348.25 1711582.34 4194304 10 3155059.70 3308119.40 3237432.63 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 2 ) # ( 6 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 65.20 65.27 65.24 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 4 ) # ( 4 additional processes waiting in MPI_Barrier) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 978.16 979.28 978.47 #--------------------------------------------------- # Benchmarking Barrier # ( #processes = 8 ) #--------------------------------------------------- #repetitions t_min[usec] t_max[usec] t_avg[usec] 1000 3446.32 3448.86 3447.20 #===================================================== # # Thanks for using PMB2.2 # # The Pallas team kindly requests that you # give us as much feedback for PMB as possible. # # It would be very helpful when you sent the # output tables of your run(s) of PMB to # # ####################### # # # # # pmb@pallas.com # # # # # ####################### # # You might also add # # - personal information (institution, motivation # for using PMB) # - basic information about the machine you used # (number of CPUs, processor type e.t.c.) # #=====================================================