Results of parallel matrix-vector product (2000x2000)
esp3:ce107/work/SP2-tutorial% mpirun -np 1 mxv1x1
mxv achieves 71.7463784360419794 Mflop/s
( 100 iterations in 11.1476009999999999 secs)
blas_mxv achieves 99.5692428890716457 Mflop/s
( 100 iterations in 8.03260100000000143 secs)
pblas_mxv achieves 99.3758876175258337 Mflop/s
( 100 iterations in 8.04823000000000022 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 2 mxv2x1
mxv achieves 144.098112112943511 Mflop/s
( 100 iterations in 5.55038500000000035 secs)
blas_mxv achieves 196.387819535357920 Mflop/s
( 100 iterations in 4.07255399999999934 secs)
pblas_mxv achieves 191.058016613304318 Mflop/s
( 100 iterations in 4.18616300000000052 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 2 mxv1x2
mxv achieves 141.535026822285602 Mflop/s
( 100 iterations in 5.65089799999999975 secs)
blas_mxv achieves 189.705882352941160 Mflop/s
( 100 iterations in 4.21600000000000019 secs)
pblas_mxv achieves 192.144836557969569 Mflop/s
( 100 iterations in 4.16248500000000021 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 4 mxv4x1
mxv achieves 293.794500334458462 Mflop/s
( 100 iterations in 2.72231099999999993 secs)
blas_mxv achieves 357.190228582989107 Mflop/s
( 100 iterations in 2.23914299999999988 secs)
pblas_mxv achieves 333.598053969641626 Mflop/s
( 100 iterations in 2.39749600000000029 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 4 mxv2x2
mxv achieves 278.894992792248672 Mflop/s
( 100 iterations in 2.86774599999999991 secs)
blas_mxv achieves 374.601125676851041 Mflop/s
( 100 iterations in 2.13507099999999994 secs)
pblas_mxv achieves 368.168987631952461 Mflop/s
( 100 iterations in 2.17237200000000019 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 4 mxv1x4
mxv achieves 259.983844464778826 Mflop/s
( 100 iterations in 3.07634499999999989 secs)
blas_mxv achieves 316.182595251093915 Mflop/s
( 100 iterations in 2.52955100000000010 secs)
pblas_mxv achieves 356.925460682862081 Mflop/s
( 100 iterations in 2.24080399999999980 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 5 mxv5x1
mxv achieves 359.216729672568420 Mflop/s
( 100 iterations in 2.22651099999999991 secs)
blas_mxv achieves 477.774352872022462 Mflop/s
( 100 iterations in 1.67401199999999983 secs)
pblas_mxv achieves 422.731741350331333 Mflop/s
( 100 iterations in 1.89198000000000022 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 5 mxv1x5
mxv achieves 313.590120695732480 Mflop/s
( 100 iterations in 2.55046300000000015 secs)
blas_mxv achieves 400.509575574023586 Mflop/s
( 100 iterations in 1.99695600000000040 secs)
pblas_mxv achieves 431.336679904823484 Mflop/s
( 100 iterations in 1.85423599999999933 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 8 mxv8x1
mxv achieves 592.582713744318085 Mflop/s
( 100 iterations in 1.34968500000000002 secs)
blas_mxv achieves 718.815383619852810 Mflop/s
( 100 iterations in 1.11266400000000010 secs)
pblas_mxv achieves 588.967355685324378 Mflop/s
( 100 iterations in 1.35796999999999990 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 8 mxv4x2
mxv achieves 554.555949619860826 Mflop/s
( 100 iterations in 1.44223499999999993 secs)
blas_mxv achieves 675.531290410040128 Mflop/s
( 100 iterations in 1.18395700000000015 secs)
pblas_mxv achieves 628.763698683982966 Mflop/s
( 100 iterations in 1.27201999999999993 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 8 mxv2x4
mxv achieves 513.695330754794327 Mflop/s
( 100 iterations in 1.55695399999999995 secs)
blas_mxv achieves 661.038172102998942 Mflop/s
( 100 iterations in 1.20991500000000007 secs)
pblas_mxv achieves 672.701749546022938 Mflop/s
( 100 iterations in 1.18893700000000013 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 8 mxv1x8
mxv achieves 424.600271599773635 Mflop/s
( 100 iterations in 1.88365399999999994 secs)
blas_mxv achieves 532.799689833038769 Mflop/s
( 100 iterations in 1.50112699999999988 secs)
pblas_mxv achieves 608.740657299864438 Mflop/s
( 100 iterations in 1.31386000000000003 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 10 mxv10x1
mxv achieves 693.866680084776931 Mflop/s
( 100 iterations in 1.15267100000000000 secs)
blas_mxv achieves 926.472939066854678 Mflop/s
( 100 iterations in 0.863274000000000097 secs)
pblas_mxv achieves 685.106346527783558 Mflop/s
( 100 iterations in 1.16741000000000028 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 10 mxv5x2
mxv achieves 680.478734401441557 Mflop/s
( 100 iterations in 1.17534899999999998 secs)
blas_mxv achieves 890.225550519576359 Mflop/s
( 100 iterations in 0.898424000000000111 secs)
pblas_mxv achieves 785.257810605398276 Mflop/s
( 100 iterations in 1.01851900000000040 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 10 mxv2x5
mxv achieves 597.735063910119948 Mflop/s
( 100 iterations in 1.33805100000000010 secs)
blas_mxv achieves 780.977657477143339 Mflop/s
( 100 iterations in 1.02410099999999993 secs)
pblas_mxv achieves 800.676741031429287 Mflop/s
( 100 iterations in 0.998905000000000154 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 10 mxv1x10
mxv achieves 491.453332825371206 Mflop/s
( 100 iterations in 1.62741800000000003 secs)
blas_mxv achieves 595.773272440011056 Mflop/s
( 100 iterations in 1.34245700000000001 secs)
pblas_mxv achieves 701.677428411004712 Mflop/s
( 100 iterations in 1.13984000000000041 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 16 mxv16x1
mxv achieves 1029.89237508788460 Mflop/s
( 100 iterations in 0.776585999999999999 secs)
blas_mxv achieves 1387.64547896150430 Mflop/s
( 100 iterations in 0.576371999999999884 secs)
pblas_mxv achieves 884.117391511013921 Mflop/s
( 100 iterations in 0.904630999999999963 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 16 mxv8x2
mxv achieves 1109.28957201228286 Mflop/s
( 100 iterations in 0.721001999999999921 secs)
blas_mxv achieves 1304.94796850373109 Mflop/s
( 100 iterations in 0.612898000000000165 secs)
pblas_mxv achieves 1050.37961018545116 Mflop/s
( 100 iterations in 0.761439000000000199 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 16 mxv4x4
mxv achieves 1008.24573058736166 Mflop/s
( 100 iterations in 0.793259000000000047 secs)
blas_mxv achieves 1140.61771337380696 Mflop/s
( 100 iterations in 0.701198999999999906 secs)
pblas_mxv achieves 1098.56601285643660 Mflop/s
( 100 iterations in 0.728039999999999798 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 16 mxv2x8
mxv achieves 848.785985586132483 Mflop/s
( 100 iterations in 0.942286999999999986 secs)
blas_mxv achieves 1019.49637032268754 Mflop/s
( 100 iterations in 0.784505000000000008 secs)
pblas_mxv achieves 1084.06006110206022 Mflop/s
( 100 iterations in 0.737781999999999716 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 16 mxv1x16
mxv achieves 613.464934013017910 Mflop/s
( 100 iterations in 1.30374199999999996 secs)
blas_mxv achieves 697.934992041551482 Mflop/s
( 100 iterations in 1.14595199999999986 secs)
pblas_mxv achieves 871.402251612769987 Mflop/s
( 100 iterations in 0.917830999999999619 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv20x1
mxv achieves 1400.48223656908112 Mflop/s
( 100 iterations in 0.571088999999999958 secs)
blas_mxv achieves 1730.97774920950269 Mflop/s
( 100 iterations in 0.462050999999999989 secs)
pblas_mxv achieves 967.376737480224392 Mflop/s
( 100 iterations in 0.826771999999999840 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv10x2
mxv achieves 1249.38491459099737 Mflop/s
( 100 iterations in 0.640155000000000030 secs)
blas_mxv achieves 1647.25508974636205 Mflop/s
( 100 iterations in 0.485535000000000050 secs)
pblas_mxv achieves 1203.05411487226365 Mflop/s
( 100 iterations in 0.664808000000000066 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv5x4
mxv achieves 1096.47258818529531 Mflop/s
( 100 iterations in 0.729430000000000023 secs)
blas_mxv achieves 1480.59290359485249 Mflop/s
( 100 iterations in 0.540189000000000141 secs)
pblas_mxv achieves 1289.22646412152676 Mflop/s
( 100 iterations in 0.620372000000000146 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv4x5
mxv achieves 1160.42876957987755 Mflop/s
( 100 iterations in 0.689228000000000063 secs)
blas_mxv achieves 1355.79544067589745 Mflop/s
( 100 iterations in 0.589911999999999992 secs)
pblas_mxv achieves 1269.02023006743389 Mflop/s
( 100 iterations in 0.630249999999999755 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv2x10
mxv achieves 971.719535353314427 Mflop/s
( 100 iterations in 0.823076999999999948 secs)
blas_mxv achieves 1121.80678498142220 Mflop/s
( 100 iterations in 0.712957000000000063 secs)
pblas_mxv achieves 1189.50062687486115 Mflop/s
( 100 iterations in 0.672383000000000175 secs)
esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv1x20
mxv achieves 665.560450497381112 Mflop/s
( 100 iterations in 1.20169400000000004 secs)
blas_mxv achieves 721.238387412685825 Mflop/s
( 100 iterations in 1.10892599999999986 secs)
pblas_mxv achieves 989.497568320242067 Mflop/s
( 100 iterations in 0.808288999999999813 secs)