Back to CFM home             Brown University



Results of parallel matrix-vector product (2000x2000)

esp3:ce107/work/SP2-tutorial% mpirun -np 1 mxv1x1
 mxv achieves  71.7463784360419794  Mflop/s
 ( 100  iterations in  11.1476009999999999  secs)
 blas_mxv achieves  99.5692428890716457  Mflop/s
 ( 100  iterations in  8.03260100000000143  secs)
 pblas_mxv achieves  99.3758876175258337  Mflop/s
 ( 100  iterations in  8.04823000000000022  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 2 mxv2x1
 mxv achieves  144.098112112943511  Mflop/s
 ( 100  iterations in  5.55038500000000035  secs)
 blas_mxv achieves  196.387819535357920  Mflop/s
 ( 100  iterations in  4.07255399999999934  secs)
 pblas_mxv achieves  191.058016613304318  Mflop/s
 ( 100  iterations in  4.18616300000000052  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 2 mxv1x2
 mxv achieves  141.535026822285602  Mflop/s
 ( 100  iterations in  5.65089799999999975  secs)
 blas_mxv achieves  189.705882352941160  Mflop/s
 ( 100  iterations in  4.21600000000000019  secs)
 pblas_mxv achieves  192.144836557969569  Mflop/s
 ( 100  iterations in  4.16248500000000021  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 4 mxv4x1
 mxv achieves  293.794500334458462  Mflop/s
 ( 100  iterations in  2.72231099999999993  secs)
 blas_mxv achieves  357.190228582989107  Mflop/s
 ( 100  iterations in  2.23914299999999988  secs)
 pblas_mxv achieves  333.598053969641626  Mflop/s
 ( 100  iterations in  2.39749600000000029  secs)


esp3:ce107/work/SP2-tutorial% mpirun -np 4 mxv2x2
 mxv achieves  278.894992792248672  Mflop/s
 ( 100  iterations in  2.86774599999999991  secs)
 blas_mxv achieves  374.601125676851041  Mflop/s
 ( 100  iterations in  2.13507099999999994  secs)
 pblas_mxv achieves  368.168987631952461  Mflop/s
 ( 100  iterations in  2.17237200000000019  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 4 mxv1x4
 mxv achieves  259.983844464778826  Mflop/s
 ( 100  iterations in  3.07634499999999989  secs)
 blas_mxv achieves  316.182595251093915  Mflop/s
 ( 100  iterations in  2.52955100000000010  secs)
 pblas_mxv achieves  356.925460682862081  Mflop/s
 ( 100  iterations in  2.24080399999999980  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 5 mxv5x1
 mxv achieves  359.216729672568420  Mflop/s
 ( 100  iterations in  2.22651099999999991  secs)
 blas_mxv achieves  477.774352872022462  Mflop/s
 ( 100  iterations in  1.67401199999999983  secs)
 pblas_mxv achieves  422.731741350331333  Mflop/s
 ( 100  iterations in  1.89198000000000022  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 5 mxv1x5
 mxv achieves  313.590120695732480  Mflop/s
 ( 100  iterations in  2.55046300000000015  secs)
 blas_mxv achieves  400.509575574023586  Mflop/s
 ( 100  iterations in  1.99695600000000040  secs)
 pblas_mxv achieves  431.336679904823484  Mflop/s
 ( 100  iterations in  1.85423599999999933  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 8 mxv8x1
 mxv achieves  592.582713744318085  Mflop/s
 ( 100  iterations in  1.34968500000000002  secs)
 blas_mxv achieves  718.815383619852810  Mflop/s
 ( 100  iterations in  1.11266400000000010  secs)
 pblas_mxv achieves  588.967355685324378  Mflop/s
 ( 100  iterations in  1.35796999999999990  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 8 mxv4x2
 mxv achieves  554.555949619860826  Mflop/s
 ( 100  iterations in  1.44223499999999993  secs)
 blas_mxv achieves  675.531290410040128  Mflop/s
 ( 100  iterations in  1.18395700000000015  secs)
 pblas_mxv achieves  628.763698683982966  Mflop/s
 ( 100  iterations in  1.27201999999999993  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 8 mxv2x4
 mxv achieves  513.695330754794327  Mflop/s
 ( 100  iterations in  1.55695399999999995  secs)
 blas_mxv achieves  661.038172102998942  Mflop/s
 ( 100  iterations in  1.20991500000000007  secs)
 pblas_mxv achieves  672.701749546022938  Mflop/s
 ( 100  iterations in  1.18893700000000013  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 8 mxv1x8
 mxv achieves  424.600271599773635  Mflop/s
 ( 100  iterations in  1.88365399999999994  secs)
 blas_mxv achieves  532.799689833038769  Mflop/s
 ( 100  iterations in  1.50112699999999988  secs)
 pblas_mxv achieves  608.740657299864438  Mflop/s
 ( 100  iterations in  1.31386000000000003  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 10 mxv10x1
 mxv achieves  693.866680084776931  Mflop/s
 ( 100  iterations in  1.15267100000000000  secs)
 blas_mxv achieves  926.472939066854678  Mflop/s
 ( 100  iterations in  0.863274000000000097  secs)
 pblas_mxv achieves  685.106346527783558  Mflop/s
 ( 100  iterations in  1.16741000000000028  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 10 mxv5x2
 mxv achieves  680.478734401441557  Mflop/s
 ( 100  iterations in  1.17534899999999998  secs)
 blas_mxv achieves  890.225550519576359  Mflop/s
 ( 100  iterations in  0.898424000000000111  secs)
 pblas_mxv achieves  785.257810605398276  Mflop/s
 ( 100  iterations in  1.01851900000000040  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 10 mxv2x5
 mxv achieves  597.735063910119948  Mflop/s
 ( 100  iterations in  1.33805100000000010  secs)
 blas_mxv achieves  780.977657477143339  Mflop/s
 ( 100  iterations in  1.02410099999999993  secs)
 pblas_mxv achieves  800.676741031429287  Mflop/s
 ( 100  iterations in  0.998905000000000154  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 10 mxv1x10
 mxv achieves  491.453332825371206  Mflop/s
 ( 100  iterations in  1.62741800000000003  secs)
 blas_mxv achieves  595.773272440011056  Mflop/s
 ( 100  iterations in  1.34245700000000001  secs)
 pblas_mxv achieves  701.677428411004712  Mflop/s
 ( 100  iterations in  1.13984000000000041  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 16 mxv16x1
 mxv achieves  1029.89237508788460  Mflop/s
 ( 100  iterations in  0.776585999999999999  secs)
 blas_mxv achieves  1387.64547896150430  Mflop/s
 ( 100  iterations in  0.576371999999999884  secs)
 pblas_mxv achieves  884.117391511013921  Mflop/s
 ( 100  iterations in  0.904630999999999963  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 16 mxv8x2
 mxv achieves  1109.28957201228286  Mflop/s
 ( 100  iterations in  0.721001999999999921  secs)
 blas_mxv achieves  1304.94796850373109  Mflop/s
 ( 100  iterations in  0.612898000000000165  secs)
 pblas_mxv achieves  1050.37961018545116  Mflop/s
 ( 100  iterations in  0.761439000000000199  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 16 mxv4x4
 mxv achieves  1008.24573058736166  Mflop/s
 ( 100  iterations in  0.793259000000000047  secs)
 blas_mxv achieves  1140.61771337380696  Mflop/s
 ( 100  iterations in  0.701198999999999906  secs)
 pblas_mxv achieves  1098.56601285643660  Mflop/s
 ( 100  iterations in  0.728039999999999798  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 16 mxv2x8
 mxv achieves  848.785985586132483  Mflop/s
 ( 100  iterations in  0.942286999999999986  secs)
 blas_mxv achieves  1019.49637032268754  Mflop/s
 ( 100  iterations in  0.784505000000000008  secs)
 pblas_mxv achieves  1084.06006110206022  Mflop/s
 ( 100  iterations in  0.737781999999999716  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 16 mxv1x16
 mxv achieves  613.464934013017910  Mflop/s
 ( 100  iterations in  1.30374199999999996  secs)
 blas_mxv achieves  697.934992041551482  Mflop/s
 ( 100  iterations in  1.14595199999999986  secs)
 pblas_mxv achieves  871.402251612769987  Mflop/s
 ( 100  iterations in  0.917830999999999619  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv20x1
 mxv achieves  1400.48223656908112  Mflop/s
 ( 100  iterations in  0.571088999999999958  secs)
 blas_mxv achieves  1730.97774920950269  Mflop/s
 ( 100  iterations in  0.462050999999999989  secs)
 pblas_mxv achieves  967.376737480224392  Mflop/s
 ( 100  iterations in  0.826771999999999840  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv10x2
 mxv achieves  1249.38491459099737  Mflop/s
 ( 100  iterations in  0.640155000000000030  secs)
 blas_mxv achieves  1647.25508974636205  Mflop/s
 ( 100  iterations in  0.485535000000000050  secs)
 pblas_mxv achieves  1203.05411487226365  Mflop/s
 ( 100  iterations in  0.664808000000000066  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv5x4
 mxv achieves  1096.47258818529531  Mflop/s
 ( 100  iterations in  0.729430000000000023  secs)
 blas_mxv achieves  1480.59290359485249  Mflop/s
 ( 100  iterations in  0.540189000000000141  secs)
 pblas_mxv achieves  1289.22646412152676  Mflop/s
 ( 100  iterations in  0.620372000000000146  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv4x5
 mxv achieves  1160.42876957987755  Mflop/s
 ( 100  iterations in  0.689228000000000063  secs)
 blas_mxv achieves  1355.79544067589745  Mflop/s
 ( 100  iterations in  0.589911999999999992  secs)
 pblas_mxv achieves  1269.02023006743389  Mflop/s
 ( 100  iterations in  0.630249999999999755  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv2x10
 mxv achieves  971.719535353314427  Mflop/s
 ( 100  iterations in  0.823076999999999948  secs)
 blas_mxv achieves  1121.80678498142220  Mflop/s
 ( 100  iterations in  0.712957000000000063  secs)
 pblas_mxv achieves  1189.50062687486115  Mflop/s
 ( 100  iterations in  0.672383000000000175  secs)

esp3:ce107/work/SP2-tutorial% mpirun -np 20 mxv1x20
 mxv achieves  665.560450497381112  Mflop/s
 ( 100  iterations in  1.20169400000000004  secs)
 blas_mxv achieves  721.238387412685825  Mflop/s
 ( 100  iterations in  1.10892599999999986  secs)
 pblas_mxv achieves  989.497568320242067  Mflop/s
 ( 100  iterations in  0.808288999999999813  secs)