file: Hlib/src/Solve_cg.c

/*---------------------------------------------------------------------------* * RCS Information * * * * $Source: /users/sjs/Nektar/lib/src/RCS/Solve_cg.c,v $ * $Revision: 1.2 $ * $Date: 1994/10/19 18:23:54 $ * $Author: sjs $ * $State: Exp $ *---------------------------------------------------------------------------*/ #include <stdio.h> #include <stdlib.h> #include <math.h> #include <veclib.h> #include "hotel.h" //-------------------------------------------------------------------------- // WARNING THE FULL ITERATIVE SOLVER ASSUMES THAT THE SYSTEM IS NOT SINGULAR // i.e. THERE MUST BE AT LEAST ONE FIXED DEGREE OF FREEDOM //-------------------------------------------------------------------------- /* Private functions */ static void setup_RHS (Element_List *U, Element_List *F, double *r, double *u0, Bndry *Ubc, Bsystem *B, SolveType Stype); static void precon_cg(Element_List *E, Bsystem *B, SolveType Stype); static void fill_helm_diag(Element_List *U, Metric *lambda); static void fill_mass_diag(Element_List *U); void A_cg(Element_List *U, Element_List *Uf, Bsystem *B, double *p, double *w, SolveType Stype); /* ------------------------------------------------------------------------ * * Solve_CG() - Conjugate Gradient Solver * * * * This routine solves the algebraic system * * * * A u = - B f * * * * using the conjugate gradient method. The inputs are the initial guess * * "Uo" , the force "f", the boundary conditions and the global matrix * * system. * * * * ------------------------------------------------------------------------ */ double *mult_cg; static void scatter_modes(Element_List *U, double *u0, Bsystem *B); static void gather_modes(Element_List *U, double *u0, Bsystem *B); static void solve_system_CG (Element_List *U, Element_List *Uf, double *rhs, double *uo, Bsystem *B, SolveType Stype); int Ntot; void Solve_CG(Element_List *U, Element_List *Uf, Bndry *Ubc, Bsystem *Ubsys, SolveType Stype){ int i; Element *E; Ntot = Ubsys->nglobal; for(E=U->fhead;E;E = E->next) Ntot += E->Nmodes-E->Nbmodes; double *RHS = dvector(0,Ntot-1); double *U0 = dvector(0,Ntot-1); mult_cg = dvector(0, Ubsys->nsolve-1); dzero(Ubsys->nsolve,mult_cg,1); for(E=U->fhead;E;E = E->next) for(i=0;i<E->Nbmodes;++i) if(Ubsys->bmap[E->id][i] < Ubsys->nsolve) mult_cg[Ubsys->bmap[E->id][i]] += 1.; #ifdef PARALLEL DO_PARALLEL parallel_gather(mult_cg, Ubsys); #endif dzero(Ntot, RHS, 1); dzero(Ntot, U0, 1); #if 1 if(!Ubsys->Pmat){ setup_signchange(U,Ubsys); precon_cg(U,Ubsys,Stype); } #endif setup_RHS (U,Uf,RHS,U0,Ubc,Ubsys,Stype); solve_system_CG(U,Uf,RHS,U0,Ubsys,Stype); U->Set_state('t'); free(RHS); free(U0); } double full_tol; static double full_tolv[3]; static int full_toli=0; static void Set_tolerance(Element_List *E_L, Element_List *Ef_L){ Element *E = E_L->fhead; full_tol += ddot(Ef_L->hjtot, Ef_L->base_hj, 1, Ef_L->base_hj,1); #ifdef PARALLEL /* gather full_tolerances together to make global full_tolerance */ DO_PARALLEL{ double tmp; gdsum(&full_tol,1,&tmp); } #endif full_tol = sqrt(full_tol); if(E->type != 'p'){ full_tolv[full_toli] = full_tol; if(E->dim() == 2){ //OVERLOAD CALL: dim: Pyr.h(?), Quad.h(?), Tet.h(?), Tri.h(?), Hex.h(?), Prism.h(?) full_tol = sqrt(full_tolv[0]*full_tolv[0] + full_tolv[1]*full_tolv[1])/2.; full_toli = (++full_toli)%2; } else{ full_tol = sqrt(full_tolv[0]*full_tolv[0] + full_tolv[1]*full_tolv[1] +full_tolv[2]*full_tolv[2])/3.; full_toli = (++full_toli)%3; } } /* for those wierd cases make sure full_tol is not less than 10e-3 */ full_tol = (full_tol < 10e-3)? 1.0: full_tol; } /* ------------------------------------------------------------------------ * * Setup_RHS() - Compute the RHS of the algebraic system * * * * This routine computes the initial residual of the discrete algebraic * * system, defined as: * * * * r = -(w,f) - (w,h) + a(w,g) * * b * * * * where (.,.)_b is the L2-inner product defined over the boundary. * * It also returns the dsaveraged initial field in uo * * ------------------------------------------------------------------------ */ static void setup_RHS (Element_List *U, Element_List *F, double *r, double *u0, Bndry *Ubc, Bsystem *B, SolveType Stype) { int i; Bndry *bc; Element *E; #if 1 i = B->nglobal; SignChange(U,B); for(E=U->fhead;E;E=E->next){ dscatr(E->Nbmodes,E->vert->hj,B->bmap[E->id],u0); dcopy (E->Nmodes-E->Nbmodes, E->vert[0].hj +E->Nbmodes, 1, u0+i, 1); i+= E->Nmodes-E->Nbmodes; } SignChange(U,B); #else gather_modes(U, u0, B); dvdiv(B->nsolve, u0, 1, mult_cg, 1, u0, 1); #endif if(F->fhead->state == 'p') F->Iprod(F); //OVERLOAD CALL: Iprod: InnerProd.c(Element), Element_List.c(Element_List), InnerProd.c(Tri), InnerProd.c(Quad), InnerProd.c(Tet), InnerProd.c(Pyr), InnerProd.c(Prism), InnerProd.c(Hex) if(Stype == Helm) dneg (F->hjtot, F->base_hj, 1); for(bc = Ubc; bc; bc = bc->next) if(bc->type == 'F' || bc->type == 'R') F->flist[bc->elmt->id]->Add_flux_terms(bc); //OVERLOAD CALL: Add_flux_terms: Boundary.c(Tri), Boundary.c(Quad), Boundary.c(Tet), Boundary.c(Pyr), Boundary.c(Prism), Boundary.c(Hex), Boundary.c(Element) Set_tolerance(U,F); /* set solver tolerances */ //OVERLOAD CALL: Set_tolerance: Solve.c(?), Solve_cg.c(?) /* compute r = F - A u */ U->Trans(U,J_to_Q); //OVERLOAD CALL: Trans: Element_List.c(Element_List), Transforms.c(Element), Transforms.c(Tri), Transforms.c(Quad), Transforms.c(Tet), Transforms.c(Pyr), Transforms.c(Prism), Transforms.c(Hex) if(Stype == Helm) U->HelmHoltz (B->lambda); //OVERLOAD CALL: HelmHoltz: InnerProd.c(Quad), InnerProd.c(Tet), InnerProd.c(Pyr), InnerProd.c(Prism), InnerProd.c(Hex), InnerProd.c(Element), Element_List.c(Element_List), InnerProd.c(Tri) else U->Iprod(U); //OVERLOAD CALL: Iprod: InnerProd.c(Element), Element_List.c(Element_List), InnerProd.c(Tri), InnerProd.c(Quad), InnerProd.c(Tet), InnerProd.c(Pyr), InnerProd.c(Prism), InnerProd.c(Hex) dvsub(U->hjtot, F->base_hj, 1, U->base_hj, 1, U->base_hj, 1); gather_modes(U, r, B); dzero(B->nglobal-B->nsolve, r+B->nsolve, 1); // mask knowns return; } static void full_gddot(double *alpha, double *r, double *s, Bsystem *B){ *alpha = 0.0; #ifdef PARALLEL int i; double tmp = 0.0; DO_PARALLEL{ for(i=0;i<B->nsolve;++i) *alpha += B->pll->mult[i]*r[i]*s[i]; *alpha += ddot(Ntot-B->nglobal, r+B->nglobal, 1, s+B->nglobal, 1); gdsum(alpha,1,&tmp); } else #endif *alpha += ddot(Ntot, r, 1, s, 1); } void PreconFullOverlap(Bsystem *B, double *pin, double *zout); #ifdef Lanczos #define MAX_lanc_iter 990 #endif static void solve_system_CG (Element_List *U, Element_List *Uf, double *rhs, double *uo, Bsystem *B, SolveType Stype){ int iter = 0; double tolcg, alpha, beta, eps, rtz, rtz_old, epsfac, epsloc; double *u = (double*)0; double *r = (double*)0; double *w = (double*)0; double *z = (double*)0; double *p = (double*)0; double *PC = (double*)0; int MAX_ITERATIONS; //OVERLOAD CALL: MAX_ITERATIONS: Solve.c(?), SolveR.c(?), Solve_Stokes.c(?) /* Temporary arrays */ u = dvector(0,Ntot-1); /* Solution */ p = dvector(0,Ntot-1); /* Solution */ r = dvector(0,Ntot-1); /* residual */ z = dvector(0,Ntot-1); /* precondition solution */ w = dvector(0,Ntot-1); /* A*Search direction */ dcopy (Ntot, rhs, 1, r, 1); dcopy (Ntot, rhs, 1, p, 1); dzero (Ntot, w, 1); dzero (Ntot, u, 1); dzero (Ntot, z, 1); PC = B->Pmat->info.diag.idiag; tolcg = (U->fhead->type == 'p')? dparam("TOLCGP"):dparam("TOLCG"); epsfac = (full_tol)? 1.0/full_tol : 1.0; full_gddot(&epsloc, r, r, B); eps = sqrt(epsloc)*epsfac; if (option("verbose") > 1) printf("\tField %c: %3d iterations, error = %#14.6g %lg %lg\n", U->fhead->type, iter, eps/epsfac, epsfac, tolcg); #ifdef PARALLEL DO_PARALLEL{ if(B->pll->nsolve == B->pll->nglobal){ // i.e. singular solve fprintf(stderr, "singular full iterative solve\n"); exit(-1); } MAX_ITERATIONS = B->pll->nsolve*10; //OVERLOAD CALL: MAX_ITERATIONS: Solve.c(?), SolveR.c(?), Solve_Stokes.c(?) } else #endif { MAX_ITERATIONS = Ntot*10; //OVERLOAD CALL: MAX_ITERATIONS: Solve.c(?), SolveR.c(?), Solve_Stokes.c(?) if(B->nsolve == B->nglobal){ // i.e. singular solve fprintf(stderr, "singular full iterative solve\n"); exit(-1); } } #ifdef Lanczos double *beta_l,*alpha_l;/* define the vectors to be used in the Lanczos Algorithm */ double *D_l, *E_l; /* define additional vectors for the Lanczos Alg. */ int info =0; alpha_l = dvector(0,MAX_ITERATIONS-1); /* Lanczos Alg */ //OVERLOAD CALL: MAX_ITERATIONS: Solve.c(?), SolveR.c(?), Solve_Stokes.c(?) beta_l = dvector(0,MAX_ITERATIONS-1); /* Lanczos Alg */ //OVERLOAD CALL: MAX_ITERATIONS: Solve.c(?), SolveR.c(?), Solve_Stokes.c(?) dzero (MAX_ITERATIONS,alpha_l,1); /* zero the matrices */ //OVERLOAD CALL: MAX_ITERATIONS: Solve.c(?), SolveR.c(?), Solve_Stokes.c(?) dzero (MAX_ITERATIONS,beta_l,1); /* zero the matrices */ //OVERLOAD CALL: MAX_ITERATIONS: Solve.c(?), SolveR.c(?), Solve_Stokes.c(?) #endif /* =========================================================== * * ---- Conjugate Gradient Iteration ---- * * =========================================================== */ while (eps > tolcg && iter++ < MAX_ITERATIONS ){ //OVERLOAD CALL: MAX_ITERATIONS: Solve.c(?), SolveR.c(?), Solve_Stokes.c(?) if(B->Precon == Pre_Diag) dvmul(Ntot,PC,1,r,1,z,1); else if(B->Precon == Pre_Overlap) PreconFullOverlap(B, r, z); full_gddot(&rtz, r, z, B); if (iter > 1) { /* Update search direction */ beta = rtz / rtz_old; #ifdef Lanczos if (iter <= MAX_lanc_iter) beta_l[iter] = beta; //OVERLOAD CALL: MAX_lanc_iter: Solve.c(?), Solve_cg.c(?) #endif dsvtvp(Ntot, beta, p, 1, z, 1, p, 1); } else dcopy(Ntot, z, 1, p, 1); A_cg(U,Uf,B,p,w,Stype); // need to do global sum ?? full_gddot(&alpha, p, w, B); alpha = rtz/alpha; #ifdef Lanczos if (iter <= MAX_lanc_iter) alpha_l[iter] = alpha; //OVERLOAD CALL: MAX_lanc_iter: Solve.c(?), Solve_cg.c(?) #endif daxpy(Ntot, alpha, p, 1, u, 1); /* Update solution... */ daxpy(Ntot,-alpha, w, 1, r, 1); /* ...and residual */ rtz_old = rtz; full_gddot(&epsloc, r, r, B); eps = sqrt(epsloc)*epsfac; #ifdef PARALLEL ROOTONLY #endif if (option("verbose") > 1) printf("\tField %c: %3d iterations, error = %#14.6g\n", U->fhead->type, iter, eps/epsfac); } /* =========================================================== * * End of Loop * * =========================================================== */ #ifdef Lanczos /* Start to set up the Lanczos matrix, T, which is a tria-diagonal matrix and call the Lapack routine "dsterf" to compute all the eigenvalues of a tria-diagonal matrix */ if (iter > MAX_lanc_iter){ //OVERLOAD CALL: MAX_lanc_iter: Solve.c(?), Solve_cg.c(?) iter = MAX_lanc_iter; //OVERLOAD CALL: MAX_lanc_iter: Solve.c(?), Solve_cg.c(?) printf("Lanczos: Conj.Gradient not fully converged, stop at 990 iter.\n"); }; D_l = dvector(0, iter-1); /* allocate memory */ E_l = dvector(0, iter-1); dzero (iter, D_l, 1); dzero (iter, E_l, 1); D_l[0] = 1.0/alpha_l[1]; int i; for (i=1; i < iter; ++i) D_l[i] = beta_l[i+1]/alpha_l[i] + 1.0/alpha_l[i+1]; for (i=0;i<iter-1;++i) E_l[i] = -1.0*sqrt(beta_l[i+2])/(alpha_l[i+1]); dsterf(iter,D_l,E_l,info); if (info) error_msg("Lanczos Method, dsterf -- info is not zero!!"); //OVERLOAD CALL: error_msg: gen_utils.h(?), hotel.h(?) /* print maximum and minimum eigenvalues */ printf("Minimum and Maximum Eigenvalues are:%.8f,%.8f\n",D_l[0],D_l[iter-1]); printf("Condition number(MAXeig/MINeig) is:%.8f\n",(D_l[iter-1])/D_l[0]); #endif /* Save solution and clean up */ dvadd(Ntot, u, 1, uo, 1, p, 1); scatter_modes(U, p, B); if (iter > MAX_ITERATIONS){ //OVERLOAD CALL: MAX_ITERATIONS: Solve.c(?), SolveR.c(?), Solve_Stokes.c(?) error_msg (Bsolve_CG failed to converge); //OVERLOAD CALL: error_msg: gen_utils.h(?), hotel.h(?) } else if (option("verbose") > 1) printf("\tField %c: %3d iterations, error = %#14.6g %lg %lg\n", U->fhead->type, iter, eps/epsfac, epsfac, tolcg); /* Free temporary vectors */ free(u); free(r); free(w); free(z); free(p); return; } static void precon_cg(Element_List *U, Bsystem *B, SolveType Stype){ int ltot; double *p; double *store; ltot = U->hjtot; store = dvector(0,ltot-1); dzero(ltot, store, 1); p = dvector(0,Ntot-1); dzero(Ntot, p, 1); B->Pmat = (MatPre *) calloc(1,sizeof(MatPre)); //OVERLOAD CALL: MatPre: nekstruct.h(?), nekstruct.h(?); MatPre: nekstruct.h(?), nekstruct.h(?) B->Pmat->info.diag.ndiag = Ntot; B->Pmat->info.diag.idiag = p; // store contents of E dcopy(ltot, U->base_hj,1,store,1); if(Stype == Helm) fill_helm_diag(U, B->lambda); else fill_mass_diag(U); // do "double sigchange since coefficients are all +ve SignChange(U,B); gather_modes(U, p, B); dvrecp(Ntot,p,1,p,1); dcopy (ltot, store, 1, U->base_hj, 1); free(store); return; } static void fill_mass_diag(Element_List *U){ Element *E; for(E=U->fhead;E;E=E->next) E->fill_diag_massmat(); //OVERLOAD CALL: fill_diag_massmat: Matrix.c(Tri), Matrix.c(Quad), Matrix.c(Tet), Matrix.c(Pyr), Matrix.c(Prism), Matrix.c(Hex), Matrix.c(Element) } static void fill_helm_diag(Element_List *U, Metric *lambda){ Element *E; for(E=U->fhead;E;E=E->next) E->fill_diag_helmmat(lambda+E->id); //OVERLOAD CALL: fill_diag_helmmat: Matrix.c(Tri), Matrix.c(Quad), Matrix.c(Tet), Matrix.c(Pyr), Matrix.c(Prism), Matrix.c(Hex), Matrix.c(Element) } static void gather_modes(Element_List *U, double *u0, Bsystem *B){ register int i; Element *E = U->fhead; double *sc = B->signchange; dzero (Ntot, u0, 1); for(;E;E = E->next){ for(i = 0; i < E->Nbmodes; ++i) u0[B->bmap[E->id][i]] += sc[i]*E->vert[0].hj[i]; sc += E->Nbmodes; } #ifdef PARALLEL DO_PARALLEL parallel_gather(u0,B); #endif u0 += B->nglobal; for(E=U->fhead;E;E=E->next){ dcopy(E->Nmodes-E->Nbmodes, E->vert[0].hj+E->Nbmodes, 1, u0, 1); u0 += E->Nmodes-E->Nbmodes; } } static void scatter_modes(Element_List *U, double *p, Bsystem *B){ Element *E = U->fhead; int cnt = B->nglobal; double *sc = B->signchange; dzero(U->hjtot, U->base_hj, 1); for(;E;E = E->next){ dgathr(E->Nbmodes, p, B->bmap[E->id], E->vert[0].hj); dvmul (E->Nbmodes, sc, 1, E->vert[0].hj, 1, E->vert[0].hj, 1); sc += E->Nbmodes; dcopy(E->Nmodes-E->Nbmodes, p+cnt, 1, E->vert[0].hj+E->Nbmodes, 1); cnt += E->Nmodes-E->Nbmodes; } } void A_cg(Element_List *U, Element_List *Uf, Bsystem *B, double *p, double *w, SolveType Stype){ scatter_modes(U, p, B); #if 1 U->Trans(U,J_to_Q); //OVERLOAD CALL: Trans: Element_List.c(Element_List), Transforms.c(Element), Transforms.c(Tri), Transforms.c(Quad), Transforms.c(Tet), Transforms.c(Pyr), Transforms.c(Prism), Transforms.c(Hex) if(Stype == Helm) U->HelmHoltz (B->lambda); //OVERLOAD CALL: HelmHoltz: InnerProd.c(Quad), InnerProd.c(Tet), InnerProd.c(Pyr), InnerProd.c(Prism), InnerProd.c(Hex), InnerProd.c(Element), Element_List.c(Element_List), InnerProd.c(Tri) else U->Iprod(U); //OVERLOAD CALL: Iprod: InnerProd.c(Element), Element_List.c(Element_List), InnerProd.c(Tri), InnerProd.c(Quad), InnerProd.c(Tet), InnerProd.c(Pyr), InnerProd.c(Prism), InnerProd.c(Hex) #else Element *E; for(E=U->fhead;E;E=E->next){ E->Trans(E, J_to_Q); //OVERLOAD CALL: Trans: Element_List.c(Element_List), Transforms.c(Element), Transforms.c(Tri), Transforms.c(Quad), Transforms.c(Tet), Transforms.c(Pyr), Transforms.c(Prism), Transforms.c(Hex) E->HelmHoltz(B->lambda+E->id); //OVERLOAD CALL: HelmHoltz: InnerProd.c(Quad), InnerProd.c(Tet), InnerProd.c(Pyr), InnerProd.c(Prism), InnerProd.c(Hex), InnerProd.c(Element), Element_List.c(Element_List), InnerProd.c(Tri) } #endif gather_modes(U, w, B); dzero(B->nglobal-B->nsolve, w+B->nsolve, 1); }


Back to Source File Index


C++ to HTML Conversion by ctoohtml