diff --git a/src/include/sz.h b/src/include/sz.h index 22382f0f..05f5a91e 100644 --- a/src/include/sz.h +++ b/src/include/sz.h @@ -17,14 +17,14 @@ #include "Common.h" int omp_sz( - long unsigned int ib, - long unsigned int ihfbit, - struct BindStruct *X, - long unsigned int *list_1_, - long unsigned int *list_2_1_, - long unsigned int *list_2_2_, - long unsigned int *list_jb_ - ); + long unsigned int ib, + long unsigned int ihfbit, + struct BindStruct *X, + long unsigned int *list_1_, + long unsigned int *list_2_1_, + long unsigned int *list_2_2_, + long unsigned int *list_jb_ +); int omp_sz_hacker( long unsigned int ib, @@ -76,8 +76,17 @@ int omp_sz_spin_hacker( long unsigned int *list_2_1_, long unsigned int *list_2_2_, long unsigned int *list_jb_ - ); +); +int omp_sz_Kondo_hacker( + long unsigned int ib, + long unsigned int ihfbit, + struct BindStruct *X, + long unsigned int *list_1_, + long unsigned int *list_2_1_, + long unsigned int *list_2_2_, + long unsigned int *list_jb_ +); int omp_sz_GeneralSpin( @@ -106,11 +115,25 @@ int sz( long unsigned int *list_2_2_ ); -int Read_sz -( - struct BindStruct *X, - const long unsigned int irght, - const long unsigned int ilft, - const long unsigned int ihfbit, - long unsigned int *i_max - ); +int Read_sz( + struct BindStruct *X, + const long unsigned int irght, + const long unsigned int ilft, + const long unsigned int ihfbit, + long unsigned int *i_max +); + +/*[s] func. for refactoring */ +int count_localized_spins(struct BindStruct *X); +void calculate_jb_GeneralSpin(struct BindStruct *X, long unsigned int *list_jb, long int *list_2_1_Sz,long int *list_2_2_Sz, long unsigned int ihfbit,long unsigned int ilftdim,unsigned int N); +void calculate_jb_Spin_m1(struct BindStruct *X, long unsigned int *list_jb, long unsigned int *list_1_, long unsigned int *list_2_1,long unsigned int *list_2_2,\ +long unsigned int ihfbit,long unsigned int irght,long unsigned int ilft,long unsigned int ibpatn, unsigned int N); +void calculate_jb_Spin_Old(struct BindStruct *X, long unsigned int *list_jb, long unsigned int ihfbit,unsigned int N); +void calculate_jb_Spin_Hacker(struct BindStruct *X, long unsigned int *list_jb, long unsigned int ihfbit,unsigned int N); +void calculate_jb_Kondo(struct BindStruct *X, long unsigned int *list_jb, long unsigned int ihfbit); +void calculate_jb_KondoGC(struct BindStruct *X, int num_loc, long unsigned int *list_jb, long unsigned int ihfbit); +void calculate_jb_Hubbard(struct BindStruct *X,long unsigned int *list_jb, long unsigned int ihfbit, unsigned int N2); +void calculate_jb_Hubbard_Hacker(struct BindStruct *X,long unsigned int *list_jb, long unsigned int ihfbit, unsigned int N2); +void calculate_jb_HubbardNCoserved(struct BindStruct *X,long unsigned int *list_jb, long unsigned int ihfbit, unsigned int N2); +void calculate_jb_HubbardNCoserved_Hacker(struct BindStruct *X,long unsigned int *list_jb, long unsigned int ihfbit, unsigned int N2); +/*[e] func. for refactoring */ diff --git a/src/sz.c b/src/sz.c index 028f5491..7e62423b 100644 --- a/src/sz.c +++ b/src/sz.c @@ -36,15 +36,6 @@ * */ -int omp_sz_Kondo_hacker( - long unsigned int ib, - long unsigned int ihfbit, - struct BindStruct *X, - long unsigned int *list_1_, - long unsigned int *list_2_1_, - long unsigned int *list_2_2_, - long unsigned int *list_jb_ -); /** * @@ -59,776 +50,359 @@ int omp_sz_Kondo_hacker( * @author Takahiro Misawa (The University of Tokyo) * @author Kazuyoshi Yoshimi (The University of Tokyo) */ -int sz -( - struct BindStruct *X, - long unsigned int *list_1_, - long unsigned int *list_2_1_, - long unsigned int *list_2_2_ - ) -{ - FILE *fp,*fp_err; - char sdt[D_FileNameMax],sdt_err[D_FileNameMax]; - long unsigned int *HilbertNumToSz; - long unsigned int i,icnt; - long unsigned int ib,jb,ib_start,ib_end, sdim_div, sdim_rest; - - long unsigned int j; - long unsigned int div; - long unsigned int num_up,num_down; - long unsigned int irght,ilft,ihfbit; - long unsigned int *jbthread; - - //*[s] for omp parall - int mythread; - unsigned int all_up,all_down,tmp_res,num_threads; - long unsigned int tmp_1,tmp_2,tmp_3; - long int **comb, **comb2; - //*[e] for omp parall - - // [s] for Kondo - unsigned int N_all_up, N_all_down; - unsigned int all_loc; - long unsigned int num_loc, div_down; - unsigned int num_loc_up; - int icheck_loc; - int ihfSpinDown=0; - // [e] for Kondo - - long unsigned int i_max=0; - double idim=0.0; - long unsigned int div_up; - - // [s] for general spin - long int *list_2_1_Sz = NULL; - long int *list_2_2_Sz = NULL; - if(X->Def.iFlgGeneralSpin==TRUE){ - list_2_1_Sz = li_1d_allocate(X->Check.sdim+2); - list_2_2_Sz = li_1d_allocate((X->Def.Tpow[X->Def.Nsite-1]*X->Def.SiteToBit[X->Def.Nsite-1]/X->Check.sdim)+2); - for(j=0; jCheck.sdim+2;j++){ - list_2_1_Sz[j]=0; - } - for(j=0; j< (X->Def.Tpow[X->Def.Nsite-1]*X->Def.SiteToBit[X->Def.Nsite-1]/X->Check.sdim)+2; j++){ - list_2_2_Sz[j]=0; - } - } - // [e] for general spin - - long unsigned int *list_jb; - list_jb = lui_1d_allocate(X->Large.SizeOflistjb); - for(i=0; iLarge.SizeOflistjb; i++){ - list_jb[i]=0; - } - -//hacker - int hacker; - long unsigned int tmp_i,tmp_j,tmp_pow,max_tmp_i; - long unsigned int ia,ja; - long unsigned int ibpatn=0; -//hacker - - int iSpnup, iMinup,iAllup; - unsigned int N2=0; - unsigned int N=0; - fprintf(stdoutMPI, "%s", cProStartCalcSz); - TimeKeeper(X, cFileNameSzTimeKeep, cInitalSz, "w"); - TimeKeeper(X, cFileNameTimeKeep, cInitalSz, "a"); - - if(X->Check.idim_max!=0){ - switch(X->Def.iCalcModel){ - case HubbardGC: - case HubbardNConserved: - case Hubbard: - N2=2*X->Def.Nsite; - idim = pow(2.0,N2); - break; - case KondoGC: - case Kondo: - N2 = 2*X->Def.Nsite; - N = X->Def.Nsite; - idim = pow(2.0,N2); - for(j=0;jDef.LocSpn[j]); - } - break; - case SpinGC: - case Spin: - N=X->Def.Nsite; - if(X->Def.iFlgGeneralSpin==FALSE){ - idim = pow(2.0, N); - } - else{ - idim=1; - for(j=0; jDef.SiteToBit[j]; - } - } - break; - } - comb = li_2d_allocate(X->Def.Nsite+1,X->Def.Nsite+1); - i_max=X->Check.idim_max; - - switch(X->Def.iCalcModel){ - case HubbardNConserved: - case Hubbard: - case KondoGC: - case Kondo: - case Spin: - if(X->Def.iFlgGeneralSpin==FALSE){ - if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0){ - exitMPI(-1); - } - X->Large.irght=irght; - X->Large.ilft=ilft; - X->Large.ihfbit=ihfbit; - //fprintf(stdoutMPI, "idim=%lf irght=%ld ilft=%ld ihfbit=%ld \n",idim,irght,ilft,ihfbit); - } - else{ - ihfbit=X->Check.sdim; - //fprintf(stdoutMPI, "idim=%lf ihfbit=%ld \n",idim, ihfbit); - } - break; - default: - break; - } - - icnt=1; - jb=0; - - if(X->Def.READ==1){ - if(Read_sz(X, irght, ilft, ihfbit, &i_max)!=0){ - exitMPI(-1); - } - } - else{ - sprintf(sdt, cFileNameSzTimeKeep, X->Def.CDataFileHead); -#ifdef _OPENMP - num_threads = omp_get_max_threads(); -#else - num_threads=1; -#endif - childfopenMPI(sdt,"a", &fp); - fprintf(fp, "num_threads==%d\n",num_threads); - fclose(fp); +int sz( + struct BindStruct *X, + long unsigned int *list_1_, + long unsigned int *list_2_1_, + long unsigned int *list_2_2_ +){ + FILE *fp,*fp_err; + char sdt[D_FileNameMax],sdt_err[D_FileNameMax]; + long unsigned int *HilbertNumToSz; + long unsigned int i,icnt; + long unsigned int ib,jb,ib_start,ib_end, sdim_div, sdim_rest; - //*[s] omp parallel - - TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzStart, "a"); - TimeKeeper(X, cFileNameTimeKeep, cOMPSzStart, "a"); - switch(X->Def.iCalcModel){ - case HubbardGC: - icnt = X->Def.Tpow[2*X->Def.Nsite-1]*2+0;/*Tpow[2*X->Def.Nsit]=1*/ - break; + long unsigned int j; + long unsigned int div; + long unsigned int num_up,num_down; + long unsigned int irght,ilft,ihfbit; + long unsigned int *jbthread; + + /*[s] for omp parall*/ + int mythread; + unsigned int all_up,all_down,tmp_res,num_threads; + long unsigned int tmp_1,tmp_2,tmp_3; + long int **comb, **comb2; + /*[e] for omp parall*/ + + /*[s] for Kondo*/ + unsigned int N_all_up, N_all_down; + unsigned int all_loc; + long unsigned int num_loc, div_down; + unsigned int num_loc_up; + int icheck_loc; + int ihfSpinDown=0; + /*[e] for Kondo*/ - case SpinGC: - if(X->Def.iFlgGeneralSpin==FALSE){ - icnt = X->Def.Tpow[X->Def.Nsite-1]*2+0;/*Tpow[X->Def.Nsit]=1*/ - } - else{ - icnt = X->Def.Tpow[X->Def.Nsite-1]*X->Def.SiteToBit[X->Def.Nsite-1]; - } - break; - - case KondoGC: - // this part can not be parallelized - jb = 0; - num_loc=0; - for(j=X->Def.Nsite/2; j< X->Def.Nsite ;j++){ // counting # of localized spins - if(X->Def.LocSpn[j] != ITINERANT){ // //ITINERANT ==0 -> itinerant - num_loc += 1; - } - } + long unsigned int i_max=0; + double idim=0.0; + long unsigned int div_up; - for(ib=0;ibCheck.sdim;ib++){ - list_jb[ib]=jb; - i=ib*ihfbit; - icheck_loc=1; - for(j=(X->Def.Nsite+1)/2; j< X->Def.Nsite ;j++){ - div_up = i & X->Def.Tpow[2*j]; - div_up = div_up/X->Def.Tpow[2*j]; - div_down = i & X->Def.Tpow[2*j+1]; - div_down = div_down/X->Def.Tpow[2*j+1]; - if(X->Def.LocSpn[j] != ITINERANT){ - if(X->Def.Nsite%2==1 && j==(X->Def.Nsite/2)){ - icheck_loc= icheck_loc; - } - else{ - icheck_loc = icheck_loc*(div_up^div_down);// exclude doubllly ocupited site - } - } + /*[s] for general spin*/ + long int *list_2_1_Sz = NULL; + long int *list_2_2_Sz = NULL; + if(X->Def.iFlgGeneralSpin==TRUE){ + list_2_1_Sz = li_1d_allocate(X->Check.sdim+2); + list_2_2_Sz = li_1d_allocate((X->Def.Tpow[X->Def.Nsite-1]*X->Def.SiteToBit[X->Def.Nsite-1]/X->Check.sdim)+2); + for(j=0; jCheck.sdim+2;j++){ + list_2_1_Sz[j]=0; } - if(icheck_loc == 1){ - if(X->Def.Nsite%2==1 && X->Def.LocSpn[X->Def.Nsite/2] != ITINERANT){ - jb +=X->Def.Tpow[X->Def.Nsite-1-(X->Def.NLocSpn-num_loc)]; - }else{ - jb +=X->Def.Tpow[X->Def.Nsite-(X->Def.NLocSpn-num_loc)]; - } + for(j=0; j< (X->Def.Tpow[X->Def.Nsite-1]*X->Def.SiteToBit[X->Def.Nsite-1]/X->Check.sdim)+2; j++){ + list_2_2_Sz[j]=0; } - } + } + /*[e] for general spin*/ - icnt = 0; -#pragma omp parallel for default(none) reduction(+:icnt) private(ib) firstprivate(ihfbit, N2, X) shared(list_1_, list_2_1_, list_2_2_, list_jb) - for(ib=0;ibCheck.sdim;ib++){ - icnt+=omp_sz_KondoGC(ib, ihfbit, X, list_1_, list_2_1_, list_2_2_, list_jb); - } - break; + long unsigned int *list_jb; + list_jb = lui_1d_allocate(X->Large.SizeOflistjb); + for(i=0; iLarge.SizeOflistjb; i++){ + list_jb[i]=0; + } - case Hubbard: - hacker = X->Def.read_hacker; - if(hacker==0){ - // this part can not be parallelized - jb = 0; - for(ib=0;ibCheck.sdim;ib++){ // sdim = 2^(N/2) - list_jb[ib] = jb; - i = ib*ihfbit; - //[s] counting # of up and down electrons - num_up = 0; - for(j=0;j<=N2-2;j+=2){ // even -> up spin - div=i & X->Def.Tpow[j]; - div=div/X->Def.Tpow[j]; - num_up+=div; - } - num_down=0; - for(j=1;j<=N2-1;j+=2){ // odd -> down spin - div=i & X->Def.Tpow[j]; - div=div/X->Def.Tpow[j]; - num_down+=div; - } - //[e] counting # of up and down electrons - tmp_res = X->Def.Nsite%2; // even Ns-> 0, odd Ns -> 1 - all_up = (X->Def.Nsite+tmp_res)/2; - all_down = (X->Def.Nsite-tmp_res)/2; - - tmp_1 = Binomial(all_up,X->Def.Nup-num_up,comb,all_up); - tmp_2 = Binomial(all_down,X->Def.Ndown-num_down,comb,all_down); - jb += tmp_1*tmp_2; + /*hacker*/ + int hacker; + long unsigned int tmp_i,tmp_j,tmp_pow,max_tmp_i; + long unsigned int ia,ja; + long unsigned int ibpatn=0; + /*hacker*/ + + int iSpnup, iMinup,iAllup; + unsigned int N2 = 0; + unsigned int N = 0; + fprintf(stdoutMPI, "%s", cProStartCalcSz); + TimeKeeper(X, cFileNameSzTimeKeep, cInitalSz, "w"); + TimeKeeper(X, cFileNameTimeKeep, cInitalSz, "a"); + + if(X->Check.idim_max!=0){ + /*[s] calculating the maximum size of Hilbert dimensions*/ + switch(X->Def.iCalcModel){ + case HubbardGC: + case HubbardNConserved: + case Hubbard: + N2=2*X->Def.Nsite; + idim = pow(2.0,N2); + break; + case KondoGC: + case Kondo: + N2 = 2*X->Def.Nsite; + N = X->Def.Nsite; + idim = pow(2.0,N2); + for(j=0;jDef.LocSpn[j]); + } + break; + case SpinGC: + case Spin: + N=X->Def.Nsite; + if(X->Def.iFlgGeneralSpin==FALSE){ + idim = pow(2.0, N); + }else{ + idim=1; + for(j=0; jDef.SiteToBit[j]; + } + } + break; } + /*[e] calculating the maximum size of Hilbert dimensions*/ - //#pragma omp barrier - TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); - TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); - - icnt = 0; - for(ib=0;ibCheck.sdim;ib++){ - icnt+=omp_sz(ib,ihfbit, X, list_1_, list_2_1_, list_2_2_, list_jb); + comb = li_2d_allocate(X->Def.Nsite+1,X->Def.Nsite+1); /*comb=Binomial coefficient */ + i_max = X->Check.idim_max; /*i_max, idim_max : Hilbert dimensions*/ + + /*[s] calculating irght, ilht, ihfbit*/ + switch(X->Def.iCalcModel){ + case HubbardNConserved: + case Hubbard: + case KondoGC: + case Kondo: + case Spin: + if(X->Def.iFlgGeneralSpin==FALSE){ + if(GetSplitBitByModel(X->Def.Nsite, X->Def.iCalcModel, &irght, &ilft, &ihfbit)!=0){ + exitMPI(-1); + } + X->Large.irght=irght; + X->Large.ilft=ilft; + X->Large.ihfbit=ihfbit; + //fprintf(stdoutMPI, "idim=%lf irght=%ld ilft=%ld ihfbit=%ld \n",idim,irght,ilft,ihfbit); + }else{ + ihfbit=X->Check.sdim; + //fprintf(stdoutMPI, "idim=%lf ihfbit=%ld \n",idim, ihfbit); + } + break; + default: + break; } - break; - }else if(hacker==1){ - jbthread = lui_1d_allocate(nthreads); - #pragma omp parallel default(none) \ - shared(X,list_jb,ihfbit,N2,nthreads,jbthread) \ - private(ib,i,j,num_up,num_down,div,tmp_res,tmp_1,tmp_2,jb,all_up,all_down, \ - comb2,mythread,sdim_div,sdim_rest,ib_start,ib_end) - { - jb = 0; -#ifdef _OPENMP - mythread = omp_get_thread_num(); -#else - mythread = 0; -#endif - comb2 = li_2d_allocate(X->Def.Nsite+1,X->Def.Nsite+1); - // - // explict loop decomposition is nessesary to fix the asignment to each thread - // - sdim_div = X->Check.sdim / nthreads; - sdim_rest = X->Check.sdim % nthreads; - if(mythread < sdim_rest){ - ib_start = sdim_div*mythread + mythread; - ib_end = ib_start + sdim_div + 1; - } - else{ - ib_start = sdim_div*mythread + sdim_rest; - ib_end = ib_start + sdim_div; - } - // - for(ib=ib_start;ibDef.Tpow[j]; - div=div/X->Def.Tpow[j]; - num_up+=div; - } - num_down=0; - for(j=1;j<=N2-1;j+=2){ - div=i & X->Def.Tpow[j]; - div=div/X->Def.Tpow[j]; - num_down+=div; - } - - tmp_res = X->Def.Nsite%2; // even Ns-> 0, odd Ns -> 1 - all_up = (X->Def.Nsite+tmp_res)/2; - all_down = (X->Def.Nsite-tmp_res)/2; + /*[e] calculating irght, ilht, ihfbit*/ + + icnt=1; + jb=0; - tmp_1 = Binomial(all_up,X->Def.Nup-num_up,comb2,all_up); - tmp_2 = Binomial(all_down,X->Def.Ndown-num_down,comb2,all_down); - jb += tmp_1*tmp_2; - } - free_li_2d_allocate(comb2); - if(mythread != nthreads-1) jbthread[mythread+1] = jb; - #pragma omp barrier - #pragma omp single - { - jbthread[0] = 0; - for(j=1;jDef.READ==1){ + if(Read_sz(X, irght, ilft, ihfbit, &i_max)!=0){ + exitMPI(-1); } - } - for(ib=ib_start;ibCheck.sdim;ib++){ - icnt+=omp_sz_hacker(ib,ihfbit,X,list_1_, list_2_1_, list_2_2_, list_jb); - } - break; - } - else{ - fprintf(stderr, "Error: CalcHS in ModPara file must be 0 or 1 for Hubbard model."); - return -1; - } - - case HubbardNConserved: - hacker = X->Def.read_hacker; - if(hacker==0){ - // this part can not be parallelized - jb = 0; - iSpnup=0; - iMinup=0; - iAllup=X->Def.Ne; - if(X->Def.Ne > X->Def.Nsite){ - iMinup = X->Def.Ne-X->Def.Nsite; - iAllup = X->Def.Nsite; - } - for(ib=0;ibCheck.sdim;ib++){ - list_jb[ib]=jb; - i=ib*ihfbit; - num_up=0; - for(j=0;j<=N2-2;j+=2){ - div=i & X->Def.Tpow[j]; - div=div/X->Def.Tpow[j]; - num_up+=div; - } - num_down=0; - for(j=1;j<=N2-1;j+=2){ - div=i & X->Def.Tpow[j]; - div=div/X->Def.Tpow[j]; - num_down+=div; - } - tmp_res = X->Def.Nsite%2; // even Ns-> 0, odd Ns -> 1 - all_up = (X->Def.Nsite+tmp_res)/2; - all_down = (X->Def.Nsite-tmp_res)/2; - - for(iSpnup=iMinup; iSpnup<= iAllup; iSpnup++){ - tmp_1 = Binomial(all_up, iSpnup-num_up,comb,all_up); - tmp_2 = Binomial(all_down, X->Def.Ne-iSpnup-num_down,comb,all_down); - jb += tmp_1*tmp_2; - } - } - //#pragma omp barrier - TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); - TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); - - icnt = 0; -#pragma omp parallel for default(none) reduction(+:icnt) private(ib) firstprivate(ihfbit, N2, X) shared(list_1_, list_2_1_, list_2_2_, list_jb) - for(ib=0;ibCheck.sdim;ib++){ - icnt+=omp_sz(ib,ihfbit, X,list_1_, list_2_1_, list_2_2_, list_jb); - } - break; - } - else if(hacker==1){ - iMinup=0; - iAllup=X->Def.Ne; - if(X->Def.Ne > X->Def.Nsite){ - iMinup = X->Def.Ne-X->Def.Nsite; - iAllup = X->Def.Nsite; - } - jbthread = lui_1d_allocate(nthreads); - #pragma omp parallel default(none) \ - shared(X,iMinup,iAllup,list_jb,ihfbit,N2,nthreads,jbthread) \ - private(ib,i,j,num_up,num_down,div,tmp_res,tmp_1,tmp_2,iSpnup,jb,all_up,all_down,comb2, \ - mythread,sdim_rest,sdim_div,ib_start,ib_end) - { - jb = 0; - iSpnup=0; + }else{ + sprintf(sdt, cFileNameSzTimeKeep, X->Def.CDataFileHead); #ifdef _OPENMP - mythread = omp_get_thread_num(); + num_threads = omp_get_max_threads(); #else - mythread = 0; + num_threads = 1; #endif - comb2 = li_2d_allocate(X->Def.Nsite+1,X->Def.Nsite+1); - // - // explict loop decomposition is nessesary to fix the asignment to each thread - // - sdim_div = X->Check.sdim / nthreads; - sdim_rest = X->Check.sdim % nthreads; - if(mythread < sdim_rest){ - ib_start = sdim_div*mythread + mythread; - ib_end = ib_start + sdim_div + 1; - } - else{ - ib_start = sdim_div*mythread + sdim_rest; - ib_end = ib_start + sdim_div; - } - // - for(ib=ib_start;ibDef.Tpow[j]; - div=div/X->Def.Tpow[j]; - num_up+=div; - } - num_down=0; - for(j=1;j<=N2-1;j+=2){ - div=i & X->Def.Tpow[j]; - div=div/X->Def.Tpow[j]; - num_down+=div; - } - tmp_res = X->Def.Nsite%2; // even Ns-> 0, odd Ns -> 1 - all_up = (X->Def.Nsite+tmp_res)/2; - all_down = (X->Def.Nsite-tmp_res)/2; - - for(iSpnup=iMinup; iSpnup<= iAllup; iSpnup++){ - tmp_1 = Binomial(all_up, iSpnup-num_up,comb2,all_up); - tmp_2 = Binomial(all_down, X->Def.Ne-iSpnup-num_down,comb2,all_down); - jb += tmp_1*tmp_2; - } - } - free_li_2d_allocate(comb2); - if(mythread != nthreads-1) jbthread[mythread+1] = jb; - #pragma omp barrier - #pragma omp single - { - jbthread[0] = 0; - for(j=1;jCheck.sdim;ib++){ - icnt+=omp_sz_hacker(ib,ihfbit, X,list_1_, list_2_1_, list_2_2_, list_jb); - } - - break; - } - else{ - fprintf(stderr, "Error: CalcHS in ModPara file must be 0 or 1 for Hubbard model."); - return -1; - } - - case Kondo: - // this part can not be parallelized - N_all_up = X->Def.Nup; - N_all_down = X->Def.Ndown; - fprintf(stdoutMPI, cStateNupNdown, N_all_up,N_all_down); - - jb = 0; - num_loc=0; - for(j=X->Def.Nsite/2; j< X->Def.Nsite ;j++){// counting localized # of spins - if(X->Def.LocSpn[j] != ITINERANT){ - num_loc += 1; - } - } - - for(ib=0;ibCheck.sdim;ib++){ //sdim = 2^(N/2) - list_jb[ib] = jb; - i = ib*ihfbit; // ihfbit=pow(2,((Nsite+1)/2)) - num_up = 0; - num_down = 0; - icheck_loc = 1; - - for(j=X->Def.Nsite/2; j< X->Def.Nsite ;j++){ - div_up = i & X->Def.Tpow[2*j]; - div_up = div_up/X->Def.Tpow[2*j]; - div_down = i & X->Def.Tpow[2*j+1]; - div_down = div_down/X->Def.Tpow[2*j+1]; - if(X->Def.LocSpn[j] == ITINERANT){ - num_up += div_up; - num_down += div_down; - }else{ - num_up += div_up; - num_down += div_down; - if(X->Def.Nsite%2==1 && j==(X->Def.Nsite/2)){ // odd site - icheck_loc= icheck_loc; - ihfSpinDown=div_down; - if(div_down ==0){ - num_up += 1; - } - }else{ - icheck_loc = icheck_loc*(div_up^div_down);// exclude empty or doubly occupied site - } - } - } - - if(icheck_loc == 1){ // itinerant of local spins without holon or doublon - tmp_res = X->Def.Nsite%2; // even Ns-> 0, odd Ns -> 1 - all_loc = X->Def.NLocSpn-num_loc; // # of local spins - all_up = (X->Def.Nsite+tmp_res)/2-all_loc; - all_down = (X->Def.Nsite-tmp_res)/2-all_loc; - if(X->Def.Nsite%2==1 && X->Def.LocSpn[X->Def.Nsite/2] != ITINERANT){ - all_up = (X->Def.Nsite)/2-all_loc; - all_down = (X->Def.Nsite)/2-all_loc; + childfopenMPI(sdt,"a", &fp); + fprintf(fp, "num_threads==%d\n",num_threads); + fclose(fp); + + //*[s] omp parallel + + TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzStart, "a"); + TimeKeeper(X, cFileNameTimeKeep, cOMPSzStart, "a"); + switch(X->Def.iCalcModel){ + case HubbardGC: + icnt = X->Def.Tpow[2*X->Def.Nsite-1]*2+0;/*Tpow[2*X->Def.Nsit]=1*/ + break; + case SpinGC: + if(X->Def.iFlgGeneralSpin==FALSE){ + icnt = X->Def.Tpow[X->Def.Nsite-1]*2+0;/*Tpow[X->Def.Nsit]=1*/ + }else{ + icnt = X->Def.Tpow[X->Def.Nsite-1]*X->Def.SiteToBit[X->Def.Nsite-1]; + } + break; + case KondoGC: + /*[s] this part can not be simply parallelized*/ + num_loc = count_localized_spins(X); + calculate_jb_KondoGC(X,num_loc,list_jb,ihfbit); + /*[e] this part can not be simply parallelized*/ + icnt = 0; + #pragma omp parallel for default(none) \ + reduction(+:icnt) private(ib) firstprivate(ihfbit, N2, X) \ + shared(list_1_, list_2_1_, list_2_2_, list_jb) + for(ib=0;ibCheck.sdim;ib++){ + icnt+=omp_sz_KondoGC(ib, ihfbit, X, list_1_, list_2_1_, list_2_2_, list_jb); + } + break; + + case Hubbard: + hacker = X->Def.read_hacker; + if(hacker==0){ + calculate_jb_Hubbard(X,list_jb,ihfbit,N2); + TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); + TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); + + icnt = 0; + for(ib=0;ibCheck.sdim;ib++){ + icnt += omp_sz(ib,ihfbit, X, list_1_, list_2_1_, list_2_2_, list_jb); + } + break; + }else if(hacker==1){ + calculate_jb_Hubbard_Hacker(X,list_jb,ihfbit,N2); + TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); + TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); + + icnt = 0; + #pragma omp parallel for default(none) \ + reduction(+:icnt) private(ib) firstprivate(ihfbit, X) \ + shared(list_1_, list_2_1_, list_2_2_, list_jb) + for(ib=0;ibCheck.sdim;ib++){ + icnt += omp_sz_hacker(ib,ihfbit,X,list_1_, list_2_1_, list_2_2_, list_jb); + } + break; + }else{ + fprintf(stderr, "Error: CalcHS in ModPara file must be 0 or 1 for Hubbard model."); + return -1; + } + case HubbardNConserved: + hacker = X->Def.read_hacker; + if(hacker==0){ + calculate_jb_HubbardNCoserved(X,list_jb,ihfbit,N2); + TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); + TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); + + icnt = 0; + #pragma omp parallel for default(none) \ + reduction(+:icnt) private(ib) firstprivate(ihfbit, N2, X) \ + shared(list_1_, list_2_1_, list_2_2_, list_jb) + for(ib=0;ibCheck.sdim;ib++){ + icnt+=omp_sz(ib,ihfbit, X,list_1_, list_2_1_, list_2_2_, list_jb); + } + break; + }else if(hacker==1){ + calculate_jb_HubbardNCoserved_Hacker(X,list_jb,ihfbit,N2); + TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); + TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); + + icnt = 0; + #pragma omp parallel for default(none) \ + reduction(+:icnt) private(ib) firstprivate(ihfbit, N2, X) \ + shared(list_1_, list_2_1_, list_2_2_, list_jb) + for(ib=0;ibCheck.sdim;ib++){ + icnt+=omp_sz_hacker(ib,ihfbit, X,list_1_, list_2_1_, list_2_2_, list_jb); + } + break; + }else{ + fprintf(stderr, "Error: CalcHS in ModPara file must be 0 or 1 for Hubbard model."); + return -1; + } + + case Kondo: + calculate_jb_Kondo(X, list_jb,ihfbit); + TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); + TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); + + hacker = X->Def.read_hacker; + if(hacker==0){ + icnt = 0; + #pragma omp parallel for default(none) \ + reduction(+:icnt) private(ib) firstprivate(ihfbit, N2, X) \ + shared(list_1_, list_2_1_, list_2_2_, list_jb) + for(ib=0;ibCheck.sdim;ib++){ + icnt+=omp_sz_Kondo(ib,ihfbit, X, list_1_, list_2_1_, list_2_2_, list_jb); + } + }else if(hacker==1){ + icnt = 0; + #pragma omp parallel for default(none) \ + reduction(+:icnt) private(ib) firstprivate(ihfbit, N2, X)\ + shared(list_1_, list_2_1_, list_2_2_, list_jb) + for(ib=0;ibCheck.sdim;ib++){ + icnt+=omp_sz_Kondo_hacker(ib,ihfbit, X, list_1_, list_2_1_, list_2_2_, list_jb); + } + } + break; + case Spin: + if(X->Def.iFlgGeneralSpin==FALSE){ + hacker = X->Def.read_hacker; + if(hacker == -1){ + calculate_jb_Spin_m1(X,list_jb,list_1_,list_2_1_,list_2_2_,ihfbit,irght,ilft,ibpatn, N); + }else if(hacker == 1){ + calculate_jb_Spin_Hacker(X,list_jb,ihfbit,N); + TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); + TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); + icnt = 0; + #pragma omp parallel for default(none)\ + reduction(+:icnt) private(ib) \ + firstprivate(ihfbit, N, X, list_1_, list_2_1_, list_2_2_, list_jb) + for(ib=0;ibCheck.sdim;ib++){ + icnt+=omp_sz_spin_hacker(ib,ihfbit,N,X, list_1_, list_2_1_, list_2_2_, list_jb); + } + }else if(hacker == 0){ + calculate_jb_Spin_Old(X,list_jb,ihfbit,N); + //#pragma omp barrier + TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); + TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); + + icnt = 0; + #pragma omp parallel for default(none) reduction(+:icnt)\ + private(ib) firstprivate(ihfbit, N, X)\ + shared(list_1_, list_2_1_, list_2_2_, list_jb) + for(ib=0;ibCheck.sdim;ib++){ + icnt+=omp_sz_spin(ib,ihfbit,N,X,list_1_, list_2_1_, list_2_2_, list_jb); + } + }else{ + fprintf(stderr, "Error: CalcHS in ModPara file must be -1 or 0 or 1 for Spin model."); + return -1; + } + }else{ + long unsigned int ilftdim = (X->Def.Tpow[X->Def.Nsite-1]*X->Def.SiteToBit[X->Def.Nsite-1])/ihfbit; + calculate_jb_GeneralSpin(X,list_jb,list_2_1_Sz,list_2_2_Sz,ihfbit,ilftdim,N); + TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); + TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); + + icnt = 0; + #pragma omp parallel for default(none)\ + reduction(+:icnt) private(ib) firstprivate(ilftdim, ihfbit, X)\ + shared(list_1_, list_2_1_, list_2_2_, list_2_1_Sz, list_2_2_Sz,list_jb) + for(ib=0;ibDef.Nsite%2==1 && X->Def.LocSpn[X->Def.Nsite/2] != ITINERANT){ - if(ihfSpinDown !=0){ - tmp_2 = Binomial(all_up, X->Def.Nup-num_up-num_loc_up, comb, all_up); - tmp_3 = Binomial(all_down, X->Def.Ndown-num_down-(all_loc-num_loc_up), comb, all_down); - } - else{ - tmp_2 = Binomial(all_up, X->Def.Nup-num_up-num_loc_up, comb, all_up); - tmp_3 = Binomial(all_down, X->Def.Ndown-num_down-(all_loc-num_loc_up), comb, all_down); + if(X->Def.iFlgCalcSpec == CALCSPEC_NOT){ + if(X->Def.iCalcModel==HubbardNConserved){ + X->Def.iCalcModel=Hubbard; } - } - else{ - tmp_2 = Binomial(all_up, X->Def.Nup-num_up-num_loc_up, comb, all_up); - tmp_3 = Binomial(all_down, X->Def.Ndown-num_down-(all_loc-num_loc_up), comb, all_down); - } - jb += tmp_1*tmp_2*tmp_3; - } - } - - } - //#pragma omp barrier - TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); - TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); - - hacker = X->Def.read_hacker; - if(hacker==0){ - icnt = 0; -#pragma omp parallel for default(none) reduction(+:icnt) private(ib) firstprivate(ihfbit, N2, X) shared(list_1_, list_2_1_, list_2_2_, list_jb) - for(ib=0;ibCheck.sdim;ib++){ - icnt+=omp_sz_Kondo(ib,ihfbit, X, list_1_, list_2_1_, list_2_2_, list_jb); - } - }else if(hacker==1){ - icnt = 0; -#pragma omp parallel for default(none) reduction(+:icnt) private(ib) firstprivate(ihfbit, N2, X) shared(list_1_, list_2_1_, list_2_2_, list_jb) - for(ib=0;ibCheck.sdim;ib++){ - icnt+=omp_sz_Kondo_hacker(ib,ihfbit, X, list_1_, list_2_1_, list_2_2_, list_jb); - } - } - break; - - case Spin: - // this part can not be parallelized - if(X->Def.iFlgGeneralSpin==FALSE){ - hacker = X->Def.read_hacker; - //printf(" rank=%d:Ne=%ld ihfbit=%ld sdim=%ld\n", myrank,X->Def.Ne,ihfbit,X->Check.sdim); - // using hacker's delight only + no open mp - if(hacker == -1){ - icnt = 1; - tmp_pow = 1; - tmp_i = 0; - jb = 0; - ja = 0; - while(tmp_pow < X->Def.Tpow[X->Def.Ne]){ - tmp_i += tmp_pow; - tmp_pow = tmp_pow*2; - } - //printf("DEBUG: %ld %ld %ld %ld\n",tmp_i,X->Check.sdim,X->Def.Tpow[X->Def.Ne],X->Def.Nsite); - if(X->Def.Nsite%2==0){ - max_tmp_i = X->Check.sdim*X->Check.sdim; - }else{ - max_tmp_i = X->Check.sdim*X->Check.sdim*2-1; - } - while(tmp_iCheck.sdim;ib++){ - list_jb[ib]=jb; - i=ib*ihfbit; - num_up=0; - for(j=0;jDef.Tpow[j]; - div_up = div_up/X->Def.Tpow[j]; - num_up +=div_up; - } - all_up = (X->Def.Nsite+1)/2; - tmp_1 = Binomial(all_up,X->Def.Ne-num_up,comb,all_up); - jb += tmp_1; - } - //#pragma omp barrier - TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); - TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); - - icnt = 0; -#pragma omp parallel for default(none) reduction(+:icnt) private(ib) firstprivate(ihfbit, N, X, list_1_, list_2_1_, list_2_2_, list_jb) - for(ib=0;ibCheck.sdim;ib++){ - icnt+=omp_sz_spin_hacker(ib,ihfbit,N,X, list_1_, list_2_1_, list_2_2_, list_jb); - } - //printf(" rank=%d ib=%ld:Ne=%d icnt=%ld :idim_max=%ld N=%d\n", myrank,ib,X->Def.Ne,icnt,X->Check.idim_max,N); - // old version - }else if(hacker == 0){ - jb = 0; - for(ib=0;ibCheck.sdim;ib++){ - list_jb[ib]=jb; - i=ib*ihfbit; - num_up=0; - for(j=0;jDef.Tpow[j]; - div_up = div_up/X->Def.Tpow[j]; - num_up +=div_up; + + //Error message + if(i_max!=X->Check.idim_max){ + fprintf(stderr, "%s", cErrSz); + fprintf(stderr, cErrSz_ShowDim, i_max, X->Check.idim_max); + strcpy(sdt_err,cFileNameErrorSz); + if(childfopenMPI(sdt_err,"a",&fp_err)!=0){ + exitMPI(-1); } - all_up = (X->Def.Nsite+1)/2; - tmp_1 = Binomial(all_up,X->Def.Ne-num_up,comb,all_up); - jb += tmp_1; - } - //#pragma omp barrier - TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); - TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); - - icnt = 0; -#pragma omp parallel for default(none) reduction(+:icnt) private(ib) firstprivate(ihfbit, N, X) shared(list_1_, list_2_1_, list_2_2_, list_jb) - for(ib=0;ibCheck.sdim;ib++){ - icnt+=omp_sz_spin(ib,ihfbit,N,X,list_1_, list_2_1_, list_2_2_, list_jb); - } - } - else{ - fprintf(stderr, "Error: CalcHS in ModPara file must be -1 or 0 or 1 for Spin model."); - return -1; - } - }else{ - unsigned int Max2Sz=0; - unsigned int irghtsite=1; - long unsigned int itmpSize=1; - int i2Sz=0; - for(j=0; jDef.Nsite; j++){ - itmpSize *= X->Def.SiteToBit[j]; - if(itmpSize==ihfbit){ - break; + fprintf(fp_err, "%s",cErrSz_OutFile); + fclose(fp_err); + exitMPI(-1); } - irghtsite++; - } - for(j=0; jDef.Nsite; j++){ - Max2Sz += X->Def.LocSpn[j]; - } - - HilbertNumToSz = lui_1d_allocate(2*Max2Sz+1); - for(ib=0; ib<2*Max2Sz+1; ib++){ - HilbertNumToSz[ib]=0; - } - - for(ib =0; ibDef.SiteToBit, X->Def.Tpow); - } - list_2_1_Sz[ib]=i2Sz; - HilbertNumToSz[i2Sz+Max2Sz]++; - } - jb = 0; - long unsigned int ilftdim=(X->Def.Tpow[X->Def.Nsite-1]*X->Def.SiteToBit[X->Def.Nsite-1])/ihfbit; - for(ib=0;ibDef.SiteToBit, X->Def.Tpow); - } - list_2_2_Sz[ib]=i2Sz; - if((X->Def.Total2Sz- i2Sz +(int)Max2Sz)>=0 && (X->Def.Total2Sz- i2Sz) <= (int)Max2Sz){ - jb += HilbertNumToSz[X->Def.Total2Sz- i2Sz +Max2Sz]; - } - } - - TimeKeeper(X, cFileNameSzTimeKeep, cOMPSzMid, "a"); - TimeKeeper(X, cFileNameTimeKeep, cOMPSzMid, "a"); - - icnt = 0; -#pragma omp parallel for default(none) reduction(+:icnt) private(ib) firstprivate(ilftdim, ihfbit, X) shared(list_1_, list_2_1_, list_2_2_, list_2_1_Sz, list_2_2_Sz,list_jb) - for(ib=0;ibDef.iFlgCalcSpec == CALCSPEC_NOT){ - if(X->Def.iCalcModel==HubbardNConserved){ - X->Def.iCalcModel=Hubbard; - } - } - - //Error message - //i_max=i_max+1; - if(i_max!=X->Check.idim_max){ - fprintf(stderr, "%s", cErrSz); - fprintf(stderr, cErrSz_ShowDim, i_max, X->Check.idim_max); - strcpy(sdt_err,cFileNameErrorSz); - if(childfopenMPI(sdt_err,"a",&fp_err)!=0){ - exitMPI(-1); + free_li_2d_allocate(comb); } - fprintf(fp_err, "%s",cErrSz_OutFile); - fclose(fp_err); - exitMPI(-1); - } - - free_li_2d_allocate(comb); - } - fprintf(stdoutMPI, "%s", cProEndCalcSz); - + fprintf(stdoutMPI, "%s", cProEndCalcSz); free(list_jb); if(X->Def.iFlgGeneralSpin==TRUE){ free(list_2_1_Sz); free(list_2_2_Sz); } - return 0; + return 0; } /** @@ -1478,7 +1052,7 @@ int omp_sz_spin( /** * @brief efficient version of calculating restricted Hilbert space for spin-1/2 systems - * details of snoob is found in S.H. Warren, Hacker’s Delight, second ed., Addison-Wesley, ISBN: 0321842685, 2012. + * details of snoob is found in S.H. Warren, Hacker's delight(Bs Delight, second ed., Addison-Wesley, ISBN: 0321842685, 2012. * * @param[in] ib upper half bit of i * @param[in] ihfbit 2^(Ns/2) @@ -1683,3 +1257,490 @@ int Read_sz return 0; } + +int count_localized_spins(struct BindStruct *X){ + int num_loc = 0; + for (int j = X->Def.Nsite / 2; j < X->Def.Nsite; j++) { /*counting # of localized spins*/ + if(X->Def.LocSpn[j] != ITINERANT) {/*ITINERANT ==0 -> itinerant*/ + num_loc += 1; + } + } + return num_loc; +} + +void calculate_jb_GeneralSpin(struct BindStruct *X, long unsigned int *list_jb, long int *list_2_1_Sz,long int *list_2_2_Sz, long unsigned int ihfbit,long unsigned int ilftdim,unsigned int N){ + unsigned int Max2Sz=0; + unsigned int irghtsite=1; + long unsigned int itmpSize=1; + int i2Sz=0; + long unsigned int *HilbertNumToSz; + long unsigned int ib,jb,j; + + for(j=0; jDef.Nsite; j++){ + itmpSize *= X->Def.SiteToBit[j]; + if(itmpSize==ihfbit){ + break; + } + irghtsite++; + } + for(j=0; jDef.Nsite; j++){ + Max2Sz += X->Def.LocSpn[j]; + } + + HilbertNumToSz = lui_1d_allocate(2*Max2Sz+1); + for(ib=0; ib<2*Max2Sz+1; ib++){ + HilbertNumToSz[ib]=0; + } + + for(ib =0; ibDef.SiteToBit, X->Def.Tpow); + } + list_2_1_Sz[ib]=i2Sz; + HilbertNumToSz[i2Sz+Max2Sz]++; + } + jb = 0; + for(ib=0;ibDef.SiteToBit, X->Def.Tpow); + } + list_2_2_Sz[ib]=i2Sz; + if((X->Def.Total2Sz- i2Sz +(int)Max2Sz)>=0 && (X->Def.Total2Sz- i2Sz) <= (int)Max2Sz){ + jb += HilbertNumToSz[X->Def.Total2Sz- i2Sz +Max2Sz]; + } + } + free_lui_1d_allocate(HilbertNumToSz); +} + +void calculate_jb_Spin_m1(struct BindStruct *X, long unsigned int *list_jb, long unsigned int *list_1_, long unsigned int *list_2_1_,long unsigned int *list_2_2_,\ +long unsigned int ihfbit,long unsigned int irght,long unsigned int ilft,long unsigned int ibpatn, unsigned int N){ + long unsigned int ia,ja,ib,jb,div_up,i,j,tmp_1; + long unsigned int tmp_pow,tmp_i,tmp_j,icnt,max_tmp_i; + int num_up,all_up; + + icnt = 1; + tmp_pow = 1; + tmp_i = 0; + jb = 0; + ja = 0; + while(tmp_pow < X->Def.Tpow[X->Def.Ne]){ + tmp_i += tmp_pow; + tmp_pow = tmp_pow*2; + } + if(X->Def.Nsite%2==0){ + max_tmp_i = X->Check.sdim*X->Check.sdim; + }else{ + max_tmp_i = X->Check.sdim*X->Check.sdim*2-1; + } + while(tmp_iDef.Nsite+1,X->Def.Nsite+1); + jb = 0; + for(ib=0;ibCheck.sdim;ib++){ + list_jb[ib] = jb; + i = ib*ihfbit; + num_up = 0; + for(j=0;jDef.Tpow[j]; + div_up = div_up/X->Def.Tpow[j]; + num_up += div_up; + } + all_up = (X->Def.Nsite+1)/2; + tmp_1 = Binomial(all_up,X->Def.Ne-num_up,comb,all_up); + jb += tmp_1; + } + free_li_2d_allocate(comb); +} + +void calculate_jb_Spin_Old(struct BindStruct *X, long unsigned int *list_jb, long unsigned int ihfbit,unsigned int N){ + long unsigned int ib,jb,div_up,i,j,tmp_1; + int num_up,all_up; + long int **comb; + comb = li_2d_allocate(X->Def.Nsite+1,X->Def.Nsite+1); + for(ib=0;ibCheck.sdim;ib++){ + list_jb[ib] = jb; + i = ib*ihfbit; + num_up=0; + for(j=0;jDef.Tpow[j]; + div_up = div_up/X->Def.Tpow[j]; + num_up += div_up; + } + all_up = (X->Def.Nsite+1)/2; + tmp_1 = Binomial(all_up,X->Def.Ne-num_up,comb,all_up); + jb += tmp_1; + } + free_li_2d_allocate(comb); +} + +void calculate_jb_Kondo(struct BindStruct *X, long unsigned int *list_jb, long unsigned int ihfbit){ + long unsigned int jb = 0,i,ib,j; + long unsigned int div_up, div_down,ihfSpinDown; + int N_all_up, N_all_down; + int num_up, num_down, icheck_loc; + int all_up, all_down, all_loc,tmp_res,num_loc,num_loc_up,num_loc_down; + long unsigned int tmp_1,tmp_2,tmp_3; + long int **comb; + + comb = li_2d_allocate(X->Def.Nsite+1,X->Def.Nsite+1); + N_all_up = X->Def.Nup; + N_all_down = X->Def.Ndown; + fprintf(stdoutMPI, cStateNupNdown, N_all_up,N_all_down); + + jb = 0; + num_loc = 0; + for(j=X->Def.Nsite/2; j< X->Def.Nsite ;j++){// counting localized # of spins + if(X->Def.LocSpn[j] != ITINERANT){ + num_loc += 1; + } + } + for(ib=0;ibCheck.sdim;ib++){ //sdim = 2^(N/2) + list_jb[ib] = jb; + i = ib*ihfbit; // ihfbit=pow(2,((Nsite+1)/2)) + num_up = 0; + num_down = 0; + icheck_loc = 1; + + for(j=X->Def.Nsite/2; j< X->Def.Nsite ;j++){ + div_up = i & X->Def.Tpow[2*j]; + div_up = div_up/X->Def.Tpow[2*j]; + div_down = i & X->Def.Tpow[2*j+1]; + div_down = div_down/X->Def.Tpow[2*j+1]; + if(X->Def.LocSpn[j] == ITINERANT){ + num_up += div_up; + num_down += div_down; + }else{ + num_up += div_up; + num_down += div_down; + if(X->Def.Nsite%2==1 && j==(X->Def.Nsite/2)){ // odd site + icheck_loc = icheck_loc; + ihfSpinDown = div_down; + if(div_down ==0){ + num_up += 1; + } + }else{ + icheck_loc = icheck_loc*(div_up^div_down);// exclude empty or doubly occupied site + } + } + } + + if(icheck_loc == 1){ // itinerant of local spins without holon or doublon + tmp_res = X->Def.Nsite%2; // even Ns-> 0, odd Ns -> 1 + all_loc = X->Def.NLocSpn-num_loc; // # of local spins + all_up = (X->Def.Nsite+tmp_res)/2-all_loc; + all_down = (X->Def.Nsite-tmp_res)/2-all_loc; + if(X->Def.Nsite%2==1 && X->Def.LocSpn[X->Def.Nsite/2] != ITINERANT){ + all_up = (X->Def.Nsite)/2-all_loc; + all_down = (X->Def.Nsite)/2-all_loc; + } + + for(num_loc_up=0; num_loc_up <= all_loc; num_loc_up++){ + tmp_1 = Binomial(all_loc, num_loc_up, comb, all_loc); + if( X->Def.Nsite%2==1 && X->Def.LocSpn[X->Def.Nsite/2] != ITINERANT){ + if(ihfSpinDown !=0){ + tmp_2 = Binomial(all_up, X->Def.Nup-num_up-num_loc_up, comb, all_up); + tmp_3 = Binomial(all_down, X->Def.Ndown-num_down-(all_loc-num_loc_up), comb, all_down); + }else{ + tmp_2 = Binomial(all_up, X->Def.Nup-num_up-num_loc_up, comb, all_up); + tmp_3 = Binomial(all_down, X->Def.Ndown-num_down-(all_loc-num_loc_up), comb, all_down); + } + }else{ + tmp_2 = Binomial(all_up, X->Def.Nup-num_up-num_loc_up, comb, all_up); + tmp_3 = Binomial(all_down, X->Def.Ndown-num_down-(all_loc-num_loc_up), comb, all_down); + } + jb += tmp_1*tmp_2*tmp_3; + } + } + } + free_li_2d_allocate(comb); +} + +void calculate_jb_KondoGC(struct BindStruct *X, int num_loc, long unsigned int *list_jb, long unsigned int ihfbit){ + long unsigned int jb = 0; + for(long unsigned int ib=0;ib < X->Check.sdim;ib++){ + list_jb[ib] = jb; + long unsigned int i = ib*ihfbit; + int icheck_loc = 1; + for(long unsigned int j=(X->Def.Nsite+1)/2; j< X->Def.Nsite ;j++){ + long unsigned int div_up = i & X->Def.Tpow[2*j]; + div_up = div_up/X->Def.Tpow[2*j]; + long unsigned int div_down = i & X->Def.Tpow[2*j+1]; + div_down = div_down/X->Def.Tpow[2*j+1]; + if(X->Def.LocSpn[j] != ITINERANT){ + if(X->Def.Nsite%2==1 && j==(X->Def.Nsite/2)){ + icheck_loc = icheck_loc; + }else{ + icheck_loc = icheck_loc*(div_up^div_down);// exclude doubllly ocupited site + } + } + }if(icheck_loc == 1){ + if(X->Def.Nsite%2==1 && X->Def.LocSpn[X->Def.Nsite/2] != ITINERANT){ + jb += X->Def.Tpow[X->Def.Nsite-1-(X->Def.NLocSpn-num_loc)]; + }else{ + jb += X->Def.Tpow[X->Def.Nsite-(X->Def.NLocSpn-num_loc)]; + } + } + } +} + +void calculate_jb_Hubbard(struct BindStruct *X,long unsigned int *list_jb, long unsigned int ihfbit, unsigned int N2){ + /*[s] this part can not be parallelized*/ + long unsigned int jb = 0,div,i,j,tmp_1,tmp_2; + long int **comb; + int num_up,num_down; + int tmp_res,all_up,all_down; + comb = li_2d_allocate(X->Def.Nsite+1,X->Def.Nsite+1); + for(long unsigned ib=0;ibCheck.sdim;ib++){ // sdim = 2^(N/2) + list_jb[ib] = jb; + i = ib*ihfbit; + //[s] counting # of up and down electrons + num_up = 0; + for(j=0;j<=N2-2;j+=2){ // even -> up spin + div = i & X->Def.Tpow[j]; + div = div/X->Def.Tpow[j]; + num_up += div; + } + num_down=0; + for(j=1;j<=N2-1;j+=2){ // odd -> down spin + div=i & X->Def.Tpow[j]; + div=div/X->Def.Tpow[j]; + num_down+=div; + } + //[e] counting # of up and down electrons + tmp_res = X->Def.Nsite%2; // even Ns-> 0, odd Ns -> 1 + all_up = (X->Def.Nsite+tmp_res)/2; + all_down = (X->Def.Nsite-tmp_res)/2; + + tmp_1 = Binomial(all_up,X->Def.Nup-num_up,comb,all_up); + tmp_2 = Binomial(all_down,X->Def.Ndown-num_down,comb,all_down); + jb += tmp_1*tmp_2; + } + free_li_2d_allocate(comb); + /*[e] this part can not be parallelized*/ +} + +void calculate_jb_Hubbard_Hacker(struct BindStruct *X,long unsigned int *list_jb, long unsigned int ihfbit, unsigned int N2){ + long unsigned int sdim_div,sdim_rest,ib_start,ib_end; + long unsigned int i,ib,j,jb,div,tmp_res,tmp_1,tmp_2; + int mythread,num_up,num_down,all_up,all_down; + long int **comb2; + long unsigned int *jbthread; + + jbthread = lui_1d_allocate(nthreads); + #pragma omp parallel default(none) \ + shared(X,list_jb,ihfbit,N2,nthreads,jbthread) \ + private(ib,i,j,num_up,num_down,div,tmp_res,tmp_1,tmp_2,jb,all_up,all_down, \ + comb2,mythread,sdim_div,sdim_rest,ib_start,ib_end) + { + jb = 0; + #ifdef _OPENMP + mythread = omp_get_thread_num(); + #else + mythread = 0; + #endif + comb2 = li_2d_allocate(X->Def.Nsite+1,X->Def.Nsite+1); + /* explict loop decomposition is nessesary to fix the asignment to each thread*/ + sdim_div = X->Check.sdim / nthreads; + sdim_rest = X->Check.sdim % nthreads; + if(mythread < sdim_rest){ + ib_start = sdim_div*mythread + mythread; + ib_end = ib_start + sdim_div + 1; + }else{ + ib_start = sdim_div*mythread + sdim_rest; + ib_end = ib_start + sdim_div; + } + for(ib=ib_start;ibDef.Tpow[j]; + div = div/X->Def.Tpow[j]; + num_up += div; + } + num_down=0; + for(j=1;j<=N2-1;j+=2){ + div=i & X->Def.Tpow[j]; + div=div/X->Def.Tpow[j]; + num_down+=div; + } + + tmp_res = X->Def.Nsite%2; // even Ns-> 0, odd Ns -> 1 + all_up = (X->Def.Nsite+tmp_res)/2; + all_down = (X->Def.Nsite-tmp_res)/2; + + tmp_1 = Binomial(all_up,X->Def.Nup-num_up,comb2,all_up); + tmp_2 = Binomial(all_down,X->Def.Ndown-num_down,comb2,all_down); + jb += tmp_1*tmp_2; + } + free_li_2d_allocate(comb2); + if(mythread != nthreads-1) jbthread[mythread+1] = jb; + #pragma omp barrier + #pragma omp single + { + jbthread[0] = 0; + for(j=1;jDef.Nsite+1,X->Def.Nsite+1); + // this part can not be parallelized + jb = 0; + iSpnup = 0; + iMinup = 0; + iAllup = X->Def.Ne; + if(X->Def.Ne > X->Def.Nsite){ + iMinup = X->Def.Ne-X->Def.Nsite; + iAllup = X->Def.Nsite; + } + for(ib=0;ibCheck.sdim;ib++){ + list_jb[ib] = jb; + i = ib*ihfbit; + num_up = 0; + for(j=0;j<=N2-2;j+=2){ + div = i & X->Def.Tpow[j]; + div = div/X->Def.Tpow[j]; + num_up += div; + } + num_down=0; + for(j=1;j<=N2-1;j+=2){ + div = i & X->Def.Tpow[j]; + div = div/X->Def.Tpow[j]; + num_down += div; + } + tmp_res = X->Def.Nsite%2; // even Ns-> 0, odd Ns -> 1 + all_up = (X->Def.Nsite+tmp_res)/2; + all_down = (X->Def.Nsite-tmp_res)/2; + + for(iSpnup=iMinup; iSpnup<= iAllup; iSpnup++){ + tmp_1 = Binomial(all_up, iSpnup-num_up,comb,all_up); + tmp_2 = Binomial(all_down, X->Def.Ne-iSpnup-num_down,comb,all_down); + jb += tmp_1*tmp_2; + } + } + free_li_2d_allocate(comb); +} + +void calculate_jb_HubbardNCoserved_Hacker(struct BindStruct *X,long unsigned int *list_jb, long unsigned int ihfbit, unsigned int N2){ + long unsigned int sdim_div,sdim_rest,ib_start,ib_end; + long unsigned int i,ib,j,jb,div,tmp_res,tmp_1,tmp_2; + int mythread,num_up,num_down,all_up,all_down; + long int **comb2; + long unsigned int *jbthread; + int iSpnup, iMinup,iAllup; + + iMinup = 0; + iAllup = X->Def.Ne; + if(X->Def.Ne > X->Def.Nsite){ + iMinup = X->Def.Ne-X->Def.Nsite; + iAllup = X->Def.Nsite; + } + jbthread = lui_1d_allocate(nthreads); + #pragma omp parallel default(none) \ + shared(X,iMinup,iAllup,list_jb,ihfbit,N2,nthreads,jbthread) \ + private(ib,i,j,num_up,num_down,div,tmp_res,tmp_1,tmp_2,iSpnup,jb,all_up,all_down,comb2, \ + mythread,sdim_rest,sdim_div,ib_start,ib_end) + { + jb = 0; + iSpnup=0; + #ifdef _OPENMP + mythread = omp_get_thread_num(); + #else + mythread = 0; + #endif + comb2 = li_2d_allocate(X->Def.Nsite+1,X->Def.Nsite+1); + /* explict loop decomposition is nessesary to fix the asignment to each thread*/ + sdim_div = X->Check.sdim / nthreads; + sdim_rest = X->Check.sdim % nthreads; + if(mythread < sdim_rest){ + ib_start = sdim_div*mythread + mythread; + ib_end = ib_start + sdim_div + 1; + }else{ + ib_start = sdim_div*mythread + sdim_rest; + ib_end = ib_start + sdim_div; + } + for(ib=ib_start;ibDef.Tpow[j]; + div = div/X->Def.Tpow[j]; + num_up += div; + } + num_down=0; + for(j=1;j<=N2-1;j+=2){ + div=i & X->Def.Tpow[j]; + div=div/X->Def.Tpow[j]; + num_down+=div; + } + tmp_res = X->Def.Nsite%2; // even Ns-> 0, odd Ns -> 1 + all_up = (X->Def.Nsite+tmp_res)/2; + all_down = (X->Def.Nsite-tmp_res)/2; + + for(iSpnup=iMinup; iSpnup<= iAllup; iSpnup++){ + tmp_1 = Binomial(all_up, iSpnup-num_up,comb2,all_up); + tmp_2 = Binomial(all_down, X->Def.Ne-iSpnup-num_down,comb2,all_down); + jb += tmp_1*tmp_2; + } + } + free_li_2d_allocate(comb2); + if(mythread != nthreads-1) jbthread[mythread+1] = jb; + #pragma omp barrier + #pragma omp single + { + jbthread[0] = 0; + for(j=1;j