      subroutine pulayforce(ndim1,ndim2,natoms,nalpha,nbeta,nocc,nconts,
     &                      ncontp,ncontd,icfunc,matloc,nptrs,nptrp,
     &                      nptrd,gradient,density,dsbydx,coeffa,coeffb,
     &                      orbnrgy,wkvec,grad)

c  this subroutine written by alain st-amant of the department of
c  pharmaceutical chemistry, university of california, san francisco.
c  all rights reserved.  this is part of the DeFT project.

      implicit real*8(a-h,o-z)

      include "mpif.h"

      dimension istat(mpi_status_size)

      dimension indexx(36)

      dimension subgrad(3)

      dimension icfunc(*),matloc(*),nptrs(*),nptrp(*),nptrd(*)

      dimension gradient(3,*),density(*),dsbydx(ndim2,*)
      dimension coeffa(ndim1,*),coeffb(ndim1,*),orbnrgy(nocc,*)
      dimension wkvec(*),grad(3,*)

      data zero,two/0.0d0,2.0d0/

      call mpi_comm_rank(mpi_comm_world,myid,ierr)
      call mpi_comm_size(mpi_comm_world,numprocs,ierr)

      ncontractions=nconts+3*ncontp+6*ncontd

      do 1001 i=1,natoms
      do 1001 j=1,3
 1001 grad(j,i)=zero

      do 1002 i=1,ndim2
 1002 density(i)=zero

      if(myid.eq.0) then
                          rewind 51

                          read(51) norbitals

                          do 1003 i=1,norbitals
 1003                     call fastrd(51,orbnrgy(1,i),norbitals)
                    endif

      nloop=ncontractions/numprocs

      if((nloop*numprocs).lt.ncontractions) nloop=nloop+1

      istart=myid*nloop+1
      iend=(myid+1)*nloop

      do 1004 i=istart,iend
      do 1004 j=1,ncontractions
      ij=((max(i,j)*(max(i,j)-1))/2)+min(i,j)
      do 1004 k=1,norbitals
      do 1004 l=1,norbitals
 1004 density(ij)=density(ij)+orbnrgy(l,k)*coeffa(i,k)*coeffa(j,l)

      if(nalpha.eq.nbeta) then

            do 1005 i=1,ncontractions*(ncontractions+1)/2
 1005       density(i)=two*density(i)

                          else

              if(myid.eq.0) then
                                  read(51) norbitals

                                  do 1006 i=1,norbitals
 1006                             call fastrd(51,orbnrgy(1,i),norbitals)
                            endif

            do 1007 i=istart,iend
            do 1007 j=1,ncontractions
            ij=((max(i,j)*(max(i,j)-1))/2)+min(i,j)
            do 1007 k=1,norbitals
            do 1007 l=1,norbitals
 1007       density(ij)=density(ij)+orbnrgy(l,k)*coeffb(i,k)*coeffb(j,l)

                          endif

      call mpi_reduce(density,wkvec,ndim2,
     &                mpi_real8,mpi_sum,0,mpi_comm_world,ierr)

      call mpi_bcast(wkvec,ndim2,mpi_real8,0,mpi_comm_world,ierr)

      ncombos=(nconts*(nconts+1))/2

      nloop=ncombos/numprocs
      if((nloop*numprocs).lt.ncombos) nloop=nloop+1

      istart=myid*nloop+1
      iend=(myid+1)*nloop

      ij=0

      do 1008 iii=1,nconts
      do 1008 jjj=1,iii

      ij=ij+1

      if((ij.lt.istart).or.(ij.gt.iend)) goto 1008

      ii=nptrs(iii)
      jj=nptrs(jjj)

      if(icfunc(ii).eq.icfunc(jj)) goto 1008

      i1=matloc(ii)
      i2=(i1*(i1-1))/2
      i3=matloc(jj)
      i4=i2+i3

      do 1009 i=1,3
 1009 subgrad(i)=zero

      do 1010 i=1,3
 1010 subgrad(i)=subgrad(i)+dsbydx(i4,i)*wkvec(i4)

      do 1011 i=1,3
      grad(i,icfunc(ii))=grad(i,icfunc(ii))-subgrad(i)
 1011 grad(i,icfunc(jj))=grad(i,icfunc(jj))+subgrad(i)

 1008 continue

      ncombos=nconts*ncontp

      nloop=ncombos/numprocs
      if((nloop*numprocs).lt.ncombos) nloop=nloop+1

      istart=myid*nloop+1
      iend=(myid+1)*nloop

      ij=0

      do 1012 iii=1,ncontp
      do 1012 jjj=1,nconts

      ij=ij+1

      if((ij.lt.istart).or.(ij.gt.iend)) goto 1012

      ii=nptrp(iii)
      jj=nptrs(jjj)

      if(icfunc(ii).eq.icfunc(jj)) goto 1012

      i1=matloc(ii)
      i2=(i1*(i1-1))/2
      i3=matloc(jj)
      i4=i2+i3

      indexx(1)=i4
      indexx(2)=i4+i1
      indexx(3)=i4+i1*2+1

      do 1013 i=1,3
 1013 subgrad(i)=zero

      do 1014 i=1,3
      do 1014 j=1,3
 1014 subgrad(i)=subgrad(i)+dsbydx(indexx(j),i)*wkvec(indexx(j))

      do 1015 i=1,3
      grad(i,icfunc(ii))=grad(i,icfunc(ii))-subgrad(i)
 1015 grad(i,icfunc(jj))=grad(i,icfunc(jj))+subgrad(i)

 1012 continue

      ncombos=(ncontp*(ncontp+1))/2

      nloop=ncombos/numprocs
      if((nloop*numprocs).lt.ncombos) nloop=nloop+1

      istart=myid*nloop+1
      iend=(myid+1)*nloop

      ij=0

      do 1016 iii=1,ncontp
      do 1016 jjj=1,iii

      ij=ij+1

      if((ij.lt.istart).or.(ij.gt.iend)) goto 1016

      ii=nptrp(iii)
      jj=nptrp(jjj)

      if(icfunc(ii).eq.icfunc(jj)) goto 1016

      i1=matloc(ii)
      i2=(i1*(i1-1))/2
      i3=matloc(jj)
      i4=i2+i3

      indexx(1)=i4
      indexx(2)=i4+1
      indexx(3)=i4+2
      indexx(4)=i4+i1
      indexx(5)=i4+i1+1
      indexx(6)=i4+i1+2
      indexx(7)=i4+i1*2+1
      indexx(8)=i4+i1*2+2
      indexx(9)=i4+i1*2+3

      do 1017 i=1,3
 1017 subgrad(i)=zero

      do 1018 i=1,3
      do 1018 j=1,9
 1018 subgrad(i)=subgrad(i)+dsbydx(indexx(j),i)*wkvec(indexx(j))

      do 1019 i=1,3
      grad(i,icfunc(ii))=grad(i,icfunc(ii))-subgrad(i)
 1019 grad(i,icfunc(jj))=grad(i,icfunc(jj))+subgrad(i)

 1016 continue

      ncombos=nconts*ncontd

      nloop=ncombos/numprocs
      if((nloop*numprocs).lt.ncombos) nloop=nloop+1

      istart=myid*nloop+1
      iend=(myid+1)*nloop

      ij=0

      do 1020 iii=1,ncontd
      do 1020 jjj=1,nconts

      ij=ij+1

      if((ij.lt.istart).or.(ij.gt.iend)) goto 1020

      ii=nptrd(iii)
      jj=nptrs(jjj)

      if(icfunc(ii).eq.icfunc(jj)) goto 1020

      i1=matloc(ii)
      i2=(i1*(i1-1))/2
      i3=matloc(jj)
      i4=i2+i3

      indexx(1)=i4
      indexx(2)=i4+i1
      indexx(3)=i4+i1*2+1
      indexx(4)=i4+i1*3+3
      indexx(5)=i4+i1*4+6
      indexx(6)=i4+i1*5+10

      do 1021 i=1,3
 1021 subgrad(i)=zero

      do 1022 i=1,3
      do 1022 j=1,6
 1022 subgrad(i)=subgrad(i)+dsbydx(indexx(j),i)*wkvec(indexx(j))

      do 1023 i=1,3
      grad(i,icfunc(ii))=grad(i,icfunc(ii))-subgrad(i)
 1023 grad(i,icfunc(jj))=grad(i,icfunc(jj))+subgrad(i)

 1020 continue

      ncombos=ncontp*ncontd

      nloop=ncombos/numprocs
      if((nloop*numprocs).lt.ncombos) nloop=nloop+1

      istart=myid*nloop+1
      iend=(myid+1)*nloop

      ij=0

      do 1024 iii=1,ncontd
      do 1024 jjj=1,ncontp

      ij=ij+1

      if((ij.lt.istart).or.(ij.gt.iend)) goto 1024

      ii=nptrd(iii)
      jj=nptrp(jjj)

      if(icfunc(ii).eq.icfunc(jj)) goto 1024

      i1=matloc(ii)
      i2=(i1*(i1-1))/2
      i3=matloc(jj)
      i4=i2+i3

      indexx(01)=i4
      indexx(02)=i4+1
      indexx(03)=i4+2
      indexx(04)=i4+i1
      indexx(05)=i4+i1+1
      indexx(06)=i4+i1+2
      indexx(07)=i4+i1*2+1
      indexx(08)=i4+i1*2+2
      indexx(09)=i4+i1*2+3
      indexx(10)=i4+i1*3+3
      indexx(11)=i4+i1*3+4
      indexx(12)=i4+i1*3+5
      indexx(13)=i4+i1*4+6
      indexx(14)=i4+i1*4+7
      indexx(15)=i4+i1*4+8
      indexx(16)=i4+i1*5+10
      indexx(17)=i4+i1*5+11
      indexx(18)=i4+i1*5+12

      do 1025 i=1,3
 1025 subgrad(i)=zero

      do 1026 i=1,3
      do 1026 j=1,18
 1026 subgrad(i)=subgrad(i)+dsbydx(indexx(j),i)*wkvec(indexx(j))

      do 1027 i=1,3
      grad(i,icfunc(ii))=grad(i,icfunc(ii))-subgrad(i)
 1027 grad(i,icfunc(jj))=grad(i,icfunc(jj))+subgrad(i)

 1024 continue

      ncombos=(ncontd*(ncontd+1))/2

      nloop=ncombos/numprocs
      if((nloop*numprocs).lt.ncombos) nloop=nloop+1

      istart=myid*nloop+1
      iend=(myid+1)*nloop

      ij=0

      do 1028 iii=1,ncontd
      do 1028 jjj=1,iii

      ij=ij+1

      if((ij.lt.istart).or.(ij.gt.iend)) goto 1028

      ii=nptrd(iii)
      jj=nptrd(jjj)

      if(icfunc(ii).eq.icfunc(jj)) goto 1028

      i1=matloc(ii)
      i2=(i1*(i1-1))/2
      i3=matloc(jj)
      i4=i2+i3

      indexx(01)=i4
      indexx(02)=i4+1
      indexx(03)=i4+2
      indexx(04)=i4+3
      indexx(05)=i4+4
      indexx(06)=i4+5
      indexx(07)=i4+i1
      indexx(08)=i4+i1+1
      indexx(09)=i4+i1+2
      indexx(10)=i4+i1+3
      indexx(11)=i4+i1+4
      indexx(12)=i4+i1+5
      indexx(13)=i4+i1*2+1
      indexx(14)=i4+i1*2+2
      indexx(15)=i4+i1*2+3
      indexx(16)=i4+i1*2+4
      indexx(17)=i4+i1*2+5
      indexx(18)=i4+i1*2+6
      indexx(19)=i4+i1*3+3
      indexx(20)=i4+i1*3+4
      indexx(21)=i4+i1*3+5
      indexx(22)=i4+i1*3+6
      indexx(23)=i4+i1*3+7
      indexx(24)=i4+i1*3+8
      indexx(25)=i4+i1*4+6
      indexx(26)=i4+i1*4+7
      indexx(27)=i4+i1*4+8
      indexx(28)=i4+i1*4+9
      indexx(29)=i4+i1*4+10
      indexx(30)=i4+i1*4+11
      indexx(31)=i4+i1*5+10
      indexx(32)=i4+i1*5+11
      indexx(33)=i4+i1*5+12
      indexx(34)=i4+i1*5+13
      indexx(35)=i4+i1*5+14
      indexx(36)=i4+i1*5+15

      do 1029 i=1,3
 1029 subgrad(i)=zero

      do 1030 i=1,3
      do 1030 j=1,36
 1030 subgrad(i)=subgrad(i)+dsbydx(indexx(j),i)*wkvec(indexx(j))

      do 1031 i=1,3
      grad(i,icfunc(ii))=grad(i,icfunc(ii))-subgrad(i)
 1031 grad(i,icfunc(jj))=grad(i,icfunc(jj))+subgrad(i)

 1028 continue

      call mpi_reduce(grad,wkvec,3*natoms,
     &                mpi_real8,mpi_sum,0,mpi_comm_world,ierr)

      if(myid.eq.0) then
                          do 1032 i=1,natoms
                          do 1032 j=1,3
 1032                     gradient(j,i)=gradient(j,i)+wkvec(3*(i-1)+j)
                    endif

      return
      end
