diff --git a/newimage.cc b/newimage.cc
index a951376faea6d298c87aaf06f0b7514cff8cb58c..a2a007b5faa4e44033b78c05083cecaae3e42ac6 100644
--- a/newimage.cc
+++ b/newimage.cc
@@ -371,7 +371,80 @@ namespace NEWIMAGE {
     return 0;
   }
 
+  // Volume->ColumnVector
+
+  template<class T>
+  ReturnMatrix volume<T>::vec(const volume<T>& mask) const
+  {
+    if (!samesize(mask,*this)) {imthrow("volume<T>::vec: Mask and volume of different size",3);}
+    ColumnVector ovec(xsize()*ysize()*zsize());
+    for (int vindx=0,k=0; k<zsize(); k++) {
+      for (int j=0; j<ysize(); j++) {
+	for (int i=0; i<xsize(); i++) {
+          ovec.element(vindx) = (mask(i,j,k)>0) ? (*this)(i,j,k) : 0.0;
+          vindx++;
+	}
+      }
+    }
+    ovec.Release();
+    return ovec;
+  }
+
+  // Code multiplication to avoid allocating mask volume
+
+  template <class T>
+  ReturnMatrix volume<T>::vec() const
+  {
+    ColumnVector ovec(xsize()*ysize()*zsize());
+    for (int vindx=0, k=0; k<zsize(); k++) {
+      for (int j=0; j<ysize(); j++) {
+	for (int i=0; i<xsize(); i++) {
+          ovec.element(vindx) = (*this)(i,j,k);
+          vindx++;
+	}
+      }
+    }
+    ovec.Release();
+    return ovec;
+  }
+
+  // Insert ColumnVector into volume
+
+  template <class T>
+  void volume<T>::insert_vec(const ColumnVector&  pvec,
+                             const volume<T>&     mask)
+  {
+    if (pvec.Nrows() != xsize()*ysize()*zsize()) {
+      imthrow("volume<T>::insert_vec: Size mismatch between ColumnVector and image volume",3);
+    }
+    if (!samesize(mask,*this)) {
+      imthrow("volume<T>::insert_vec: Size mismatch between mask and image volume",3);
+    }
+    for (int vindx=0, k=0; k<zsize(); k++) {
+      for (int j=0; j<ysize(); j++) {
+	for (int i=0; i<xsize(); i++) {
+	  (*this)(i,j,k) = (mask(i,j,k) > 0) ? ((T) pvec.element(vindx)) : ((T) 0);
+          vindx++;
+	}
+      }
+    }
+  }
 
+  template <class T>
+  void volume<T>::insert_vec(const ColumnVector&  pvec)
+  {
+    if (pvec.Nrows() != xsize()*ysize()*zsize()) {
+      imthrow("volume<T>::insert_vec: Size mismatch between ColumnVector and image volume",3);
+    }
+    for (int vindx=0, k=0; k<zsize(); k++) {
+      for (int j=0; j<ysize(); j++) {
+	for (int i=0; i<xsize(); i++) {
+	  (*this)(i,j,k) = ((T) pvec.element(vindx));
+          vindx++;
+	}
+      }
+    }
+  }
 
   // ROI functions
 
@@ -533,7 +606,148 @@ namespace NEWIMAGE {
     
   }
   
-  
+  // The following routines are used to obtain an interpolated intensity value and
+  // either a selected partial derivative (dx, dy or dz) or all partial derivatives
+  // at the same location. The routine returning all derivatives is useful for 
+  // non-linear registration of one subject to another (or to an atlas) and the 
+  // routines returning a single derivative are useful e.g. for distortion correction.
+  // Puss J
+
+  template <class T>
+  float volume<T>::interp1partial(// Input
+                                  float x, float y, float z,    // Co-ordinates to get value for
+                                  int     dir,                  // Direction for partial, 0->x, 1->y, 2->z
+                                  // Output
+                                  float  *pderiv)               // Derivative returned here
+  const
+  {
+    if (p_interpmethod != trilinear) {
+      imthrow("Derivatives only implemented for trilinear interpolation",10);
+    }
+    if (dir < 0 || dir > 2) {
+      imthrow("Ivalid derivative direction",11);
+    }
+    int ix = ((int) floor(x));
+    int iy = ((int) floor(y));
+    int iz = ((int) floor(z));
+    float dx = x - ((float) ix);
+    float dy = y - ((float) iy);
+    float dz = z - ((float) iz);
+    float v000, v001, v010, v011, v100, v101, v110, v111;
+    if (!in_neigh_bounds(*this,ix,iy,iz)) {   // We'll have to do some extrapolation
+      v000 = (float) this->operator()(ix,iy,iz);
+      v001 = (float) this->operator()(ix,iy,iz+1);
+      v010 = (float) this->operator()(ix,iy+1,iz);
+      v011 = (float) this->operator()(ix,iy+1,iz+1);
+      v100 = (float) this->operator()(ix+1,iy,iz);
+      v101 = (float) this->operator()(ix+1,iy,iz+1);
+      v110 = (float) this->operator()(ix+1,iy+1,iz);
+      v111 = (float) this->operator()(ix+1,iy+1,iz+1);
+    }
+    else {
+      T t000, t001, t010, t011, t100, t101, t110, t111;
+      this->getneighbours(ix,iy,iz,t000,t001,t010,t011,t100,t101,t110,t111);
+      v000 = ((float) t000); v001 = ((float) t001); v010 = ((float) t010);  
+      v011 = ((float) t011); v100 = ((float) t100); v101 = ((float) t101);  
+      v110 = ((float) t110); v111 = ((float) t111); 
+    }
+    // The (seemingly silly) code multiplication below is to
+    // ensure that in no case does calculating one of the partials
+    // neccessitate any calculation over and above just calculating
+    // the interpolated value.
+    float tmp11, tmp12, tmp13, tmp14;
+    float tmp21, tmp22;
+    if (dir == 0) {            // df/dx
+      float onemdz = 1.0-dz;
+      tmp11 = onemdz*v000 + dz*v001;
+      tmp12 = onemdz*v010 + dz*v011;
+      tmp13 = onemdz*v100 + dz*v101;
+      tmp14 = onemdz*v110 + dz*v111;
+      tmp21 = (1.0-dy)*tmp11 + dy*tmp12;
+      tmp22 = (1.0-dy)*tmp13 + dy*tmp14;
+      *pderiv = tmp22 - tmp21;
+      return((1.0-dx)*tmp21 + dx*tmp22);
+    }
+    else if (dir == 1) {       // df/dy 
+      float onemdz = 1.0-dz;
+      tmp11 = onemdz*v000 + dz*v001;
+      tmp12 = onemdz*v010 + dz*v011;
+      tmp13 = onemdz*v100 + dz*v101;
+      tmp14 = onemdz*v110 + dz*v111;
+      tmp21 = (1.0-dx)*tmp11 + dx*tmp13;
+      tmp22 = (1.0-dx)*tmp12 + dx*tmp14;
+      *pderiv = tmp22 - tmp21;
+      return((1.0-dy)*tmp21 + dy*tmp22);
+    }
+    else if (dir == 2) {       // df/dz
+      float onemdy = 1.0-dy;
+      tmp11 = onemdy*v000 + dy*v010;
+      tmp12 = onemdy*v001 + dy*v011;
+      tmp13 = onemdy*v100 + dy*v110;
+      tmp14 = onemdy*v101 + dy*v111;
+      tmp21 = (1.0-dx)*tmp11 + dx*tmp13;
+      tmp22 = (1.0-dx)*tmp12 + dx*tmp14;
+      *pderiv = tmp22 - tmp21;
+      return((1.0-dz)*tmp21 + dz*tmp22);
+    }
+    return(-1.0); // Should not be reached. Just to stop compiler from complaining.
+  }
+
+  template <class T>
+  float volume<T>::interp3partial(// Input
+                                  float x, float y, float z,              // Co-ordinates to get value for
+                                   // Output
+                                  float *dfdx, float *dfdy, float *dfdz)  // Partials
+  const
+  {
+    if (p_interpmethod != trilinear) {
+      imthrow("Derivatives only implemented for trilinear interpolation",10);
+    }
+    int ix = ((int) floor(x));
+    int iy = ((int) floor(y));
+    int iz = ((int) floor(z));
+    float dx = x - ((float) ix);
+    float dy = y - ((float) iy);
+    float dz = z - ((float) iz);
+    float v000, v001, v010, v011, v100, v101, v110, v111;
+    if (!in_neigh_bounds(*this,ix,iy,iz)) {   // We'll have to do some extrapolation
+      v000 = (float) this->operator()(ix,iy,iz);
+      v001 = (float) this->operator()(ix,iy,iz+1);
+      v010 = (float) this->operator()(ix,iy+1,iz);
+      v011 = (float) this->operator()(ix,iy+1,iz+1);
+      v100 = (float) this->operator()(ix+1,iy,iz);
+      v101 = (float) this->operator()(ix+1,iy,iz+1);
+      v110 = (float) this->operator()(ix+1,iy+1,iz);
+      v111 = (float) this->operator()(ix+1,iy+1,iz+1);
+    }
+    else {
+      T t000, t001, t010, t011, t100, t101, t110, t111;
+      this->getneighbours(ix,iy,iz,t000,t001,t010,t011,t100,t101,t110,t111);
+      v000 = ((float) t000); v001 = ((float) t001); v010 = ((float) t010);  
+      v011 = ((float) t011); v100 = ((float) t100); v101 = ((float) t101);  
+      v110 = ((float) t110); v111 = ((float) t111); 
+    }
+    //
+    // And do linear interpolation with calculation of all partials
+    //
+    float onemdz = 1.0-dz;
+    float onemdy = 1.0-dy;    
+    float tmp11 = onemdz*v000 + dz*v001;
+    float tmp12 = onemdz*v010 + dz*v011;
+    float tmp13 = onemdz*v100 + dz*v101;
+    float tmp14 = onemdz*v110 + dz*v111;
+    *dfdx = onemdy*(tmp13-tmp11) + dy*(tmp14-tmp12);
+    *dfdy = (1.0-dx)*(tmp12-tmp11) + dx*(tmp14-tmp13);
+    tmp11 = onemdy*v000 + dy*v010;
+    tmp12 = onemdy*v001 + dy*v011;
+    tmp13 = onemdy*v100 + dy*v110;
+    tmp14 = onemdy*v101 + dy*v111;
+    float tmp21 = (1.0-dx)*tmp11 + dx*tmp13;
+    float tmp22 = (1.0-dx)*tmp12 + dx*tmp14;
+    *dfdz = tmp22 - tmp21;
+    return(onemdz*tmp21 + dz*tmp22);
+  }
+
   template <class T>
   float volume<T>::interpolate(float x, float y, float z) const
     {
diff --git a/newimage.h b/newimage.h
index f644df00844c2f461579f4739dfce158527cb1cd..cba50b1efb43e6f65abff5d757c1fb2e9a093cba 100644
--- a/newimage.h
+++ b/newimage.h
@@ -181,6 +181,13 @@ namespace NEWIMAGE {
     volume<T> ROI() const;  // returns a new volume = ROI
     int copyROIonly(const volume<T>& source);
 
+    // Volume<->ColumnVector conversion
+
+    ReturnMatrix vec(const volume<T>& mask) const;
+    ReturnMatrix vec() const;
+    void insert_vec(const ColumnVector& pvec, const volume<T>& pmask);
+    void insert_vec(const ColumnVector& pvec);
+
     // SECONDARY PROPERTIES
     Matrix sform_mat() const { return StandardSpaceCoordMat; }
     int sform_code() const { return StandardSpaceTypeCode; }
@@ -259,6 +266,13 @@ namespace NEWIMAGE {
     inline bool in_bounds(int x, int y, int z) const
       { return ( (x>=0) && (y>=0) && (z>=0) 
 		 && (x<ColumnsX) && (y<RowsY) && (z<SlicesZ) ); }
+    bool in_bounds(float x, float y, float z) const
+    {
+      int ix=((int) floor(x)); 
+      int iy=((int) floor(y)); 
+      int iz=((int) floor(z));
+      return((ix>=0) && (iy>=0) && (iz>=0) && ((ix+1)<ColumnsX) && ((iy+1)<RowsY) && ((iz+1)<SlicesZ));
+    } 
     inline T& operator()(int x, int y, int z)
       { set_whole_cache_validity(false); 
         if (in_bounds(x,y,z)) return *(basicptr(x,y,z)); 
@@ -267,6 +281,18 @@ namespace NEWIMAGE {
       const { 	if (in_bounds(x,y,z)) return *(basicptr(x,y,z)); 
 	        else                  return extrapolate(x,y,z); }
     float interpolate(float x, float y, float z) const;
+    float interpolate(float x, float y, float z, bool *ep) const;
+    float interp1partial(// Input
+                         float x, float y, float z,  // Co-ordinates to get value for
+                         int     dir,                   // Direction for partial, 0->x, 1->y, 2->z
+                         // Output
+                         float  *pderiv                // Derivative returned here
+                         ) const;
+    float interp3partial(// Input
+                         float x, float y, float z,             // Co-ordinate to get value for
+                         // Output
+                         float *dfdx, float *dfdy, float *dfdz  // Partials
+                         ) const;
 
     inline T& value(int x, int y, int z)
       { set_whole_cache_validity(false); 
diff --git a/newimagefns.h b/newimagefns.h
index b03e27a69d98622f27b7ce1be1b7b2ac53f2b7e8..6ab5bbdd163947a8cd28f762f10dc3279db3a5fa 100644
--- a/newimagefns.h
+++ b/newimagefns.h
@@ -197,6 +197,138 @@ namespace NEWIMAGE {
 
   // IMAGE PROCESSING ROUTINES
 
+  // A set of routines for "general" (affine + displacement-field)
+  // transformations. The "actual routine" is raw_general_transform,
+  // and the others are mainly alternative interfaces for certain
+  // special cases.
+
+  template<class T>
+  void raw_general_transform(// Input
+                             const volume<T>&         vin,        // Input volume
+                             const Matrix&            aff,        // 4x4 affine transformation matrix
+                             const volume4D<float>&   df,         // Displacement fields
+                             const vector<int>&       defdir,     // Directions of displacements. 
+                             const vector<int>&       derivdir,   // Directions of derivatives
+                             // Output
+                             volume<T>&               vout,       // Output volume
+                             volume4D<T>&             deriv);     // Partial derivative directions
+
+  template <class T>
+  void raw_general_transform(// Input
+                             const volume<T>&         vin,        // Input volume
+                             const Matrix&            aff,        // 4x4 affine transformation matrix
+                             const volume4D<float>&   df,         // Displacement fields
+                             const vector<int>&       defdir,     // Directions of displacements. 
+                             const vector<int>&       derivdir,   // Directions of derivatives
+                             // Output
+                             volume<T>&               vout,       // Output volume
+                             volume4D<T>&             deriv,      // Partial derivative directions
+                             volume<char>&            invol);     // Mask indicating what voxels fell inside original volume
+
+  template <class T>    
+  void raw_general_transform(// Input
+                             const volume<T>&         vin,        // Input volume
+                             const Matrix&            aff,        // 4x4 affine transformation matrix
+                             const volume4D<float>&   df,         // Displacement fields
+                             const vector<int>&       defdir,     // Directions of displacements. 
+                             const vector<int>&       derivdir,   // Directions of derivatives
+                             // Output
+                             volume<T>&               vout,       // Output volume
+                             volume4D<T>&             deriv,      // Partial derivative directions
+                             volume<char>             *invol);    // Mask indicating what voxels fell inside original volume
+
+  template <class T>
+  void affine_transform_3partial(// Input
+                                 const volume<T>&     vin,
+                                 const Matrix&        aff,
+                                 // Output
+                                 volume<T>&           vout,
+                                 volume4D<T>&         deriv);
+
+  template <class T>
+  void affine_transform_3partial(// Input
+                                 const volume<T>&     vin,
+                                 const Matrix&        aff,
+                                 // Output
+                                 volume<T>&           vout,
+                                 volume4D<T>&         deriv,
+                                 volume<char>&        invol);
+  template <class T>
+  void displacement_transform_1D(// Input
+                                 const volume<T>&       vin,
+                                 const Matrix&          aff,
+                                 const volume<float>&   df,
+                                 int                    defdir,
+                                 // Output
+                                 volume<T>&             vout);
+
+  template <class T>
+  void displacement_transform_1D(// Input
+                                 const volume<T>&       vin,
+                                 const Matrix&          aff,
+                                 const volume<float>&   df,
+                                 int                    defdir,
+                                 // Output
+                                 volume<T>&             vout,
+                                 volume<char>&          invol);
+
+  template <class T>
+  void displacement_transform_1D_3partial(// Input
+                                          const volume<T>&       vin,
+                                          const Matrix&          aff,
+                                          const volume<float>&   df,
+                                          int                    dir,
+                                          // Output
+                                          volume<T>&             vout,
+                                          volume4D<T>&           deriv);
+
+  template <class T>
+  void displacement_transform_1D_3partial(// Input
+                                          const volume<T>&       vin,
+                                          const Matrix&          aff,
+                                          const volume<float>&   df,
+                                          int                    dir,
+                                          // Output
+                                          volume<T>&             vout,
+                                          volume4D<T>&           deriv,
+                                          volume<char>&          invol);
+
+  template <class T>
+  void general_transform(// Input
+                         const volume<T>&         vin,
+                         const Matrix&            aff,
+                         const volume4D<float>&   df,
+                         // Output
+                         volume<T>&               vout);
+
+  template <class T>	       
+  void general_transform(// Input
+                         const volume<T>&         vin,
+                         const Matrix&            aff,
+                         const volume4D<float>&   df,
+                         // Output
+                         volume<T>&               vout,
+                         volume<char>&            invol);
+
+  template <class T>
+  void general_transform_3partial(// Input
+                                  const volume<T>&         vin,
+                                  const Matrix&            aff,
+                                  const volume4D<float>&   df,
+                                  // Output
+                                  volume<T>&               vout,
+                                  volume4D<T>&             deriv);
+
+  template <class T>
+  void general_transform_3partial(// Input
+                                  const volume<T>&         vin,
+                                  const Matrix&            aff,
+                                  const volume4D<float>&   df,
+                                  // Output
+                                  volume<T>&               vout,
+                                  volume4D<T>&             deriv,
+                                  volume<char>&            invol);
+
   template <class T>
   void affine_transform(const volume<T>& vin, volume<T>& vout,
 			     const Matrix& aff, float paddingsize=0.0);
@@ -1048,6 +1180,414 @@ template <class T, class S>
   // IMAGE PROCESSING ROUTINES
   ///////////////////////////////////////////////////////////////////////////
 
+  // General Transform
+  //
+  // The form of a "general" transform is given by an affine matrix
+  // and a displacement-field such that 
+  // [x' y' z' 1]^T = \mathbf{M}*[x y z 1]^T + [d_x(x,y,z) d_y(x,y,z) d_z(x,y,z)]^T
+  // It can be used for non-linear registration of one subject to another (or to
+  // an atlas) or for distortion correction.
+  // The bits pertaining to the affine transform have been copied more or less straight
+  // off from MJ's code, but if we want a single resampling I saw no way to escape
+  // the code copying/multiplication.
+
+  template <class T>
+  void raw_general_transform(// Input
+                             const volume<T>&         vin,        // Input volume
+                             const Matrix&            aff,        // 4x4 affine transformation matrix
+                             const volume4D<float>&   df,         // Displacement fields
+                             const vector<int>&       defdir,     // Directions of displacements. 
+                             const vector<int>&       derivdir,   // Directions of derivatives
+                             // Output
+                             volume<T>&               vout,       // Output volume
+                             volume4D<T>&             deriv)      // Partial derivatives
+  {
+    raw_general_transform(vin,aff,df,defdir,derivdir,vout,deriv,0);
+  }
+
+  template <class T>
+  void raw_general_transform(// Input
+                             const volume<T>&         vin,        // Input volume
+                             const Matrix&            aff,        // 4x4 affine transformation matrix
+                             const volume4D<float>&   df,         // Displacement fields
+                             const vector<int>&       defdir,     // Directions of displacements. 
+                             const vector<int>&       derivdir,   // Directions of derivatives
+                             // Output
+                             volume<T>&               vout,       // Output volume
+                             volume4D<T>&             deriv,      // Partial derivative directions
+                             volume<char>&            invol)      // Mask indicating what voxels fell inside original volume
+  {
+    raw_general_transform(vin,aff,df,defdir,derivdir,vout,deriv,&invol);    
+  }
+
+  template <class T>    
+  void raw_general_transform(// Input
+                             const volume<T>&         vin,        // Input volume
+                             const Matrix&            aff,        // 4x4 affine transformation matrix
+                             const volume4D<float>&   df,         // Displacement fields
+                             const vector<int>&       defdir,     // Directions of displacements. 
+                             const vector<int>&       derivdir,   // Directions of derivatives
+                             // Output
+                             volume<T>&               vout,       // Output volume
+                             volume4D<T>&             deriv,      // Partial derivative directions
+                             volume<char>             *invol)     // Mask indicating what voxels fell inside original volume
+  {
+    if (int(defdir.size()) != df.tsize()) {imthrow("NEWIMAGE::raw_general_transform: Mismatch in input.",11);}
+    for (int i=0; i<int(defdir.size()); i++) {
+      if (defdir[i] < 0 || defdir[i] > 2) {imthrow("NEWIMAGE::raw_general_transform: Mismatch in input",11);}
+    }
+    if (int(derivdir.size()) != deriv.tsize()) {imthrow("NEWIMAGE::raw_general_transform: Mismatch in input",11);}
+    for (int i=0; i<int(derivdir.size()); i++) {
+      if (derivdir[i] < 0 || derivdir[i] > 2) {imthrow("NEWIMAGE::raw_general_transform: Mismatch in input",11);}
+    }
+    if (vout.nvoxels() <= 0) {imthrow("NEWIMAGE::raw_general_transform: Size of vout must be set",8);}
+    if (defdir.size() && !samesize(vout,df[0]))  {imthrow("NEWIMAGE::raw_general_transform: vout and df must have same dimensions",11);}
+    if (derivdir.size() && !samesize(vout,deriv[0]))  {imthrow("NEWIMAGE::raw_general_transform: vout and deriv must have same dimensions",11);}
+    if (invol && !samesize(vout,*invol)) {imthrow("NEWIMAGE::raw_general_transform: vout and invol must have same dimensions",11);}
+
+    extrapolation oldex = vin.getextrapolationmethod();
+    if ((oldex==boundsassert) || (oldex==boundsexception)) {vin.setextrapolationmethod(constpad);}
+
+    // Repackage info about displacement directions in more convenient form
+    int xd, yd, zd;
+    xd=yd=zd= -1;
+    for (int i=0; i<int(defdir.size()); i++) {
+      if (defdir[i]==0) {xd=i;} else if (defdir[i]==1) {yd=i;} else {zd=i;}
+    }
+    // Repackage info about derivative directions in more convenient form
+    int xp, yp, zp;
+    xp=yp=zp= -1;
+    for (int i=0; i<int(derivdir.size()); i++) {
+      if (derivdir[i]==0) {xp=i;} else if (derivdir[i]==1) {yp=i;} else {zp=i;}
+    }
+    
+    // Create a matrix M mapping from world-coordinates in output image
+    // to world-coordinates in input image, for the affine part only.
+
+    Matrix iaff = aff.i(); // Inverse world->world matrix
+    if (vin.left_right_order()==FSL_NEUROLOGICAL) {iaff = vin.swapmat(-1,2,3) * iaff;}
+    if (vout.left_right_order()==FSL_NEUROLOGICAL) {iaff = iaff * vout.swapmat(-1,2,3);}
+    iaff = vin.sampling_mat().i() * iaff * vout.sampling_mat(); // Inverse voxel->voxel matrix
+
+    float a11=iaff(1,1), a12=iaff(1,2), a13=iaff(1,3), a14=iaff(1,4);
+    float a21=iaff(2,1), a22=iaff(2,2), a23=iaff(2,3), a24=iaff(2,4);
+    float a31=iaff(3,1), a32=iaff(3,2), a33=iaff(3,3), a34=iaff(3,4); 
+    float o1,o2,o3;
+  
+    // The matrix algebra below has been hand-optimized from
+    // [o1 o2 o3] = a * [x y z]  at each iteration
+    // I have put some "outer if's" leading to code multiplication here.
+    // I have done so to ensure that we don't pay a performance penalty
+    // For example for the cases where we want to use the routine to get
+    // derivatives for the affine only case, or when we want to perform
+    // a general transformation without calculating any derivatives.
+
+    if (!defdir.size()) { // If we have an affine only transform
+      if (!derivdir.size()) { // If we don't need to calculate derivatives
+        for (int z=0; z<vout.zsize(); z++) { 
+          for (int x=0; x<vout.xsize(); x++) { 
+	    o1=x*a11 + z*a13 + a14;  // y=0
+	    o2=x*a21 + z*a23 + a24;  // y=0
+	    o3=x*a31 + z*a33 + a34;  // y=0
+ 	    for (int y=0; y<vout.ysize(); y++) {
+	      vout(x,y,z) = ((T) vin.interpolate(o1,o2,o3));
+              if (invol) {invol->operator()(x,y,z) = (vin.in_bounds(o1,o2,o3)) ? 1 : 0;}
+	      o1 += a12;
+	      o2 += a22;
+	      o3 += a32;
+	    }
+          }
+        }
+      }
+      else { // If we need derivatives in at least one direction
+        for (int z=0; z<vout.zsize(); z++) { 
+          for (int x=0; x<vout.xsize(); x++) { 
+	    o1=x*a11 + z*a13 + a14;  // y=0
+	    o2=x*a21 + z*a23 + a24;  // y=0
+	    o3=x*a31 + z*a33 + a34;  // y=0
+ 	    for (int y=0; y<vout.ysize(); y++) {
+              if (derivdir.size() == 1) { // If we want a single partial
+                float tmp;
+	        vout(x,y,z) = ((T) vin.interp1partial(o1,o2,o3,derivdir[0],&tmp));
+                deriv(x,y,z,0) = ((T) tmp);
+	      }
+              else { // More than one derivative
+		float tmp1,tmp2,tmp3;
+		vout(x,y,z) = ((T) vin.interp3partial(o1,o2,o3,&tmp1,&tmp2,&tmp3));
+                if (!(xp<0)) {deriv(x,y,z,xp)=((T) tmp1);}
+                if (!(yp<0)) {deriv(x,y,z,yp)=((T) tmp2);}
+                if (!(zp<0)) {deriv(x,y,z,zp)=((T) tmp3);}
+	      }
+              if (invol) {invol->operator()(x,y,z) = (vin.in_bounds(o1,o2,o3)) ? 1 : 0;}
+	      o1 += a12;
+	      o2 += a22;
+	      o3 += a32;
+	    }
+          }
+        }
+      }
+    }
+    else { // We have displacements in at least one direction
+      float oo1,oo2,oo3;
+      if (derivdir.size()) { // If we need to calculate derivatives in at least one direction
+        for (int z=0; z<vout.zsize(); z++) { 
+          for (int x=0; x<vout.xsize(); x++) { 
+	    o1=x*a11 + z*a13 + a14;  // y=0
+	    o2=x*a21 + z*a23 + a24;  // y=0
+	    o3=x*a31 + z*a33 + a34;  // y=0
+ 	    for (int y=0; y<vout.ysize(); y++) {
+              if (xd<0) {oo1=o1;} else {oo1=o1+df(x,y,z,xd);}
+              if (yd<0) {oo2=o2;} else {oo2=o2+df(x,y,z,yd);}
+              if (zd<0) {oo3=o3;} else {oo3=o3+df(x,y,z,zd);}
+              if (derivdir.size() == 1) { // If we want a single partial
+                float tmp;
+	        vout(x,y,z) = ((T) vin.interp1partial(oo1,oo2,oo3,derivdir[0],&tmp));
+                deriv(x,y,z,0) = ((T) tmp);
+	      }
+              else { // More than one derivative
+		float tmp1,tmp2,tmp3;
+		vout(x,y,z) = ((T) vin.interp3partial(oo1,oo2,oo3,&tmp1,&tmp2,&tmp3));
+                if (!(xp<0)) {deriv(x,y,z,xp)=((T) tmp1);}
+                if (!(yp<0)) {deriv(x,y,z,yp)=((T) tmp2);}
+                if (!(zp<0)) {deriv(x,y,z,zp)=((T) tmp3);}
+	      }
+              if (invol) {invol->operator()(x,y,z) = (vin.in_bounds(oo1,oo2,oo3)) ? 1 : 0;}
+	      o1 += a12;
+	      o2 += a22;
+	      o3 += a32;
+	    }
+          }
+        }
+      }
+      else { // If we don't need derivatives
+        for (int z=0; z<vout.zsize(); z++) { 
+          for (int x=0; x<vout.xsize(); x++) { 
+	    o1=x*a11 + z*a13 + a14;  // y=0
+	    o2=x*a21 + z*a23 + a24;  // y=0
+	    o3=x*a31 + z*a33 + a34;  // y=0
+ 	    for (int y=0; y<vout.ysize(); y++) {
+              if (xd<0) {oo1=o1;} else {oo1=o1+df(x,y,z,xd);}
+              if (yd<0) {oo2=o2;} else {oo2=o2+df(x,y,z,yd);}
+              if (zd<0) {oo3=o3;} else {oo3=o3+df(x,y,z,zd);}
+	      vout(x,y,z) = ((T) vin.interpolate(oo1,oo2,oo3));
+              if (invol) {invol->operator()(x,y,z) = (vin.in_bounds(oo1,oo2,oo3)) ? 1 : 0;}
+	      o1 += a12;
+	      o2 += a22;
+	      o3 += a32;
+	    }
+          }
+        }
+      }
+    }
+
+    // Set the sform and qform appropriately (if set)
+    // That is, copy the sform from vout if it is set, otherwise use
+    // the transformed one from vin
+    // Always copy the transformed qform (if set)
+    Matrix nmat;
+    if ( (vout.sform_code()==NIFTI_XFORM_UNKNOWN) &&
+         (vout.qform_code()!=NIFTI_XFORM_UNKNOWN) ) {
+      vout.set_sform(vout.qform_code(), vout.qform_mat()); // Copy qform->sform
+    }
+    else if ( (vout.qform_code()==NIFTI_XFORM_UNKNOWN) &&
+	      (vout.sform_code()!=NIFTI_XFORM_UNKNOWN) ) {
+      vout.set_qform(vout.sform_code(), vout.sform_mat()); // Copy sform->qform
+    }
+    else if ( (vout.qform_code()==NIFTI_XFORM_UNKNOWN) &&
+	      (vout.sform_code()==NIFTI_XFORM_UNKNOWN) ) {
+      if (vin.sform_code()!=NIFTI_XFORM_UNKNOWN) {
+	nmat = vin.sform_mat() * iaff;
+	vout.set_sform(vin.sform_code(), nmat);
+	vout.set_qform(vin.sform_code(), nmat); // Copy transformed in-sform->(sform & qform)
+      } 
+      else if (vin.qform_code()!=NIFTI_XFORM_UNKNOWN) {
+	nmat = vin.qform_mat() * iaff;
+	vout.set_sform(vin.qform_code(), nmat);
+	vout.set_qform(vin.qform_code(), nmat); // Copy transformed in-qform->(sform & qform)
+      }
+    }
+    
+    // restore settings and return
+    vin.setextrapolationmethod(oldex);
+
+    // All done!  
+  }
+  
+  // The following handful of routines are simplified interfaces to 
+  // raw_general_transform that may be convenient for certain
+  // specific applications.
+
+  template <class T>
+  void affine_transform_3partial(// Input
+                                 const volume<T>&     vin,
+                                 const Matrix&        aff,
+                                 // Output
+                                 volume<T>&           vout,
+                                 volume4D<T>&         deriv)
+  {
+    volume4D<float>  pdf;
+    vector<int>      pdefdir;
+    vector<int>      pderivdir(3);
+    for (int i=0; i<3; i++) {pderivdir[i] = i;}
+    raw_general_transform(vin,aff,pdf,pdefdir,pderivdir,vout,deriv);
+  }
+  
+  template <class T>
+  void affine_transform_3partial(// Input
+                                 const volume<T>&     vin,
+                                 const Matrix&        aff,
+                                 // Output
+                                 volume<T>&           vout,
+                                 volume4D<T>&         deriv,
+                                 volume<char>&        invol)
+  {
+    volume4D<float>  pdf;
+    vector<int>      pdefdir;
+    vector<int>      pderivdir(3);
+    for (int i=0; i<3; i++) {pderivdir[i] = i;}
+    raw_general_transform(vin,aff,pdf,pdefdir,pderivdir,vout,deriv,invol);
+  }
+  
+  template <class T>
+  void displacement_transform_1D(// Input
+                                 const volume<T>&       vin,
+                                 const Matrix&          aff,
+                                 const volume<float>&   df,
+                                 int                    defdir,
+                                 // Output
+                                 volume<T>&             vout)
+  {
+    volume4D<float>  pdf;
+    vector<int>      pdefdir(1,defdir);
+    vector<int>      pderivdir;
+    volume4D<T>      pderiv;
+
+    pdf.addvolume(df);
+    raw_general_transform(vin,aff,pdf,pdefdir,pderivdir,vout,pderiv);
+  }
+
+  template <class T>
+  void displacement_transform_1D(// Input
+                                 const volume<T>&       vin,
+                                 const Matrix&          aff,
+                                 const volume<float>&   df,
+                                 int                    defdir,
+                                 // Output
+                                 volume<T>&             vout,
+                                 volume<char>&          invol)
+  {
+    volume4D<float>  pdf;
+    vector<int>      pdefdir(1,defdir);
+    vector<int>      pderivdir;
+    volume4D<T>      pderiv;
+
+    pdf.addvolume(df);
+    raw_general_transform(vin,aff,pdf,pdefdir,pderivdir,vout,pderiv,invol);
+  }
+  
+  template <class T>
+  void displacement_transform_1D_3partial(// Input
+                                          const volume<T>&       vin,
+                                          const Matrix&          aff,
+                                          const volume<float>&   df,
+                                          int                    dir,
+                                          // Output
+                                          volume<T>&             vout,
+                                          volume4D<T>&           deriv)
+  {
+    volume4D<float>      pdf;
+    vector<int>          pdefdir(1,dir);
+    vector<int>          pderivdir(3);
+    
+    for (int i=0; i<3; i++) {pderivdir[i] = i;}
+    pdf.addvolume(df);
+    raw_general_transform(vin,aff,pdf,pdefdir,pderivdir,vout,deriv);    
+  }
+			
+  template <class T>
+  void displacement_transform_1D_3partial(// Input
+                                          const volume<T>&       vin,
+                                          const Matrix&          aff,
+                                          const volume<float>&   df,
+                                          int                    dir,
+                                          // Output
+                                          volume<T>&             vout,
+                                          volume4D<T>&           deriv,
+                                          volume<char>&          invol)
+  {
+    volume4D<float>      pdf;
+    vector<int>          pdefdir(1,dir);
+    vector<int>          pderivdir(3);
+    
+    for (int i=0; i<3; i++) {pderivdir[i] = i;}
+    pdf.addvolume(df);
+    raw_general_transform(vin,aff,pdf,pdefdir,pderivdir,vout,deriv,invol);    
+  }
+			
+  template <class T>	       
+  void general_transform(// Input
+                         const volume<T>&         vin,
+                         const Matrix&            aff,
+                         const volume4D<float>&   df,
+                         // Output
+                         volume<T>&               vout)
+  {
+    vector<int>       pdefdir(3);
+    vector<int>       pderivdir;
+    volume4D<T>  pderiv;
+
+    for (int i=0; i<3; i++) {pdefdir[i] = i;}
+    raw_general_transform(vin,aff,df,pdefdir,pderivdir,vout,pderiv);
+  }
+
+  template <class T>	       
+  void general_transform(// Input
+                         const volume<T>&         vin,
+                         const Matrix&            aff,
+                         const volume4D<float>&   df,
+                         // Output
+                         volume<T>&               vout,
+                         volume<char>&            invol)
+  {
+    vector<int>       pdefdir(3);
+    vector<int>       pderivdir;
+    volume4D<T>  pderiv;
+
+    for (int i=0; i<3; i++) {pdefdir[i] = i;}
+    raw_general_transform(vin,aff,df,pdefdir,pderivdir,vout,pderiv,invol);
+  }
+
+  template <class T>
+  void general_transform_3partial(// Input
+                                  const volume<T>&         vin,
+                                  const Matrix&            aff,
+                                  const volume4D<float>&   df,
+                                  // Output
+                                  volume<T>&               vout,
+                                  volume4D<T>&             deriv)
+  {
+    vector<int>   dir(3);
+    for (int i=0; i<3; i++) {dir[i] = i;}
+
+    raw_general_transform(vin,aff,df,dir,dir,vout,deriv);
+  }
+                                
+  template <class T>
+  void general_transform_3partial(// Input
+                                  const volume<T>&         vin,
+                                  const Matrix&            aff,
+                                  const volume4D<float>&   df,
+                                  // Output
+                                  volume<T>&               vout,
+                                  volume4D<T>&             deriv,
+                                  volume<char>&            invol)
+  {
+    vector<int>   dir(3);
+    for (int i=0; i<3; i++) {dir[i] = i;}
+
+    raw_general_transform(vin,aff,df,dir,dir,vout,deriv,invol);
+  }
+                                
   // AFFINE TRANSFORM
 
   template <class T>
@@ -1169,6 +1709,7 @@ template <class T, class S>
 
       // The matrix algebra below has been hand-optimized from
       //  [o1 o2 o3] = a * [x y z]  at each iteration
+
       for (int z=0; z<vout.zsize(); z++) { 
 	for (int x=0; x<vout.xsize(); x++) { 
 	  o1=x*a11 + z*a13 + a14;  // y=0