46 #include <visp/vpMatrix.h>
47 #include <visp/vpException.h>
48 #include <visp/vpMatrixException.h>
49 #include <visp/vpDebug.h>
// Primary template of the size helper, parameterised on an unsigned
// compile-time flag value. This generic version leaves all six parameters
// unnamed and has an empty body, so calling it has no effect; explicit
// specialisations for the flag values 0..7 follow in this file.
72 template<
unsigned int>
inline void GEMMsize(
const vpMatrix & ,
const vpMatrix & ,
unsigned int &,
unsigned int &,
unsigned int &,
unsigned int &){}
// GEMMsize specialisation for flag value 0.
// Receives A and B by const reference and four non-const unsigned
// references named Arows, Acols, Brows and Bcols.
// NOTE(review): the body is not visible in this excerpt; presumably it
// stores the (possibly transposed) dimensions of A and B into the four
// references - confirm against the full file.
// The specifier order `void inline` differs from the sibling
// specialisations (`inline void`) but is equivalent.
74 template<>
void inline GEMMsize<0>(
const vpMatrix & A,
const vpMatrix & B,
unsigned int &Arows,
unsigned int &Acols,
unsigned int &Brows,
unsigned int &Bcols){
// GEMMsize specialisation for flag value 1.
// Receives A and B by const reference and four non-const unsigned
// references named Arows, Acols, Brows and Bcols.
// NOTE(review): the body is not visible in this excerpt; presumably it
// stores the dimensions with A treated as transposed - confirm.
81 template<>
inline void GEMMsize<1>(
const vpMatrix & A,
const vpMatrix & B,
unsigned int &Arows,
unsigned int &Acols,
unsigned int &Brows,
unsigned int &Bcols){
// GEMMsize specialisation for flag value 2.
// Receives A and B by const reference and four non-const unsigned
// references named Arows, Acols, Brows and Bcols.
// NOTE(review): the body is not visible in this excerpt; presumably it
// stores the dimensions with B treated as transposed - confirm.
87 template<>
inline void GEMMsize<2>(
const vpMatrix & A,
const vpMatrix & B,
unsigned int &Arows,
unsigned int &Acols,
unsigned int &Brows,
unsigned int &Bcols){
// GEMMsize specialisation for flag value 3.
// Receives A and B by const reference and four non-const unsigned
// references named Arows, Acols, Brows and Bcols.
// NOTE(review): the body is not visible in this excerpt; presumably it
// stores the dimensions with both A and B treated as transposed - confirm.
93 template<>
inline void GEMMsize<3>(
const vpMatrix & A,
const vpMatrix & B,
unsigned int &Arows,
unsigned int &Acols,
unsigned int &Brows,
unsigned int &Bcols){
// GEMMsize specialisation for flag value 4.
// Receives A and B by const reference and four non-const unsigned
// references named Arows, Acols, Brows and Bcols.
// NOTE(review): the body is not visible in this excerpt; presumably it
// matches the flag-0 sizing (neither A nor B transposed), since the extra
// bit only affects C in the GEMM2 kernels - confirm.
100 template<>
inline void GEMMsize<4>(
const vpMatrix & A,
const vpMatrix & B,
unsigned int &Arows,
unsigned int &Acols,
unsigned int &Brows,
unsigned int &Bcols){
// GEMMsize specialisation for flag value 5.
// Receives A and B by const reference and four non-const unsigned
// references named Arows, Acols, Brows and Bcols.
// NOTE(review): the body is not visible in this excerpt; presumably it
// matches the flag-1 sizing (A transposed) - confirm.
107 template<>
inline void GEMMsize<5>(
const vpMatrix & A,
const vpMatrix & B,
unsigned int &Arows,
unsigned int &Acols,
unsigned int &Brows,
unsigned int &Bcols){
// GEMMsize specialisation for flag value 6.
// Receives A and B by const reference and four non-const unsigned
// references named Arows, Acols, Brows and Bcols.
// NOTE(review): the body is not visible in this excerpt; presumably it
// matches the flag-2 sizing (B transposed) - confirm.
114 template<>
inline void GEMMsize<6>(
const vpMatrix & A,
const vpMatrix & B,
unsigned int &Arows,
unsigned int &Acols,
unsigned int &Brows,
unsigned int &Bcols){
// GEMMsize specialisation for flag value 7.
// Receives A and B by const reference and four non-const unsigned
// references named Arows, Acols, Brows and Bcols.
// NOTE(review): the body is not visible in this excerpt; presumably it
// matches the flag-3 sizing (A and B transposed) - confirm.
121 template<>
inline void GEMMsize<7>(
const vpMatrix & A,
const vpMatrix & B,
unsigned int &Arows,
unsigned int &Acols,
unsigned int &Brows,
unsigned int &Bcols){
// Primary template of the product-only kernel (no C / beta operands),
// parameterised on an unsigned compile-time flag value. This generic
// version leaves every parameter unnamed and has an empty body, so it does
// nothing; explicit specialisations for the flag values 0..3 follow in
// this file.
130 template<
unsigned int>
inline void GEMM1(
const unsigned int &,
const unsigned int &,
const unsigned int &,
const vpMatrix & ,
const vpMatrix & ,
const double & ,
vpMatrix &){}
// GEMM1 specialisation for flag value 0: accumulates the terms of
// alpha * A * B with neither operand transposed.
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B         - input matrices, read as A[r][n] and B[n][c]
//   alpha        - scalar factor applied to every product term
//   D            - non-const matrix reference
// NOTE(review): the declaration/reset of `sum`, the store into D and the
// closing braces are on lines not shown in this excerpt; presumably
// D[r][c] receives `sum` - confirm against the full file.
132 template<>
inline void GEMM1<0>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
vpMatrix &D){
133 for(
unsigned int r=0;r<Arows;r++)
134 for(
unsigned int c=0;c<Bcols;c++){
136 for(
unsigned int n=0;n<Brows;n++)
// alpha multiplies each term individually rather than the finished sum.
137 sum+=A[r][n]*B[n][c]*alpha;
// GEMM1 specialisation for flag value 1: accumulates the terms of
// alpha * A^T * B (A is indexed [n][r], i.e. read transposed).
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B         - input matrices, read as A[n][r] and B[n][c]
//   alpha        - scalar factor applied to every product term
//   D            - non-const matrix reference
// NOTE(review): the declaration/reset of `sum`, the store into D and the
// closing braces are on lines not shown in this excerpt; presumably
// D[r][c] receives `sum` - confirm against the full file.
142 template<>
inline void GEMM1<1>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
vpMatrix &D){
143 for(
unsigned int r=0;r<Arows;r++)
144 for(
unsigned int c=0;c<Bcols;c++){
146 for(
unsigned int n=0;n<Brows;n++)
// r is used as A's column index here, so Arows bounds a column of A.
147 sum+=A[n][r]*B[n][c]*alpha;
// GEMM1 specialisation for flag value 2: accumulates the terms of
// alpha * A * B^T (B is indexed [c][n], i.e. read transposed).
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B         - input matrices, read as A[r][n] and B[c][n]
//   alpha        - scalar factor applied to every product term
//   D            - non-const matrix reference
// NOTE(review): the declaration/reset of `sum`, the store into D and the
// closing braces are on lines not shown in this excerpt; presumably
// D[r][c] receives `sum` - confirm against the full file.
152 template<>
inline void GEMM1<2>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
vpMatrix &D){
153 for(
unsigned int r=0;r<Arows;r++)
154 for(
unsigned int c=0;c<Bcols;c++){
156 for(
unsigned int n=0;n<Brows;n++)
// c is used as B's row index here, so Bcols bounds a row of B.
157 sum+=A[r][n]*B[c][n]*alpha;
// GEMM1 specialisation for flag value 3: accumulates the terms of
// alpha * A^T * B^T (A indexed [n][r] and B indexed [c][n], both read
// transposed).
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B         - input matrices
//   alpha        - scalar factor applied to every product term
//   D            - non-const matrix reference
// NOTE(review): the declaration/reset of `sum`, the store into D and the
// closing braces are on lines not shown in this excerpt; presumably
// D[r][c] receives `sum` - confirm against the full file.
162 template<>
inline void GEMM1<3>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
vpMatrix &D){
163 for(
unsigned int r=0;r<Arows;r++)
164 for(
unsigned int c=0;c<Bcols;c++){
166 for(
unsigned int n=0;n<Brows;n++)
// Both operands are indexed with swapped subscripts.
167 sum+=A[n][r]*B[c][n]*alpha;
// Primary template of the product-plus-addend kernel (takes the extra
// C / beta operands), parameterised on an unsigned compile-time flag
// value. This generic version leaves every parameter unnamed and has an
// empty body, so it does nothing; explicit specialisations for the flag
// values 0..7 follow in this file.
172 template<
unsigned int>
inline void GEMM2(
const unsigned int &,
const unsigned int &,
const unsigned int &,
const vpMatrix & ,
const vpMatrix & ,
const double & ,
const vpMatrix & ,
const double &,
vpMatrix &){}
// GEMM2 specialisation for flag value 0:
//   D[r][c] = sum_n(A[r][n] * B[n][c] * alpha) + C[r][c] * beta
// i.e. alpha*A*B + beta*C with no operand transposed.
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B, C      - input matrices; alpha, beta - scalar factors
//   D            - output matrix, written element by element
// NOTE(review): the declaration/reset of `sum` and the closing braces are
// on lines not shown in this excerpt.
174 template<>
inline void GEMM2<0>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
const vpMatrix & C ,
const double &beta,
vpMatrix &D){
176 for(
unsigned int r=0;r<Arows;r++)
177 for(
unsigned int c=0;c<Bcols;c++){
179 for(
unsigned int n=0;n<Brows;n++)
// alpha multiplies each term individually rather than the finished sum.
180 sum+=A[r][n]*B[n][c]*alpha;
// Add the scaled addend and store the result element.
181 D[r][c]=sum+C[r][c]*beta;
// GEMM2 specialisation for flag value 1:
//   D[r][c] = sum_n(A[n][r] * B[n][c] * alpha) + C[r][c] * beta
// i.e. alpha*A^T*B + beta*C (only A is read transposed).
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B, C      - input matrices; alpha, beta - scalar factors
//   D            - output matrix, written element by element
// NOTE(review): the declaration/reset of `sum` and the closing braces are
// on lines not shown in this excerpt.
185 template<>
inline void GEMM2<1>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
const vpMatrix & C ,
const double &beta,
vpMatrix &D){
186 for(
unsigned int r=0;r<Arows;r++)
187 for(
unsigned int c=0;c<Bcols;c++){
189 for(
unsigned int n=0;n<Brows;n++)
// A is indexed [n][r]: transposed access.
190 sum+=A[n][r]*B[n][c]*alpha;
// Add the scaled addend and store the result element.
191 D[r][c]=sum+C[r][c]*beta;
// GEMM2 specialisation for flag value 2:
//   D[r][c] = sum_n(A[r][n] * B[c][n] * alpha) + C[r][c] * beta
// i.e. alpha*A*B^T + beta*C (only B is read transposed).
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B, C      - input matrices; alpha, beta - scalar factors
//   D            - output matrix, written element by element
// NOTE(review): the declaration/reset of `sum` and the closing braces are
// on lines not shown in this excerpt.
195 template<>
inline void GEMM2<2>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
const vpMatrix & C ,
const double &beta,
vpMatrix &D){
196 for(
unsigned int r=0;r<Arows;r++)
197 for(
unsigned int c=0;c<Bcols;c++){
199 for(
unsigned int n=0;n<Brows;n++)
// B is indexed [c][n]: transposed access.
200 sum+=A[r][n]*B[c][n]*alpha;
// Add the scaled addend and store the result element.
201 D[r][c]=sum+C[r][c]*beta;
// GEMM2 specialisation for flag value 3:
//   D[r][c] = sum_n(A[n][r] * B[c][n] * alpha) + C[r][c] * beta
// i.e. alpha*A^T*B^T + beta*C (A and B are read transposed, C is not).
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B, C      - input matrices; alpha, beta - scalar factors
//   D            - output matrix, written element by element
// NOTE(review): the declaration/reset of `sum` and the closing braces are
// on lines not shown in this excerpt.
205 template<>
inline void GEMM2<3>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
const vpMatrix & C ,
const double &beta,
vpMatrix &D){
206 for(
unsigned int r=0;r<Arows;r++)
207 for(
unsigned int c=0;c<Bcols;c++){
209 for(
unsigned int n=0;n<Brows;n++)
// Both A and B are indexed with swapped subscripts.
210 sum+=A[n][r]*B[c][n]*alpha;
// Add the scaled addend and store the result element.
211 D[r][c]=sum+C[r][c]*beta;
// GEMM2 specialisation for flag value 4:
//   D[r][c] = sum_n(A[r][n] * B[n][c] * alpha) + C[c][r] * beta
// i.e. alpha*A*B + beta*C^T (only C is read transposed).
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B, C      - input matrices; alpha, beta - scalar factors
//   D            - output matrix, written element by element
// NOTE(review): the declaration/reset of `sum` and the closing braces are
// on lines not shown in this excerpt.
216 template<>
inline void GEMM2<4>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
const vpMatrix & C ,
const double &beta,
vpMatrix &D){
217 for(
unsigned int r=0;r<Arows;r++)
218 for(
unsigned int c=0;c<Bcols;c++){
220 for(
unsigned int n=0;n<Brows;n++)
221 sum+=A[r][n]*B[n][c]*alpha;
// C is indexed [c][r]: transposed access to the addend.
222 D[r][c]=sum+C[c][r]*beta;
// GEMM2 specialisation for flag value 5:
//   D[r][c] = sum_n(A[n][r] * B[n][c] * alpha) + C[c][r] * beta
// i.e. alpha*A^T*B + beta*C^T (A and C are read transposed, B is not).
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B, C      - input matrices; alpha, beta - scalar factors
//   D            - output matrix, written element by element
// NOTE(review): the declaration/reset of `sum` and the closing braces are
// on lines not shown in this excerpt.
226 template<>
inline void GEMM2<5>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
const vpMatrix & C ,
const double &beta,
vpMatrix &D){
227 for(
unsigned int r=0;r<Arows;r++)
228 for(
unsigned int c=0;c<Bcols;c++){
230 for(
unsigned int n=0;n<Brows;n++)
// A is indexed [n][r]: transposed access.
231 sum+=A[n][r]*B[n][c]*alpha;
// C is indexed [c][r]: transposed access to the addend.
232 D[r][c]=sum+C[c][r]*beta;
// GEMM2 specialisation for flag value 6:
//   D[r][c] = sum_n(A[r][n] * B[c][n] * alpha) + C[c][r] * beta
// i.e. alpha*A*B^T + beta*C^T (B and C are read transposed, A is not).
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B, C      - input matrices; alpha, beta - scalar factors
//   D            - output matrix, written element by element
// NOTE(review): the declaration/reset of `sum` and the closing braces are
// on lines not shown in this excerpt.
237 template<>
inline void GEMM2<6>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
const vpMatrix & C ,
const double &beta,
vpMatrix &D){
238 for(
unsigned int r=0;r<Arows;r++)
239 for(
unsigned int c=0;c<Bcols;c++){
241 for(
unsigned int n=0;n<Brows;n++)
// B is indexed [c][n]: transposed access.
242 sum+=A[r][n]*B[c][n]*alpha;
// C is indexed [c][r]: transposed access to the addend.
243 D[r][c]=sum+C[c][r]*beta;
// GEMM2 specialisation for flag value 7:
//   D[r][c] = sum_n(A[n][r] * B[c][n] * alpha) + C[c][r] * beta
// i.e. alpha*A^T*B^T + beta*C^T (all three inputs are read transposed).
//   Arows, Bcols - upper bounds of the output row (r) and column (c) loops
//   Brows        - length of the inner summation loop (n)
//   A, B, C      - input matrices; alpha, beta - scalar factors
//   D            - output matrix, written element by element
// NOTE(review): the declaration/reset of `sum` and the closing braces are
// on lines not shown in this excerpt.
247 template<>
inline void GEMM2<7>(
const unsigned int &Arows,
const unsigned int &Brows,
const unsigned int &Bcols,
const vpMatrix & A,
const vpMatrix & B,
const double & alpha,
const vpMatrix & C ,
const double &beta,
vpMatrix &D){
249 for(
unsigned int r=0;r<Arows;r++)
250 for(
unsigned int c=0;c<Bcols;c++){
252 for(
unsigned int n=0;n<Brows;n++)
// Both A and B are indexed with swapped subscripts.
253 sum+=A[n][r]*B[c][n]*alpha;
// C is indexed [c][r]: transposed access to the addend.
254 D[r][c]=sum+C[c][r]*beta;
// Templated driver, parameterised on the compile-time flag value T, that
// forwards to the GEMMsize / GEMM1 / GEMM2 instantiations for that T.
//   A, B        - input matrices
//   alpha, beta - scalar factors
//   C           - addend matrix (forwarded to GEMM2 only)
//   D           - non-const matrix reference passed on as the kernels'
//                 last argument
// Visible steps: GEMMsize<T> is called with A, B and the four size
// variables Arows/Acols/Brows/Bcols; an object named `me` is streamed to
// std::cout; GEMM2<T> (with C and beta) and GEMM1<T> (without them) are
// each called.
// NOTE(review): the declarations of the size variables, the origin of `me`
// (presumably a caught exception), any dimension checks and the condition
// choosing between GEMM2 and GEMM1 are on lines not shown in this excerpt -
// confirm against the full file.
258 template<
unsigned int T>
inline void vpTGEMM(
const vpMatrix & A,
const vpMatrix & B,
const double & alpha ,
const vpMatrix & C,
const double & beta,
vpMatrix & D){
// Obtain the operand sizes for this flag value.
266 GEMMsize<T>(A,B,Arows,Acols,Brows,Bcols);
276 std::cout << me << std::endl ;
// Kernel including the beta*C term.
295 GEMM2<T>(Arows,Brows,Bcols,A,B,alpha,C,beta,D);
// Kernel without the C term.
297 GEMM1<T>(Arows,Brows,Bcols,A,B,alpha,D);
// Dispatch fragment: each visible call instantiates vpTGEMM with one of
// the compile-time flag values 0..7 and forwards the same six arguments.
// NOTE(review): the enclosing function header and the construct selecting
// among these calls (presumably a switch on a runtime flag argument) are
// not visible in this excerpt; the function name is inferred from the
// error string below - confirm against the full file.
334 vpTGEMM<0>( A, B, alpha , C, beta, D);
337 vpTGEMM<1>( A, B, alpha , C, beta, D);
340 vpTGEMM<2>( A, B, alpha , C, beta, D);
343 vpTGEMM<3>( A, B, alpha , C, beta, D);
346 vpTGEMM<4>( A, B, alpha , C, beta, D);
349 vpTGEMM<5>( A, B, alpha , C, beta, D);
352 vpTGEMM<6>( A, B, alpha , C, beta, D);
355 vpTGEMM<7>( A, B, alpha , C, beta, D);
// Error trace emitted on a path whose condition is not visible here
// (presumably a flag value outside 0..7).
358 vpERROR_TRACE(
"\n\t\tvpMatrix mismatch operation in vpGEMM") ;