### 向量和矩阵的表达式模板

void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const double alpha, const double *A,
const int lda, const double *B, const int ldb,
const double beta, double *C, const int ldc);


$$C = \alpha A B + \beta C.$$

template <class Derived, class Value, class Reference>
struct vector_proxy {
...
size_t size() const;
value_type get(size_t idx) const;
reference_type get(size_t idx);
...
};


template <class Derived, class Value, class Reference>
struct vector_proxy {
...

template <class R>
void assign(const vector_proxy<R> &r) {
size_t n = size();
assert(r.size() == n);
for (size_t i = 0; i < n; ++i) {
get(i) = r.get(i);
}
}

template <class R>
void parallel_assign(const vector_proxy<R> &r) {
size_t n = size();
assert(r.size() == n);
#pragma omp parallel for
for (size_t i = 0; i < n; ++i) {
get(i) = r.get(i);
}
}

...

}

matrix_proxyvector_proxy 完全类似.

value_type get(size_t i, size_t j) {
return vec_inner_prod(m_A.row(i), m_B.col(j));
}


gemm_proxy 里的 row(i), col(j) 可以采用矩阵和向量的乘积 gemv(m_B, m_A.row(i)), gemv(m_A, m_B.col(j)). 在 gemv_proxy 里, 还是用 vec_inner_prod 来自动区分 dense/sparse 的向量.
gemm(A, B) 在 A, B 都是稀疏矩阵时, 情况比较复杂, 使 gemv 返回一个稀 疏向量的实现效率比较低, 我直接将结果当成 dense 了.