replaced loops with calls to VOLK kernels

2025-05-05 23:09:21 +00:00 · 2019-09-09 17:41:20 +02:00 · 2019-09-09 17:41:20 +02:00 · 65b73f8258
parent 09ef3402aa
commit 65b73f8258
8 changed files with 56 additions and 50 deletions
--- a/lib/adaptive_dfe_impl.cc
+++ b/lib/adaptive_dfe_impl.cc
@ -70,6 +70,7 @@ adaptive_dfe_impl::adaptive_dfe_impl(int sps, // samples per symbol
  , _mu(mu)
  , _alpha(alpha)
  , _use_symbol_taps(true)
+  , _tmp()
  , _taps_samples()
  , _taps_symbols()
  , _hist_symbols()
@ -185,7 +186,7 @@ adaptive_dfe_impl::general_work(int noutput_items,
        }
        // rotate samples
        if (i == i0) {
-#if 0
+#ifdef USE_VOLK_ROTATOR
          _rotator.rotateN(&_rotated_samples[0] + i - _nB,
                           in      + i - _nB,
                           _nB+_nF+1);
@ -194,7 +195,7 @@ adaptive_dfe_impl::general_work(int noutput_items,
            _rotated_samples[j + i-_nB] = _rotator.rotate(in[j + i-_nB]);
 #endif
        } else {
-#if 0
+#ifdef USE_VOLK_ROTATOR
          _rotator.rotateN(&_rotated_samples[0] + i + _nF+1 - _sps,
                           in      + i + _nF+1 - _sps,
                           _sps);
@ -219,6 +220,7 @@ adaptive_dfe_impl::general_work(int noutput_items,
 bool adaptive_dfe_impl::start()
 {
  gr::thread::scoped_lock lock(d_setlock);
+  _tmp.resize(_nB+_nF+1);
  _taps_samples.resize(_nB+_nF+1);
  _last_taps_samples.resize(_nB+_nF+1);
  _taps_symbols.resize(_nW);
@ -227,8 +229,8 @@ bool adaptive_dfe_impl::start()
  GR_LOG_DEBUG(d_logger,str(boost::format("adaptive_dfe_impl::start() nB=%d nF=%d mu=%f alpha=%f")
                             % _nB % _nF % _mu % _alpha));

-  //_filter_update = lms::make(_mu);
-  _filter_update = rls::make(0.001, 0.9999);
+  _filter_update = lms::make(_mu);
+  //_filter_update = rls::make(0.001, 0.9999);
  return true;
 }
 bool adaptive_dfe_impl::stop()
@ -290,11 +292,9 @@ gr_complex adaptive_dfe_impl::filter(gr_complex const* start, gr_complex const*
      std::cout << "err= " << std::abs(err) << std::endl;
    //       taps_samples
    gr_complex const* gain = _filter_update->update(start, end);
-    for (int j=0; j<_nB+_nF+1; ++j) {
-      _last_taps_samples[j] = _taps_samples[j];
-      _taps_samples[j]     += _mu*std::conj(start[j]) * err;
-//      _taps_samples[j]     += gain[j] * err;
-    }
+    std::copy(_taps_samples.begin(), _taps_samples.end(), _last_taps_samples.begin());
+    volk_32fc_s32fc_multiply_32fc(&_tmp[0],           gain,              err,     _nB+_nF+1);
+    volk_32fc_x2_add_32fc        (&_taps_samples[0], &_taps_samples[0], &_tmp[0], _nB+_nF+1);
    //       taps_symbols
    if (_use_symbol_taps) {
      for (int j=0; j<_nW; ++j) {
@ -310,9 +310,7 @@ gr_complex adaptive_dfe_impl::filter(gr_complex const* start, gr_complex const*
  if (update_taps) {
    if (_symbol_counter != 0) { // a filter tap shift might have ocurred when _symbol_counter==0
      gr_complex acc(0);
-      for (int j=0; j<_nB+_nF+1; ++j) {
-        acc += std::conj(_last_taps_samples[j]) * _taps_samples[j];
-      }
+      volk_32fc_x2_conjugate_dot_prod_32fc(&acc, &_taps_samples[0], &_last_taps_samples[0], _nB+_nF+1);
      float const frequency_err = gr::fast_atan2f(acc)/(0+1*_num_samples_since_filter_update); // frequency error (rad/sample)
      GR_LOG_DEBUG(d_logger, str(boost::format("frequency_err= %f %d") % frequency_err % _num_samples_since_filter_update));
      _control_loop.advance_loop(frequency_err);
@ -334,13 +332,6 @@ gr_complex adaptive_dfe_impl::filter(gr_complex const* start, gr_complex const*

 int
 adaptive_dfe_impl::recenter_filter_taps() {
-#if 0
-  ssize_t const _idx_max = std::distance(_taps_samples.begin(),
-                                         std::max_element(_taps_samples.begin()+_nB+1-3*_sps, _taps_samples.begin()+_nB+1+3*_sps,
-                                                         [](gr_complex a, gr_complex b) {
-                                                           return std::norm(a) < std::norm(b);
-                                                         }));
-#else
  float sum_w=0, sum_wi=0;
  for (int i=0; i<_nB+_nF+1; ++i) {
    float const w = std::norm(_taps_samples[i]);
@ -348,7 +339,7 @@ adaptive_dfe_impl::recenter_filter_taps() {
    sum_wi += w*i;
  }
  ssize_t const idx_max = ssize_t(0.5 + sum_wi/sum_w);
-#endif
+
  // GR_LOG_DEBUG(d_logger, str(boost::format("idx_max=%2d abs(tap_max)=%f") % idx_max % std::abs(_taps_samples[idx_max])));
  if (idx_max-_nB-1 > +2*_sps) {
    // maximum is right of the center tap
@ -445,7 +436,6 @@ void adaptive_dfe_impl::update_frame_info(pmt::pmt_t data)
      // GR_LOG_DEBUG(d_logger, str(boost::format("XOR %3d %3d %d") % i % j % _scramble_xor[i][j]));
    }
  }
-
  assert(_symbols.size() == _scramble.size());
  _descrambled_symbols.resize(_symbols.size());
  _vec_soft_decisions.clear();
--- a/lib/adaptive_dfe_impl.h
+++ b/lib/adaptive_dfe_impl.h
@ -60,7 +60,6 @@ private:
 class adaptive_dfe_impl : public adaptive_dfe {
 private:
  typedef std::vector<gr_complex, volk_allocator<gr_complex> > gr_complex_vec_type;
-//  typedef std::vector<gr_complex> gr_complex_vec_type;

  int _sps;
  int _nB, _nF, _nW;
@ -71,6 +70,7 @@ private:

  bool _use_symbol_taps;

+  gr_complex_vec_type _tmp;
  gr_complex_vec_type _taps_samples;
  gr_complex_vec_type _taps_symbols;
  gr_complex_vec_type _last_taps_samples;
@ -105,7 +105,7 @@ private:
  } _state;

  filter_update::sptr _filter_update;
-//  void update_constellations(boost::python::object obj);
+
  void update_constellations(pmt::pmt_t );
  void update_frame_info(pmt::pmt_t );

@ -135,7 +135,7 @@ public:
                           gr_vector_const_void_star &input_items,
                           gr_vector_void_star &output_items);

-  virtual void set_mu(float mu) { _mu = mu; }
+  virtual void set_mu(float mu) { _mu = mu; _filter_update->set_parameters({{"mu", mu}}); }
  virtual void set_alpha(float alpha) { _alpha = alpha; }
 } ;

--- a/lib/doppler_correction_cc_impl.cc
+++ b/lib/doppler_correction_cc_impl.cc
@ -150,7 +150,7 @@ doppler_correction_cc_impl::work(int noutput_items,
    } // CONSUME_AND_SKIP
  }
  // apply current doppler correction to all produced samples
-#if 0 // rotateN is broken in some older VOLK versions
+#ifdef USE_VOLK_ROTATOR
  _rotator.rotateN(out, in, nout);
 #else
  for (int i=0; i<nout; ++i)
--- a/lib/filter_update.hpp
+++ b/lib/filter_update.hpp
@ -3,6 +3,7 @@
 #ifndef _LIB_FILTER_UPDATE_HPP_
 #define _LIB_FILTER_UPDATE_HPP_

+#include <map>
 #include <memory>
 #include <string>

@ -18,7 +19,7 @@ public:

  virtual void reset() = 0;
  virtual gr_complex const* update(gr_complex const*, gr_complex const*) = 0;
-
+  virtual void set_parameters(std::map<std::string, float>const &) = 0;
 protected:
 private:
 } ;
--- a/lib/lms.cc
+++ b/lib/lms.cc
@ -13,7 +13,8 @@ filter_update::sptr lms::make(float mu) {

 lms::lms(float mu)
  : _mu(mu)
-  , _gain() {
+  , _gain()
+  , _tmp() {
 }

 lms::~lms() {
@ -23,6 +24,7 @@ void lms::resize(size_t n) {
  if (_gain.size() == n)
    return;
  _gain.resize(n);
+  _tmp.resize(n);
  std::fill_n(_gain.begin(), n, 0);
 }

@ -33,13 +35,17 @@ void lms::reset() {
 gr_complex const* lms::update(gr_complex const* beg,
                              gr_complex const* end) {
  assert(end-beg > 0);
-  size_t n = end - beg;
+  size_t const n = end - beg;
  resize(n);
-  for (size_t i=0; i<n; ++i)
-    _gain[i] = _mu * std::conj(beg[i]);
+  volk_32fc_conjugate_32fc(&_tmp[0], beg, n);
+  volk_32f_s32f_multiply_32f((float*)&_gain[0], (float const*)&_tmp[0], _mu, 2*n);

  return &_gain.front();
 }

+void lms::set_parameters(std::map<std::string, float>const & p) {
+  _mu = p.at("mu");
+}
+
 } // namespace digitalhf
 } // namespace gr
--- a/lib/lms.hpp
+++ b/lib/lms.hpp
@ -21,13 +21,16 @@ public:

  virtual void reset();
  virtual gr_complex const* update(gr_complex const*, gr_complex const*);
+  virtual void set_parameters(std::map<std::string, float>const &);

 protected:
  void resize(size_t);

 private:
-  float _mu;
-  std::vector<gr_complex, volk_allocator<gr_complex> > _gain;
+  typedef std::vector<gr_complex, volk_allocator<gr_complex> > vec_type;
+  float    _mu;
+  vec_type _gain;
+  vec_type _tmp;
 } ;

 } // namespace digitalhf
--- a/lib/rls.cc
+++ b/lib/rls.cc
@ -26,6 +26,8 @@ void rls::resize(size_t n) {
    return;
  _gain.resize(n);
  _inv_corr.resize(n*n);
+  _pu.resize(n);
+  _tmp.resize(n);
  reset();
 }

@ -43,30 +45,30 @@ gr_complex const* rls::update(gr_complex const* beg,
  unsigned const n = end - beg;
  resize(n);

-  std::vector<gr_complex, volk_allocator<gr_complex> > pu(n), tmp(n);
-  //pu.resize(n);
-
  for (unsigned i=0; i<n; ++i)
-    volk_32fc_x2_dot_prod_32fc(&pu[i], &_inv_corr[n*i], beg, n);
+    volk_32fc_x2_dot_prod_32fc(&_pu[i], &_inv_corr[n*i], beg, n);

  gr_complex uPu = 0;
-  volk_32fc_x2_conjugate_dot_prod_32fc(&uPu, &pu[0], beg, n);
-  for (unsigned i=0; i<n; ++i)
-    _gain[i] = std::conj(pu[i])/(_lambda + std::real(uPu));
+  volk_32fc_x2_conjugate_dot_prod_32fc(&uPu, &_pu[0], beg, n);
+
+  volk_32fc_conjugate_32fc(&_tmp[0], &_pu[0], n);
+  volk_32f_s32f_multiply_32f((float*)&_gain[0], (float const*)&_tmp[0], 1.0f/(_lambda + std::real(uPu)), 2*n);
+//  volk_32fc_s32fc_multiply_32fc(&_gain[0], &_tmp[0], 1/(_lambda + uPu), n);
+
  for (unsigned i=0; i<n; ++i) {
    unsigned const k = n*i;
-#if 0
-    for (unsigned j=0; j<n; ++j)
-      _inv_corr[k+j] = (_inv_corr[k+j] - pu[i]*_gain[j]) / _lambda;
-#else
-    volk_32fc_s32fc_multiply_32fc(&tmp[0], &_gain[0], -pu[i], n);
-    volk_32fc_x2_add_32fc(&_inv_corr[k], &_inv_corr[k], &tmp[0], n);
+    volk_32fc_s32fc_multiply_32fc(&_tmp[0], &_gain[0], -_pu[i], n);
+    volk_32fc_x2_add_32fc(&_inv_corr[k], &_inv_corr[k], &_tmp[0], n);
    volk_32f_s32f_multiply_32f((float*)&_inv_corr[k], (float const*)&_inv_corr[k], 1.0f/_lambda, 2*n);
-#endif
    _inv_corr[k+i] += _delta;
  }
  return &_gain.front();
 }

+void rls::set_parameters(std::map<std::string, float> const& p) {
+  _delta  = p.at("delta");
+  _lambda = p.at("lambda");
+}
+
 } // namespace digitalhf
 } // namespace gr
--- a/lib/rls.hpp
+++ b/lib/rls.hpp
@ -21,15 +21,19 @@ public:

  virtual void reset();
  virtual gr_complex const* update(gr_complex const*, gr_complex const*);
+  virtual void set_parameters(std::map<std::string, float>const &);

 protected:
  void resize(size_t);

 private:
-  float _delta;
-  float _lambda;
-  std::vector<gr_complex, volk_allocator<gr_complex> > _gain;
-  std::vector<gr_complex, volk_allocator<gr_complex> > _inv_corr;
+  typedef std::vector<gr_complex, volk_allocator<gr_complex> > vec_type;
+  float    _delta;
+  float    _lambda;
+  vec_type _gain;
+  vec_type _inv_corr;
+  vec_type _pu;
+  vec_type _tmp;
 } ;

 } // namespace digitalhf