replaced loops with calls to VOLK kernels

This commit is contained in:
Christoph Mayer 2019-09-09 17:41:20 +02:00
parent 09ef3402aa
commit 65b73f8258
8 changed files with 56 additions and 50 deletions

View File

@ -70,6 +70,7 @@ adaptive_dfe_impl::adaptive_dfe_impl(int sps, // samples per symbol
, _mu(mu) , _mu(mu)
, _alpha(alpha) , _alpha(alpha)
, _use_symbol_taps(true) , _use_symbol_taps(true)
, _tmp()
, _taps_samples() , _taps_samples()
, _taps_symbols() , _taps_symbols()
, _hist_symbols() , _hist_symbols()
@ -185,7 +186,7 @@ adaptive_dfe_impl::general_work(int noutput_items,
} }
// rotate samples // rotate samples
if (i == i0) { if (i == i0) {
#if 0 #ifdef USE_VOLK_ROTATOR
_rotator.rotateN(&_rotated_samples[0] + i - _nB, _rotator.rotateN(&_rotated_samples[0] + i - _nB,
in + i - _nB, in + i - _nB,
_nB+_nF+1); _nB+_nF+1);
@ -194,7 +195,7 @@ adaptive_dfe_impl::general_work(int noutput_items,
_rotated_samples[j + i-_nB] = _rotator.rotate(in[j + i-_nB]); _rotated_samples[j + i-_nB] = _rotator.rotate(in[j + i-_nB]);
#endif #endif
} else { } else {
#if 0 #ifdef USE_VOLK_ROTATOR
_rotator.rotateN(&_rotated_samples[0] + i + _nF+1 - _sps, _rotator.rotateN(&_rotated_samples[0] + i + _nF+1 - _sps,
in + i + _nF+1 - _sps, in + i + _nF+1 - _sps,
_sps); _sps);
@ -219,6 +220,7 @@ adaptive_dfe_impl::general_work(int noutput_items,
bool adaptive_dfe_impl::start() bool adaptive_dfe_impl::start()
{ {
gr::thread::scoped_lock lock(d_setlock); gr::thread::scoped_lock lock(d_setlock);
_tmp.resize(_nB+_nF+1);
_taps_samples.resize(_nB+_nF+1); _taps_samples.resize(_nB+_nF+1);
_last_taps_samples.resize(_nB+_nF+1); _last_taps_samples.resize(_nB+_nF+1);
_taps_symbols.resize(_nW); _taps_symbols.resize(_nW);
@ -227,8 +229,8 @@ bool adaptive_dfe_impl::start()
GR_LOG_DEBUG(d_logger,str(boost::format("adaptive_dfe_impl::start() nB=%d nF=%d mu=%f alpha=%f") GR_LOG_DEBUG(d_logger,str(boost::format("adaptive_dfe_impl::start() nB=%d nF=%d mu=%f alpha=%f")
% _nB % _nF % _mu % _alpha)); % _nB % _nF % _mu % _alpha));
//_filter_update = lms::make(_mu); _filter_update = lms::make(_mu);
_filter_update = rls::make(0.001, 0.9999); //_filter_update = rls::make(0.001, 0.9999);
return true; return true;
} }
bool adaptive_dfe_impl::stop() bool adaptive_dfe_impl::stop()
@ -290,11 +292,9 @@ gr_complex adaptive_dfe_impl::filter(gr_complex const* start, gr_complex const*
std::cout << "err= " << std::abs(err) << std::endl; std::cout << "err= " << std::abs(err) << std::endl;
// taps_samples // taps_samples
gr_complex const* gain = _filter_update->update(start, end); gr_complex const* gain = _filter_update->update(start, end);
for (int j=0; j<_nB+_nF+1; ++j) { std::copy(_taps_samples.begin(), _taps_samples.end(), _last_taps_samples.begin());
_last_taps_samples[j] = _taps_samples[j]; volk_32fc_s32fc_multiply_32fc(&_tmp[0], gain, err, _nB+_nF+1);
_taps_samples[j] += _mu*std::conj(start[j]) * err; volk_32fc_x2_add_32fc (&_taps_samples[0], &_taps_samples[0], &_tmp[0], _nB+_nF+1);
// _taps_samples[j] += gain[j] * err;
}
// taps_symbols // taps_symbols
if (_use_symbol_taps) { if (_use_symbol_taps) {
for (int j=0; j<_nW; ++j) { for (int j=0; j<_nW; ++j) {
@ -310,9 +310,7 @@ gr_complex adaptive_dfe_impl::filter(gr_complex const* start, gr_complex const*
if (update_taps) { if (update_taps) {
if (_symbol_counter != 0) { // a filter tap shift might have ocurred when _symbol_counter==0 if (_symbol_counter != 0) { // a filter tap shift might have ocurred when _symbol_counter==0
gr_complex acc(0); gr_complex acc(0);
for (int j=0; j<_nB+_nF+1; ++j) { volk_32fc_x2_conjugate_dot_prod_32fc(&acc, &_taps_samples[0], &_last_taps_samples[0], _nB+_nF+1);
acc += std::conj(_last_taps_samples[j]) * _taps_samples[j];
}
float const frequency_err = gr::fast_atan2f(acc)/(0+1*_num_samples_since_filter_update); // frequency error (rad/sample) float const frequency_err = gr::fast_atan2f(acc)/(0+1*_num_samples_since_filter_update); // frequency error (rad/sample)
GR_LOG_DEBUG(d_logger, str(boost::format("frequency_err= %f %d") % frequency_err % _num_samples_since_filter_update)); GR_LOG_DEBUG(d_logger, str(boost::format("frequency_err= %f %d") % frequency_err % _num_samples_since_filter_update));
_control_loop.advance_loop(frequency_err); _control_loop.advance_loop(frequency_err);
@ -334,13 +332,6 @@ gr_complex adaptive_dfe_impl::filter(gr_complex const* start, gr_complex const*
int int
adaptive_dfe_impl::recenter_filter_taps() { adaptive_dfe_impl::recenter_filter_taps() {
#if 0
ssize_t const _idx_max = std::distance(_taps_samples.begin(),
std::max_element(_taps_samples.begin()+_nB+1-3*_sps, _taps_samples.begin()+_nB+1+3*_sps,
[](gr_complex a, gr_complex b) {
return std::norm(a) < std::norm(b);
}));
#else
float sum_w=0, sum_wi=0; float sum_w=0, sum_wi=0;
for (int i=0; i<_nB+_nF+1; ++i) { for (int i=0; i<_nB+_nF+1; ++i) {
float const w = std::norm(_taps_samples[i]); float const w = std::norm(_taps_samples[i]);
@ -348,7 +339,7 @@ adaptive_dfe_impl::recenter_filter_taps() {
sum_wi += w*i; sum_wi += w*i;
} }
ssize_t const idx_max = ssize_t(0.5 + sum_wi/sum_w); ssize_t const idx_max = ssize_t(0.5 + sum_wi/sum_w);
#endif
// GR_LOG_DEBUG(d_logger, str(boost::format("idx_max=%2d abs(tap_max)=%f") % idx_max % std::abs(_taps_samples[idx_max]))); // GR_LOG_DEBUG(d_logger, str(boost::format("idx_max=%2d abs(tap_max)=%f") % idx_max % std::abs(_taps_samples[idx_max])));
if (idx_max-_nB-1 > +2*_sps) { if (idx_max-_nB-1 > +2*_sps) {
// maximum is right of the center tap // maximum is right of the center tap
@ -445,7 +436,6 @@ void adaptive_dfe_impl::update_frame_info(pmt::pmt_t data)
// GR_LOG_DEBUG(d_logger, str(boost::format("XOR %3d %3d %d") % i % j % _scramble_xor[i][j])); // GR_LOG_DEBUG(d_logger, str(boost::format("XOR %3d %3d %d") % i % j % _scramble_xor[i][j]));
} }
} }
assert(_symbols.size() == _scramble.size()); assert(_symbols.size() == _scramble.size());
_descrambled_symbols.resize(_symbols.size()); _descrambled_symbols.resize(_symbols.size());
_vec_soft_decisions.clear(); _vec_soft_decisions.clear();

View File

@ -60,7 +60,6 @@ private:
class adaptive_dfe_impl : public adaptive_dfe { class adaptive_dfe_impl : public adaptive_dfe {
private: private:
typedef std::vector<gr_complex, volk_allocator<gr_complex> > gr_complex_vec_type; typedef std::vector<gr_complex, volk_allocator<gr_complex> > gr_complex_vec_type;
// typedef std::vector<gr_complex> gr_complex_vec_type;
int _sps; int _sps;
int _nB, _nF, _nW; int _nB, _nF, _nW;
@ -71,6 +70,7 @@ private:
bool _use_symbol_taps; bool _use_symbol_taps;
gr_complex_vec_type _tmp;
gr_complex_vec_type _taps_samples; gr_complex_vec_type _taps_samples;
gr_complex_vec_type _taps_symbols; gr_complex_vec_type _taps_symbols;
gr_complex_vec_type _last_taps_samples; gr_complex_vec_type _last_taps_samples;
@ -105,7 +105,7 @@ private:
} _state; } _state;
filter_update::sptr _filter_update; filter_update::sptr _filter_update;
// void update_constellations(boost::python::object obj);
void update_constellations(pmt::pmt_t ); void update_constellations(pmt::pmt_t );
void update_frame_info(pmt::pmt_t ); void update_frame_info(pmt::pmt_t );
@ -135,7 +135,7 @@ public:
gr_vector_const_void_star &input_items, gr_vector_const_void_star &input_items,
gr_vector_void_star &output_items); gr_vector_void_star &output_items);
virtual void set_mu(float mu) { _mu = mu; } virtual void set_mu(float mu) { _mu = mu; _filter_update->set_parameters({{"mu", mu}}); }
virtual void set_alpha(float alpha) { _alpha = alpha; } virtual void set_alpha(float alpha) { _alpha = alpha; }
} ; } ;

View File

@ -150,7 +150,7 @@ doppler_correction_cc_impl::work(int noutput_items,
} // CONSUME_AND_SKIP } // CONSUME_AND_SKIP
} }
// apply current doppler correction to all produced samples // apply current doppler correction to all produced samples
#if 0 // rotateN is broken in some older VOLK versions #ifdef USE_VOLK_ROTATOR
_rotator.rotateN(out, in, nout); _rotator.rotateN(out, in, nout);
#else #else
for (int i=0; i<nout; ++i) for (int i=0; i<nout; ++i)

View File

@ -3,6 +3,7 @@
#ifndef _LIB_FILTER_UPDATE_HPP_ #ifndef _LIB_FILTER_UPDATE_HPP_
#define _LIB_FILTER_UPDATE_HPP_ #define _LIB_FILTER_UPDATE_HPP_
#include <map>
#include <memory> #include <memory>
#include <string> #include <string>
@ -18,7 +19,7 @@ public:
virtual void reset() = 0; virtual void reset() = 0;
virtual gr_complex const* update(gr_complex const*, gr_complex const*) = 0; virtual gr_complex const* update(gr_complex const*, gr_complex const*) = 0;
virtual void set_parameters(std::map<std::string, float>const &) = 0;
protected: protected:
private: private:
} ; } ;

View File

@ -13,7 +13,8 @@ filter_update::sptr lms::make(float mu) {
lms::lms(float mu) lms::lms(float mu)
: _mu(mu) : _mu(mu)
, _gain() { , _gain()
, _tmp() {
} }
lms::~lms() { lms::~lms() {
@ -23,6 +24,7 @@ void lms::resize(size_t n) {
if (_gain.size() == n) if (_gain.size() == n)
return; return;
_gain.resize(n); _gain.resize(n);
_tmp.resize(n);
std::fill_n(_gain.begin(), n, 0); std::fill_n(_gain.begin(), n, 0);
} }
@ -33,13 +35,17 @@ void lms::reset() {
gr_complex const* lms::update(gr_complex const* beg, gr_complex const* lms::update(gr_complex const* beg,
gr_complex const* end) { gr_complex const* end) {
assert(end-beg > 0); assert(end-beg > 0);
size_t n = end - beg; size_t const n = end - beg;
resize(n); resize(n);
for (size_t i=0; i<n; ++i) volk_32fc_conjugate_32fc(&_tmp[0], beg, n);
_gain[i] = _mu * std::conj(beg[i]); volk_32f_s32f_multiply_32f((float*)&_gain[0], (float const*)&_tmp[0], _mu, 2*n);
return &_gain.front(); return &_gain.front();
} }
void lms::set_parameters(std::map<std::string, float>const & p) {
_mu = p.at("mu");
}
} // namespace digitalhf } // namespace digitalhf
} // namespace gr } // namespace gr

View File

@ -21,13 +21,16 @@ public:
virtual void reset(); virtual void reset();
virtual gr_complex const* update(gr_complex const*, gr_complex const*); virtual gr_complex const* update(gr_complex const*, gr_complex const*);
virtual void set_parameters(std::map<std::string, float>const &);
protected: protected:
void resize(size_t); void resize(size_t);
private: private:
float _mu; typedef std::vector<gr_complex, volk_allocator<gr_complex> > vec_type;
std::vector<gr_complex, volk_allocator<gr_complex> > _gain; float _mu;
vec_type _gain;
vec_type _tmp;
} ; } ;
} // namespace digitalhf } // namespace digitalhf

View File

@ -26,6 +26,8 @@ void rls::resize(size_t n) {
return; return;
_gain.resize(n); _gain.resize(n);
_inv_corr.resize(n*n); _inv_corr.resize(n*n);
_pu.resize(n);
_tmp.resize(n);
reset(); reset();
} }
@ -43,30 +45,30 @@ gr_complex const* rls::update(gr_complex const* beg,
unsigned const n = end - beg; unsigned const n = end - beg;
resize(n); resize(n);
std::vector<gr_complex, volk_allocator<gr_complex> > pu(n), tmp(n);
//pu.resize(n);
for (unsigned i=0; i<n; ++i) for (unsigned i=0; i<n; ++i)
volk_32fc_x2_dot_prod_32fc(&pu[i], &_inv_corr[n*i], beg, n); volk_32fc_x2_dot_prod_32fc(&_pu[i], &_inv_corr[n*i], beg, n);
gr_complex uPu = 0; gr_complex uPu = 0;
volk_32fc_x2_conjugate_dot_prod_32fc(&uPu, &pu[0], beg, n); volk_32fc_x2_conjugate_dot_prod_32fc(&uPu, &_pu[0], beg, n);
for (unsigned i=0; i<n; ++i)
_gain[i] = std::conj(pu[i])/(_lambda + std::real(uPu)); volk_32fc_conjugate_32fc(&_tmp[0], &_pu[0], n);
volk_32f_s32f_multiply_32f((float*)&_gain[0], (float const*)&_tmp[0], 1.0f/(_lambda + std::real(uPu)), 2*n);
// volk_32fc_s32fc_multiply_32fc(&_gain[0], &_tmp[0], 1/(_lambda + uPu), n);
for (unsigned i=0; i<n; ++i) { for (unsigned i=0; i<n; ++i) {
unsigned const k = n*i; unsigned const k = n*i;
#if 0 volk_32fc_s32fc_multiply_32fc(&_tmp[0], &_gain[0], -_pu[i], n);
for (unsigned j=0; j<n; ++j) volk_32fc_x2_add_32fc(&_inv_corr[k], &_inv_corr[k], &_tmp[0], n);
_inv_corr[k+j] = (_inv_corr[k+j] - pu[i]*_gain[j]) / _lambda;
#else
volk_32fc_s32fc_multiply_32fc(&tmp[0], &_gain[0], -pu[i], n);
volk_32fc_x2_add_32fc(&_inv_corr[k], &_inv_corr[k], &tmp[0], n);
volk_32f_s32f_multiply_32f((float*)&_inv_corr[k], (float const*)&_inv_corr[k], 1.0f/_lambda, 2*n); volk_32f_s32f_multiply_32f((float*)&_inv_corr[k], (float const*)&_inv_corr[k], 1.0f/_lambda, 2*n);
#endif
_inv_corr[k+i] += _delta; _inv_corr[k+i] += _delta;
} }
return &_gain.front(); return &_gain.front();
} }
void rls::set_parameters(std::map<std::string, float> const& p) {
_delta = p.at("delta");
_lambda = p.at("lambda");
}
} // namespace digitalhf } // namespace digitalhf
} // namespace gr } // namespace gr

View File

@ -21,15 +21,19 @@ public:
virtual void reset(); virtual void reset();
virtual gr_complex const* update(gr_complex const*, gr_complex const*); virtual gr_complex const* update(gr_complex const*, gr_complex const*);
virtual void set_parameters(std::map<std::string, float>const &);
protected: protected:
void resize(size_t); void resize(size_t);
private: private:
float _delta; typedef std::vector<gr_complex, volk_allocator<gr_complex> > vec_type;
float _lambda; float _delta;
std::vector<gr_complex, volk_allocator<gr_complex> > _gain; float _lambda;
std::vector<gr_complex, volk_allocator<gr_complex> > _inv_corr; vec_type _gain;
vec_type _inv_corr;
vec_type _pu;
vec_type _tmp;
} ; } ;
} // namespace digitalhf } // namespace digitalhf