replaced loops with calls to VOLK kernels

This commit is contained in:
Christoph Mayer 2019-09-09 17:41:20 +02:00
parent 09ef3402aa
commit 65b73f8258
8 changed files with 56 additions and 50 deletions

View File

@ -70,6 +70,7 @@ adaptive_dfe_impl::adaptive_dfe_impl(int sps, // samples per symbol
, _mu(mu)
, _alpha(alpha)
, _use_symbol_taps(true)
, _tmp()
, _taps_samples()
, _taps_symbols()
, _hist_symbols()
@ -185,7 +186,7 @@ adaptive_dfe_impl::general_work(int noutput_items,
}
// rotate samples
if (i == i0) {
#if 0
#ifdef USE_VOLK_ROTATOR
_rotator.rotateN(&_rotated_samples[0] + i - _nB,
in + i - _nB,
_nB+_nF+1);
@ -194,7 +195,7 @@ adaptive_dfe_impl::general_work(int noutput_items,
_rotated_samples[j + i-_nB] = _rotator.rotate(in[j + i-_nB]);
#endif
} else {
#if 0
#ifdef USE_VOLK_ROTATOR
_rotator.rotateN(&_rotated_samples[0] + i + _nF+1 - _sps,
in + i + _nF+1 - _sps,
_sps);
@ -219,6 +220,7 @@ adaptive_dfe_impl::general_work(int noutput_items,
bool adaptive_dfe_impl::start()
{
gr::thread::scoped_lock lock(d_setlock);
_tmp.resize(_nB+_nF+1);
_taps_samples.resize(_nB+_nF+1);
_last_taps_samples.resize(_nB+_nF+1);
_taps_symbols.resize(_nW);
@ -227,8 +229,8 @@ bool adaptive_dfe_impl::start()
GR_LOG_DEBUG(d_logger,str(boost::format("adaptive_dfe_impl::start() nB=%d nF=%d mu=%f alpha=%f")
% _nB % _nF % _mu % _alpha));
//_filter_update = lms::make(_mu);
_filter_update = rls::make(0.001, 0.9999);
_filter_update = lms::make(_mu);
//_filter_update = rls::make(0.001, 0.9999);
return true;
}
bool adaptive_dfe_impl::stop()
@ -290,11 +292,9 @@ gr_complex adaptive_dfe_impl::filter(gr_complex const* start, gr_complex const*
std::cout << "err= " << std::abs(err) << std::endl;
// taps_samples
gr_complex const* gain = _filter_update->update(start, end);
for (int j=0; j<_nB+_nF+1; ++j) {
_last_taps_samples[j] = _taps_samples[j];
_taps_samples[j] += _mu*std::conj(start[j]) * err;
// _taps_samples[j] += gain[j] * err;
}
std::copy(_taps_samples.begin(), _taps_samples.end(), _last_taps_samples.begin());
volk_32fc_s32fc_multiply_32fc(&_tmp[0], gain, err, _nB+_nF+1);
volk_32fc_x2_add_32fc (&_taps_samples[0], &_taps_samples[0], &_tmp[0], _nB+_nF+1);
// taps_symbols
if (_use_symbol_taps) {
for (int j=0; j<_nW; ++j) {
@ -310,9 +310,7 @@ gr_complex adaptive_dfe_impl::filter(gr_complex const* start, gr_complex const*
if (update_taps) {
if (_symbol_counter != 0) { // a filter tap shift might have ocurred when _symbol_counter==0
gr_complex acc(0);
for (int j=0; j<_nB+_nF+1; ++j) {
acc += std::conj(_last_taps_samples[j]) * _taps_samples[j];
}
volk_32fc_x2_conjugate_dot_prod_32fc(&acc, &_taps_samples[0], &_last_taps_samples[0], _nB+_nF+1);
float const frequency_err = gr::fast_atan2f(acc)/(0+1*_num_samples_since_filter_update); // frequency error (rad/sample)
GR_LOG_DEBUG(d_logger, str(boost::format("frequency_err= %f %d") % frequency_err % _num_samples_since_filter_update));
_control_loop.advance_loop(frequency_err);
@ -334,13 +332,6 @@ gr_complex adaptive_dfe_impl::filter(gr_complex const* start, gr_complex const*
int
adaptive_dfe_impl::recenter_filter_taps() {
#if 0
ssize_t const _idx_max = std::distance(_taps_samples.begin(),
std::max_element(_taps_samples.begin()+_nB+1-3*_sps, _taps_samples.begin()+_nB+1+3*_sps,
[](gr_complex a, gr_complex b) {
return std::norm(a) < std::norm(b);
}));
#else
float sum_w=0, sum_wi=0;
for (int i=0; i<_nB+_nF+1; ++i) {
float const w = std::norm(_taps_samples[i]);
@ -348,7 +339,7 @@ adaptive_dfe_impl::recenter_filter_taps() {
sum_wi += w*i;
}
ssize_t const idx_max = ssize_t(0.5 + sum_wi/sum_w);
#endif
// GR_LOG_DEBUG(d_logger, str(boost::format("idx_max=%2d abs(tap_max)=%f") % idx_max % std::abs(_taps_samples[idx_max])));
if (idx_max-_nB-1 > +2*_sps) {
// maximum is right of the center tap
@ -445,7 +436,6 @@ void adaptive_dfe_impl::update_frame_info(pmt::pmt_t data)
// GR_LOG_DEBUG(d_logger, str(boost::format("XOR %3d %3d %d") % i % j % _scramble_xor[i][j]));
}
}
assert(_symbols.size() == _scramble.size());
_descrambled_symbols.resize(_symbols.size());
_vec_soft_decisions.clear();

View File

@ -60,7 +60,6 @@ private:
class adaptive_dfe_impl : public adaptive_dfe {
private:
typedef std::vector<gr_complex, volk_allocator<gr_complex> > gr_complex_vec_type;
// typedef std::vector<gr_complex> gr_complex_vec_type;
int _sps;
int _nB, _nF, _nW;
@ -71,6 +70,7 @@ private:
bool _use_symbol_taps;
gr_complex_vec_type _tmp;
gr_complex_vec_type _taps_samples;
gr_complex_vec_type _taps_symbols;
gr_complex_vec_type _last_taps_samples;
@ -105,7 +105,7 @@ private:
} _state;
filter_update::sptr _filter_update;
// void update_constellations(boost::python::object obj);
void update_constellations(pmt::pmt_t );
void update_frame_info(pmt::pmt_t );
@ -135,7 +135,7 @@ public:
gr_vector_const_void_star &input_items,
gr_vector_void_star &output_items);
virtual void set_mu(float mu) { _mu = mu; }
virtual void set_mu(float mu) { _mu = mu; _filter_update->set_parameters({{"mu", mu}}); }
virtual void set_alpha(float alpha) { _alpha = alpha; }
} ;

View File

@ -150,7 +150,7 @@ doppler_correction_cc_impl::work(int noutput_items,
} // CONSUME_AND_SKIP
}
// apply current doppler correction to all produced samples
#if 0 // rotateN is broken in some older VOLK versions
#ifdef USE_VOLK_ROTATOR
_rotator.rotateN(out, in, nout);
#else
for (int i=0; i<nout; ++i)

View File

@ -3,6 +3,7 @@
#ifndef _LIB_FILTER_UPDATE_HPP_
#define _LIB_FILTER_UPDATE_HPP_
#include <map>
#include <memory>
#include <string>
@ -18,7 +19,7 @@ public:
virtual void reset() = 0;
virtual gr_complex const* update(gr_complex const*, gr_complex const*) = 0;
virtual void set_parameters(std::map<std::string, float>const &) = 0;
protected:
private:
} ;

View File

@ -13,7 +13,8 @@ filter_update::sptr lms::make(float mu) {
lms::lms(float mu)
: _mu(mu)
, _gain() {
, _gain()
, _tmp() {
}
lms::~lms() {
@ -23,6 +24,7 @@ void lms::resize(size_t n) {
if (_gain.size() == n)
return;
_gain.resize(n);
_tmp.resize(n);
std::fill_n(_gain.begin(), n, 0);
}
@ -33,13 +35,17 @@ void lms::reset() {
gr_complex const* lms::update(gr_complex const* beg,
gr_complex const* end) {
assert(end-beg > 0);
size_t n = end - beg;
size_t const n = end - beg;
resize(n);
for (size_t i=0; i<n; ++i)
_gain[i] = _mu * std::conj(beg[i]);
volk_32fc_conjugate_32fc(&_tmp[0], beg, n);
volk_32f_s32f_multiply_32f((float*)&_gain[0], (float const*)&_tmp[0], _mu, 2*n);
return &_gain.front();
}
void lms::set_parameters(std::map<std::string, float>const & p) {
_mu = p.at("mu");
}
} // namespace digitalhf
} // namespace gr

View File

@ -21,13 +21,16 @@ public:
virtual void reset();
virtual gr_complex const* update(gr_complex const*, gr_complex const*);
virtual void set_parameters(std::map<std::string, float>const &);
protected:
void resize(size_t);
private:
float _mu;
std::vector<gr_complex, volk_allocator<gr_complex> > _gain;
typedef std::vector<gr_complex, volk_allocator<gr_complex> > vec_type;
float _mu;
vec_type _gain;
vec_type _tmp;
} ;
} // namespace digitalhf

View File

@ -26,6 +26,8 @@ void rls::resize(size_t n) {
return;
_gain.resize(n);
_inv_corr.resize(n*n);
_pu.resize(n);
_tmp.resize(n);
reset();
}
@ -43,30 +45,30 @@ gr_complex const* rls::update(gr_complex const* beg,
unsigned const n = end - beg;
resize(n);
std::vector<gr_complex, volk_allocator<gr_complex> > pu(n), tmp(n);
//pu.resize(n);
for (unsigned i=0; i<n; ++i)
volk_32fc_x2_dot_prod_32fc(&pu[i], &_inv_corr[n*i], beg, n);
volk_32fc_x2_dot_prod_32fc(&_pu[i], &_inv_corr[n*i], beg, n);
gr_complex uPu = 0;
volk_32fc_x2_conjugate_dot_prod_32fc(&uPu, &pu[0], beg, n);
for (unsigned i=0; i<n; ++i)
_gain[i] = std::conj(pu[i])/(_lambda + std::real(uPu));
volk_32fc_x2_conjugate_dot_prod_32fc(&uPu, &_pu[0], beg, n);
volk_32fc_conjugate_32fc(&_tmp[0], &_pu[0], n);
volk_32f_s32f_multiply_32f((float*)&_gain[0], (float const*)&_tmp[0], 1.0f/(_lambda + std::real(uPu)), 2*n);
// volk_32fc_s32fc_multiply_32fc(&_gain[0], &_tmp[0], 1/(_lambda + uPu), n);
for (unsigned i=0; i<n; ++i) {
unsigned const k = n*i;
#if 0
for (unsigned j=0; j<n; ++j)
_inv_corr[k+j] = (_inv_corr[k+j] - pu[i]*_gain[j]) / _lambda;
#else
volk_32fc_s32fc_multiply_32fc(&tmp[0], &_gain[0], -pu[i], n);
volk_32fc_x2_add_32fc(&_inv_corr[k], &_inv_corr[k], &tmp[0], n);
volk_32fc_s32fc_multiply_32fc(&_tmp[0], &_gain[0], -_pu[i], n);
volk_32fc_x2_add_32fc(&_inv_corr[k], &_inv_corr[k], &_tmp[0], n);
volk_32f_s32f_multiply_32f((float*)&_inv_corr[k], (float const*)&_inv_corr[k], 1.0f/_lambda, 2*n);
#endif
_inv_corr[k+i] += _delta;
}
return &_gain.front();
}
void rls::set_parameters(std::map<std::string, float> const& p) {
_delta = p.at("delta");
_lambda = p.at("lambda");
}
} // namespace digitalhf
} // namespace gr

View File

@ -21,15 +21,19 @@ public:
virtual void reset();
virtual gr_complex const* update(gr_complex const*, gr_complex const*);
virtual void set_parameters(std::map<std::string, float>const &);
protected:
void resize(size_t);
private:
float _delta;
float _lambda;
std::vector<gr_complex, volk_allocator<gr_complex> > _gain;
std::vector<gr_complex, volk_allocator<gr_complex> > _inv_corr;
typedef std::vector<gr_complex, volk_allocator<gr_complex> > vec_type;
float _delta;
float _lambda;
vec_type _gain;
vec_type _inv_corr;
vec_type _pu;
vec_type _tmp;
} ;
} // namespace digitalhf