diff --git a/Debug/poor-mans-profiler.sh b/Debug/poor-mans-profiler.sh index ab06a1b66a..4b03e56310 100755 --- a/Debug/poor-mans-profiler.sh +++ b/Debug/poor-mans-profiler.sh @@ -1,12 +1,20 @@ #!/bin/bash # +# Author: Pavel Kirienko +# # Poor man's sampling profiler for NuttX. # # Usage: Install flamegraph.pl in your PATH, configure your .gdbinit, run the script with proper arguments and go # have a coffee. When you're back, you'll see the flamegraph. Note that frequent calls to GDB significantly # interfere with normal operation of the target, which means that you can't profile real-time tasks with it. +# For best results, ensure that the PC is not overloaded and that the USB host controller to which the debugger is +# connected is not congested. You should also allow the current user to set negative nice values. +# +# The FlameGraph script can be downloaded from https://github.com/brendangregg/FlameGraph. Thanks Mr. Gregg. # -# Requirements: ARM GDB with Python support +# Requirements: ARM GDB with Python support. You can get one by downloading the sources from +# https://launchpad.net/gcc-arm-embedded and building them with the correct flags. +# Note that Python support is not required if no per-task sampling is needed. 
# set -e @@ -33,7 +41,7 @@ which flamegraph.pl > /dev/null || die "Install flamegraph.pl first" nsamples=0 sleeptime=0.1 # Doctors recommend 7-8 hours a day taskname= -elf=$root/Build/px4fmu-v2_default.build/firmware.elf +elf= append=0 fgfontsize=10 fgwidth=1900 @@ -69,6 +77,8 @@ do shift done +[[ -z "$elf" ]] && die "Please specify the ELF file location, e.g.: build_px4fmu-v4_default/src/firmware/nuttx/firmware_nuttx" + # # Temporary files # @@ -237,8 +247,8 @@ for s, f in sorted(stacks.items(), key=lambda (s, f): s): print('Total stack frames:', num_stack_frames, file=sys.stderr) print('Top consumers (distribution of the stack tops):', file=sys.stderr) -for name,num in sorted(stack_tops.items(), key=lambda (name, num): num, reverse=True)[:10]: - print('% 5.1f%% ' % (100 * num / num_stack_frames), name, file=sys.stderr) +for name,num in sorted(stack_tops.items(), key=lambda (name, num): num, reverse=True)[:300]: + print('% 7.3f%% ' % (100 * num / num_stack_frames), name, file=sys.stderr) EOF cat $stacksfile | python /tmp/pmpn-folder.py > $foldfile diff --git a/ROMFS/px4fmu_common/init.d/4012_quad_x_can b/ROMFS/px4fmu_common/init.d/4012_quad_x_can index f6246fef8e..01f90b4704 100644 --- a/ROMFS/px4fmu_common/init.d/4012_quad_x_can +++ b/ROMFS/px4fmu_common/init.d/4012_quad_x_can @@ -25,4 +25,5 @@ then param set MC_YAWRATE_D 0.0 fi +set MIXER quad_x_can set OUTPUT_MODE uavcan_esc diff --git a/ROMFS/px4fmu_common/mixers/quad_x_can.main.mix b/ROMFS/px4fmu_common/mixers/quad_x_can.main.mix new file mode 100644 index 0000000000..d6a2980148 --- /dev/null +++ b/ROMFS/px4fmu_common/mixers/quad_x_can.main.mix @@ -0,0 +1 @@ +R: 4x 10000 10000 10000 0 diff --git a/src/lib/matrix b/src/lib/matrix index cf924956d7..499b897e5f 160000 --- a/src/lib/matrix +++ b/src/lib/matrix @@ -1 +1 @@ -Subproject commit cf924956d7d62ce18bfc4f8441e9177ddb69c0dc +Subproject commit 499b897e5f270c3207a0e88d2f7239c5885d1681 diff --git a/src/modules/uavcan/actuators/esc.cpp 
b/src/modules/uavcan/actuators/esc.cpp index d24ad9a1fa..92d12ba32c 100644 --- a/src/modules/uavcan/actuators/esc.cpp +++ b/src/modules/uavcan/actuators/esc.cpp @@ -136,6 +136,25 @@ void UavcanEscController::update_outputs(float *outputs, unsigned num_outputs) } } + /* + * Remove channels that are always zero. + * The objective of this optimization is to avoid broadcasting multi-frame transfers when a single frame + * transfer would be enough. This is a valid optimization as the UAVCAN specification implies that all + * non-specified ESC setpoints should be considered zero. + * The positive outcome is a (marginally) lower bus traffic and lower CPU load. + * + * From the standpoint of the PX4 architecture, however, this is a hack. It should be investigated why + * the mixer returns more outputs than are actually used. + */ + for (int index = int(msg.cmd.size()) - 1; index >= _max_number_of_nonzero_outputs; index--) { + if (msg.cmd[index] != 0) { + _max_number_of_nonzero_outputs = index + 1; + break; + } + } + + msg.cmd.resize(_max_number_of_nonzero_outputs); + /* * Publish the command message to the bus * Note that for a quadrotor it takes one CAN frame diff --git a/src/modules/uavcan/actuators/esc.hpp b/src/modules/uavcan/actuators/esc.hpp index 40b151e308..ced372f7c6 100644 --- a/src/modules/uavcan/actuators/esc.hpp +++ b/src/modules/uavcan/actuators/esc.hpp @@ -107,6 +107,7 @@ private: * ESC states */ uint32_t _armed_mask = 0; + uint8_t _max_number_of_nonzero_outputs = 0; /* * Perf counters diff --git a/src/modules/uavcan/uavcan_main.cpp b/src/modules/uavcan/uavcan_main.cpp index 204d8c79a3..e6bbc0d078 100644 --- a/src/modules/uavcan/uavcan_main.cpp +++ b/src/modules/uavcan/uavcan_main.cpp @@ -110,18 +110,6 @@ UavcanNode::UavcanNode(uavcan::ICanDriver &can_driver, uavcan::ISystemClock &sys } /* _server_command_sem use case is a signal */ px4_sem_setprotocol(&_server_command_sem, SEM_PRIO_NONE); - - if (_perfcnt_node_spin_elapsed == nullptr) { - errx(1, "uavcan: 
couldn't allocate _perfcnt_node_spin_elapsed"); - } - - if (_perfcnt_esc_mixer_output_elapsed == nullptr) { - errx(1, "uavcan: couldn't allocate _perfcnt_esc_mixer_output_elapsed"); - } - - if (_perfcnt_esc_mixer_total_elapsed == nullptr) { - errx(1, "uavcan: couldn't allocate _perfcnt_esc_mixer_total_elapsed"); - } } UavcanNode::~UavcanNode() @@ -164,9 +152,6 @@ UavcanNode::~UavcanNode() _instance = nullptr; - perf_free(_perfcnt_node_spin_elapsed); - perf_free(_perfcnt_esc_mixer_output_elapsed); - perf_free(_perfcnt_esc_mixer_total_elapsed); pthread_mutex_destroy(&_node_mutex); px4_sem_destroy(&_server_command_sem); @@ -697,7 +682,6 @@ int UavcanNode::init(uavcan::NodeID node_id) void UavcanNode::node_spin_once() { - perf_begin(_perfcnt_node_spin_elapsed); const int spin_res = _node.spinOnce(); if (spin_res < 0) { @@ -708,8 +692,6 @@ void UavcanNode::node_spin_once() if (_tx_injector != nullptr) { _tx_injector->injectTxFramesInto(_node); } - - perf_end(_perfcnt_node_spin_elapsed); } /* @@ -868,12 +850,8 @@ int UavcanNode::run() // Mutex is unlocked while the thread is blocked on IO multiplexing (void)pthread_mutex_unlock(&_node_mutex); - perf_end(_perfcnt_esc_mixer_total_elapsed); // end goes first, it's not a mistake - const int poll_ret = ::poll(_poll_fds, _poll_fds_num, PollTimeoutMs); - perf_begin(_perfcnt_esc_mixer_total_elapsed); - (void)pthread_mutex_lock(&_node_mutex); node_spin_once(); // Non-blocking @@ -965,9 +943,7 @@ int UavcanNode::run() // Output to the bus _outputs.timestamp = hrt_absolute_time(); - perf_begin(_perfcnt_esc_mixer_output_elapsed); _esc_controller.update_outputs(_outputs.output, _outputs.noutputs); - perf_end(_perfcnt_esc_mixer_output_elapsed); } diff --git a/src/modules/uavcan/uavcan_main.hpp b/src/modules/uavcan/uavcan_main.hpp index b962333a06..f84dff1630 100644 --- a/src/modules/uavcan/uavcan_main.hpp +++ b/src/modules/uavcan/uavcan_main.hpp @@ -209,10 +209,6 @@ private: // index into _poll_fds for each _control_subs handle 
uint8_t _poll_ids[NUM_ACTUATOR_CONTROL_GROUPS_UAVCAN]; - perf_counter_t _perfcnt_node_spin_elapsed = perf_alloc(PC_ELAPSED, "uavcan_node_spin_elapsed"); - perf_counter_t _perfcnt_esc_mixer_output_elapsed = perf_alloc(PC_ELAPSED, "uavcan_esc_mixer_output_elapsed"); - perf_counter_t _perfcnt_esc_mixer_total_elapsed = perf_alloc(PC_ELAPSED, "uavcan_esc_mixer_total_elapsed"); - void handle_time_sync(const uavcan::TimerEvent &); typedef uavcan::MethodBinder TimerCallback; diff --git a/src/systemcmds/tests/test_matrix.cpp b/src/systemcmds/tests/test_matrix.cpp index d2e574810f..029f36ceb5 100644 --- a/src/systemcmds/tests/test_matrix.cpp +++ b/src/systemcmds/tests/test_matrix.cpp @@ -317,9 +317,9 @@ bool MatrixTest::filterTests() bool MatrixTest::helperTests() { - ut_test(fabs(wrap_pi(4.0) - (4.0 - 2 * M_PI)) < 1e-5); - ut_test(fabs(wrap_pi(-4.0) - (-4.0 + 2 * M_PI)) < 1e-5); - ut_test(fabs(wrap_pi(3.0) - (3.0)) < 1e-3); + ut_test(::fabs(wrap_pi(4.0) - (4.0 - 2 * M_PI)) < 1e-5); + ut_test(::fabs(wrap_pi(-4.0) - (-4.0 + 2 * M_PI)) < 1e-5); + ut_test(::fabs(wrap_pi(3.0) - (3.0)) < 1e-3); wrap_pi(NAN); Vector3f a(1, 2, 3);