Skip to content

Commit

Permalink
Improved Latency graph (librenms#15940)
Browse files Browse the repository at this point in the history
* Improved Latency graph
Store loss+jitter info in rrd instead of database
New graph icmp_perf (legacy ping_perf still valid referencing part of the newer data)
Delete device_perf table

* Change loss to an area so it is more visible

* Style fixes

* Cleanups from phpstan & tests

* exit_code fix

* Remove alert usage of device_perf

* Don't use magic __get

* Add test for bulkPing
Add host to previous tests

* style fixes

* Fix issue with fping error responses
  • Loading branch information
murrant authored Apr 18, 2024
1 parent 4cce4f0 commit 49f8269
Show file tree
Hide file tree
Showing 38 changed files with 466 additions and 495 deletions.
18 changes: 11 additions & 7 deletions LibreNMS/Alert/RunAlerts.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,15 @@
namespace LibreNMS\Alert;

use App\Facades\DeviceCache;
use App\Facades\Rrd;
use App\Models\AlertTransport;
use App\Models\Eventlog;
use LibreNMS\Config;
use LibreNMS\Enum\AlertState;
use LibreNMS\Enum\Severity;
use LibreNMS\Exceptions\AlertTransportDeliveryException;
use LibreNMS\Polling\ConnectivityHelper;
use LibreNMS\Util\Number;
use LibreNMS\Util\Time;

class RunAlerts
Expand Down Expand Up @@ -116,13 +118,15 @@ public function describeAlert($alert)
$obj['status'] = $device->status;
$obj['status_reason'] = $device->status_reason;
if ((new ConnectivityHelper($device))->canPing()) {
$ping_stats = $device->perf()->latest('timestamp')->first();
$obj['ping_timestamp'] = $ping_stats->timestamp;
$obj['ping_loss'] = $ping_stats->loss;
$obj['ping_min'] = $ping_stats->min;
$obj['ping_max'] = $ping_stats->max;
$obj['ping_avg'] = $ping_stats->avg;
$obj['debug'] = $ping_stats->debug;
$last_ping = Rrd::lastUpdate(Rrd::name($device->hostname, 'icmp-perf'));
if ($last_ping) {
$obj['ping_timestamp'] = $last_ping->timestamp;
$obj['ping_loss'] = Number::calculatePercent($last_ping->get('xmt') - $last_ping->get('rcv'), $last_ping->get('xmt'));
$obj['ping_min'] = $last_ping->get('min');
$obj['ping_max'] = $last_ping->get('max');
$obj['ping_avg'] = $last_ping->get('avg');
$obj['debug'] = 'unsupported';
}
}
$extra = $alert['details'];

Expand Down
89 changes: 71 additions & 18 deletions LibreNMS/Data/Source/Fping.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,58 +26,111 @@
namespace LibreNMS\Data\Source;

use LibreNMS\Config;
use LibreNMS\Exceptions\FpingUnparsableLine;
use Log;
use Symfony\Component\Process\Process;

class Fping
{
private string $fping_bin;
private string|false $fping6_bin;
private int $count;
private int $timeout;
private int $interval;
private int $tos;
private int $retries;

/**
 * Read the fping binaries and tuning options from Config once, so every
 * ping issued by this instance uses the same parameters.
 */
public function __construct()
{
    // binaries: fping6 is only used when the configured path is executable
    $this->fping_bin = Config::get('fping', 'fping');
    $fping6_candidate = Config::get('fping6', 'fping6');
    if (is_executable($fping6_candidate)) {
        $this->fping6_bin = $fping6_candidate;
    } else {
        $this->fping6_bin = false;
    }

    // count is at least 1, interval at least 20
    $this->count = max(Config::get('fping_options.count', 3), 1);
    $this->interval = max(Config::get('fping_options.interval', 500), 20);

    // timeout is never allowed to be lower than the interval
    $this->timeout = max(Config::get('fping_options.timeout', 500), $this->interval);

    $this->retries = Config::get('fping_options.retries', 2);
    $this->tos = Config::get('fping_options.tos', 0);
}

/**
* Run fping against a hostname/ip in count mode and collect stats.
*
* @param string $host
* @param int $count (min 1)
* @param int $interval (min 20)
* @param int $timeout (not more than $interval)
* @param string $host hostname or ip
* @param string $address_family ipv4 or ipv6
* @return \LibreNMS\Data\Source\FpingResponse
*/
public function ping($host, $count = 3, $interval = 1000, $timeout = 500, $address_family = 'ipv4'): FpingResponse
public function ping($host, $address_family = 'ipv4'): FpingResponse
{
$interval = max($interval, 20);

$fping = Config::get('fping');
$fping6 = Config::get('fping6');
$fping_tos = Config::get('fping_options.tos', 0);

if ($address_family == 'ipv6') {
$cmd = is_executable($fping6) ? [$fping6] : [$fping, '-6'];
$cmd = $this->fping6_bin === false ? [$this->fping_bin, '-6'] : [$this->fping6_bin];
} else {
$cmd = is_executable($fping6) ? [$fping] : [$fping, '-4'];
$cmd = $this->fping6_bin === false ? [$this->fping_bin, '-4'] : [$this->fping_bin];
}

// build the command
$cmd = array_merge($cmd, [
'-e',
'-q',
'-c',
max($count, 1),
$this->count,
'-p',
$interval,
$this->interval,
'-t',
max($timeout, $interval),
$this->timeout,
'-O',
$fping_tos,
$this->tos,
$host,
]);

$process = app()->make(Process::class, ['command' => $cmd]);
Log::debug('[FPING] ' . $process->getCommandLine() . PHP_EOL);
$process->run();

$response = FpingResponse::parseOutput($process->getErrorOutput(), $process->getExitCode());
$response = FpingResponse::parseLine($process->getErrorOutput(), $process->getExitCode());

Log::debug("response: $response");

return $response;
}

/**
 * Ping many hosts with a single fping process and hand every parsed
 * summary line to the callback as it arrives.
 *
 * @param  array  $hosts  hosts to ping; written to the process stdin, one per line
 * @param  callable  $callback  called with the FpingResponse parsed from each summary line
 */
public function bulkPing(array $hosts, callable $callback): void
{
    $command = [
        $this->fping_bin,
        '-f', '-',
        '-e',
        '-t', $this->timeout,
        '-r', $this->retries,
        '-O', $this->tos,
        '-c', $this->count,
    ];
    $process = app()->make(Process::class, ['command' => $command]);

    // allow the process to run for twice the rrd.step setting (default 300)
    $process->setTimeout(Config::get('rrd.step', 300) * 2);

    // hosts go through stdin so a long list cannot overflow cli length limits
    $process->setInput(implode(PHP_EOL, $hosts) . PHP_EOL);

    Log::debug('[FPING] ' . $process->getCommandLine() . PHP_EOL);

    // text of a line that could not be parsed yet; prepended to the next line
    $pending = '';
    $process->run(function ($type, $output) use ($callback, &$pending) {
        // stdout contains individual ping responses, stderr contains summaries
        if ($type != Process::ERR) {
            return;
        }

        foreach (explode(PHP_EOL, $output) as $line) {
            if (! $line) {
                continue;
            }

            Log::debug("Fping OUTPUT|$line PARTIAL|$pending");
            try {
                $response = FpingResponse::parseLine($pending . $line);
                call_user_func($callback, $response);
                $pending = '';
            } catch (FpingUnparsableLine $e) {
                // possibly only part of a line was received; keep it for the next chunk
                $pending = $e->unparsedLine;
            }
        }
    });
}
}
148 changes: 77 additions & 71 deletions LibreNMS/Data/Source/FpingResponse.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,46 +25,19 @@

namespace LibreNMS\Data\Source;

use App\Models\DevicePerf;
use App\Facades\Rrd;
use App\Models\Device;
use Carbon\Carbon;
use LibreNMS\Exceptions\FpingUnparsableLine;
use LibreNMS\RRD\RrdDefinition;

class FpingResponse
{
/**
* @var int
*/
public $transmitted;
/**
* @var int
*/
public $received;
/**
* @var int
*/
public $loss;
/**
* @var float
*/
public $min_latency;
/**
* @var float
*/
public $max_latency;
/**
* @var float
*/
public $avg_latency;
/**
* @var int
*/
public $duplicates;
/**
* @var int
*/
public $exit_code;
/**
* @var bool
*/
private $skipped;
// Values stored in exit_code (documented on the constructor as the return code from fping).
public const SUCCESS = 0;
/** @deprecated misspelled name kept so existing references keep working; use SUCCESS */
public const SUCESS = self::SUCCESS;
public const UNREACHABLE = 1;
public const INVALID_HOST = 2;
public const INVALID_ARGS = 3;
public const SYS_CALL_FAIL = 4;

/**
* @param int $transmitted ICMP packets transmitted
Expand All @@ -75,50 +48,72 @@ class FpingResponse
* @param float $avg_latency Average latency (ms)
* @param int $duplicates Number of duplicate responses (Indicates network issue)
* @param int $exit_code Return code from fping
* @param string|null $host Hostname/IP pinged
*/
public function __construct(int $transmitted, int $received, int $loss, float $min_latency, float $max_latency, float $avg_latency, int $duplicates, int $exit_code, bool $skipped = false)
private function __construct(
    public readonly int $transmitted,
    public readonly int $received,
    public readonly int $loss,
    public readonly float $min_latency,
    public readonly float $max_latency,
    public readonly float $avg_latency,
    public readonly int $duplicates,
    // deliberately not readonly: ignoreFailure() overwrites it with 0
    public int $exit_code,
    public readonly ?string $host = null,
    private bool $skipped = false,
) {
}

/**
 * Build a synthetic successful response (1 sent, 1 received, 0% loss,
 * zero latencies, exit code 0) that is flagged as skipped.
 *
 * @param  string|null  $host  Hostname/IP the response is attributed to
 */
public static function artificialUp(?string $host = null): static
{
    return new static(1, 1, 0, 0, 0, 0, 0, 0, $host, true);
}

public static function artificialDown(string $host = null): static
{
$this->transmitted = $transmitted;
$this->received = $received;
$this->loss = $loss;
$this->min_latency = $min_latency;
$this->max_latency = $max_latency;
$this->avg_latency = $avg_latency;
$this->duplicates = $duplicates;
$this->exit_code = $exit_code;
$this->skipped = $skipped;
return new static(1, 0, 100, 0, 0, 0, 0, 0, $host, false);
}

public static function artificialUp(): FpingResponse
/**
* Change the exit code to 0; this may be appropriate when a non-fatal error was encountered
*/
public function ignoreFailure(): void
{
return new FpingResponse(1, 1, 0, 0, 0, 0, 0, 0, true);
$this->exit_code = 0;
}

/**
 * Report the skipped flag this response was constructed with
 * (artificialUp() creates responses with the flag set).
 */
public function wasSkipped(): bool
{
    return $this->skipped;
}

public static function parseOutput(string $output, int $code): FpingResponse
public static function parseLine(string $output, int $code = null): FpingResponse
{
preg_match('#= (\d+)/(\d+)/(\d+)%(, min/avg/max = ([\d.]+)/([\d.]+)/([\d.]+))?$#', $output, $parsed);
[, $xmt, $rcv, $loss, , $min, $avg, $max] = array_pad($parsed, 8, 0);
$matched = preg_match('#(\S+)\s*: (xmt/rcv/%loss = (\d+)/(\d+)/(?:(100)%|(\d+)%, min/avg/max = ([\d.]+)/([\d.]+)/([\d.]+))|Name or service not known|Temporary failure in name resolution)$#', $output, $parsed);

if ($code == 0 && ! $matched) {
throw new FpingUnparsableLine($output);
}

[, $host, $error, $xmt, $rcv, $loss100, $loss, $min, $avg, $max] = array_pad($parsed, 10, 0);
$loss = $loss100 ?: $loss;

if ($loss < 0) {
$xmt = 1;
$rcv = 0;
$loss = 100;
if ($error == 'Name or service not known') {
return new FpingResponse(0, 0, 0, 0, 0, 0, 0, self::INVALID_HOST, $host);
} elseif ($error == 'Temporary failure in name resolution') {
return new FpingResponse(0, 0, 0, 0, 0, 0, 0, self::SYS_CALL_FAIL, $host);
}

return new FpingResponse(
return new static(
(int) $xmt,
(int) $rcv,
(int) $loss,
(float) $min,
(float) $max,
(float) $avg,
substr_count($output, 'duplicate'),
$code
$code ?? ($loss100 ? self::UNREACHABLE : self::SUCESS),
$host,
);
}

Expand All @@ -131,26 +126,37 @@ public function success(): bool
return $this->exit_code == 0 && $this->loss < 100;
}

public function toModel(): ?DevicePerf
{
return new DevicePerf([
'xmt' => $this->transmitted,
'rcv' => $this->received,
'loss' => $this->loss,
'min' => $this->min_latency,
'max' => $this->max_latency,
'avg' => $this->avg_latency,
]);
}

/**
 * Render the response as a summary line of the form
 * "host : xmt/rcv/%loss = x/r/l%", followed by ", min/avg/max = a/b/c"
 * when max_latency is non-zero.
 */
public function __toString(): string
{
    $str = "$this->host : xmt/rcv/%loss = $this->transmitted/$this->received/$this->loss%";

    if ($this->max_latency) {
        $str .= ", min/avg/max = $this->min_latency/$this->avg_latency/$this->max_latency";
    }

    return $str;
}

/**
 * Record this ping result: update the device's last ping fields and write
 * the latency/packet counters to the 'icmp-perf' measurement.
 *
 * @param  Device  $device  device the response belongs to; it is saved as part of this call
 */
public function saveStats(Device $device): void
{
    $device->last_ping = Carbon::now();
    // keep the previous time taken when this response has no average latency
    $device->last_ping_timetaken = $this->avg_latency ?: $device->last_ping_timetaken;
    $device->save();

    // detailed multi-ping capable graph
    $datastore = app('Datastore');
    $device_array = $device->toArray();

    // NOTE(review): the avg dataset names the 'ping' ds of the legacy ping-perf rrd as its
    // source, presumably to carry over existing data; confirm against RrdDefinition
    $tags = [
        'rrd_def' => RrdDefinition::make()
            ->addDataset('avg', 'GAUGE', 0, 65535, source_ds: 'ping', source_file: Rrd::name($device->hostname, 'ping-perf'))
            ->addDataset('xmt', 'GAUGE', 0, 65535)
            ->addDataset('rcv', 'GAUGE', 0, 65535)
            ->addDataset('min', 'GAUGE', 0, 65535)
            ->addDataset('max', 'GAUGE', 0, 65535),
    ];

    $fields = [
        'avg' => $this->avg_latency,
        'xmt' => $this->transmitted,
        'rcv' => $this->received,
        'min' => $this->min_latency,
        'max' => $this->max_latency,
    ];

    $datastore->put($device_array, 'icmp-perf', $tags, $fields);
}
}
Loading

0 comments on commit 49f8269

Please sign in to comment.