changeset 369:85a5a231b0b5

Update Logwatch Systemd to handle lots of unmatched entries
author IBBoard <dev@ibboard.co.uk>
date Tue, 16 Feb 2021 13:13:00 +0000
parents d3b10f2a5f60
children cd0e77678dca
files common/logwatch/systemd
diffstat 1 files changed, 179 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/common/logwatch/systemd	Tue Feb 02 20:52:38 2021 +0000
+++ b/common/logwatch/systemd	Tue Feb 16 13:13:00 2021 +0000
@@ -1,3 +1,11 @@
+########################################################
+# Please file all bug reports, patches, and feature
+# requests under:
+#      https://sourceforge.net/p/logwatch/_list/tickets
+# Help requests and discusion can be filed under:
+#      https://sourceforge.net/p/logwatch/discussion/
+########################################################
+
 ########################################################
 ## Copyright (c) 2016 Orion Poplawski
 ## Covered under the included MIT/X-Consortium License:
@@ -16,11 +24,18 @@
 
 use strict;
 my $Detail = $ENV{'LOGWATCH_DETAIL_LEVEL'} || 0;
+my $Ignore_failed = $ENV{'ignore_failed'} || "";
+my $Ignore_leftover = $ENV{'ignore_leftover'} || "";
+my %ConfigError;
 my %Activated;
 my %Failed;
+my %Deactivated;
+my %LeftOver;
 my $Reexecuted = 0;
 my %Reloaded;
+my %Skipped;
 my %Slice;
+my %Slow;
 my %Started;
 my %Target;
 my $TimeChanged = 0;
@@ -28,7 +43,7 @@
 my %UserSession;
 my %OtherList;
 
-# Failue will generate multiple messages like:
+# Failure will generate multiple messages like:
 # Feb  5 16:37:50 hostname systemd: ansible-pull.service: main process exited, code=exited, status=2/INVALIDARGUMENT
 # Feb  5 16:37:50 hostname systemd: Failed to start Run ansible-pull on boot.
 # Feb  5 16:37:50 hostname systemd: Unit ansible-pull.service entered failed state.
@@ -36,58 +51,129 @@
 
 while (defined(my $ThisLine = <STDIN>)) {
    chomp($ThisLine);
-   if ($ThisLine =~ /^(Activating|Deactivating|Mounting|Unmounting|Starting|Stopping) / or
+   if ($ThisLine =~ /^(Activat|Deactivat|Mount|Unmount|Reload|Start|Stopp)ing / or
+       $ThisLine =~ /^Finished / or
        # These events will be caught with the Unit X entered failed state message
        $ThisLine =~ /^Failed to start / or
+       $ThisLine =~ /: Failed with result / or
+       $ThisLine =~ /Failed at step / or
        $ThisLine =~ / failed\.$/ or
-       $ThisLine =~ /: (control|main) process exited, code=(exited|killed),? status=/ or
+       $ThisLine =~ /([Cc]ontrol|[Mm]ain|[Mm]ount) process exited, code=(exited|killed|dumped),? status=/ or
        # Informational
-       $ThisLine =~ /^Closed .* socket\.$/ or
+       $ThisLine =~ /^Closed .*[\. ][Ss]ockets?\.$/ or
+       $ThisLine =~ /^Closed .* [Ss]cheduler\.$/ or
+       $ThisLine =~ /^Closed .* [Ww]atch\.$/ or
+       $ThisLine =~ /^Closed (?:Multimedia|Sound) System\.$/ or
        $ThisLine =~ /^Closed udev / or
+       $ThisLine =~ /: Consumed .* CPU time\.$/ or
+       # crond will never restart process when it is restarted
+       $ThisLine =~ /^crond\.service: Found left-over process \d+ \(.*\) in control group while starting unit\. Ignoring\.$/ or
+       $ThisLine =~ /^Received SIGINT\./ or
+       $ThisLine =~ /^Deactivated / or
        $ThisLine =~ /^Detected (architecture|virtualization) / or
        $ThisLine =~ /^Found device / or
-       $ThisLine =~ /^Got automount request for \/proc\// or
+       $ThisLine =~ /Got automount request for \/proc\// or
        $ThisLine =~ /^Inserted module / or
        $ThisLine =~ /^Listening on / or
        $ThisLine =~ /^Mounted / or
+       $ThisLine =~ /^Queued start job for default target / or
        $ThisLine =~ /^Relabelled / or
        $ThisLine =~ /^Reloading\.$/ or         # Happens on each boot at switch root
+       $ThisLine =~ /^RTC configured in / or
        $ThisLine =~ /^Running in initial RAM disk\.$/ or
+       $ThisLine =~ /^selinux: avc: *received policyload notice/ or
        $ThisLine =~ /^Set hostname to / or
+       $ThisLine =~ /^(?:Set up|Unset) automount Arbitrary Executable File Formats File System Automount Point\.$/ or
        $ThisLine =~ /^Shutting down\.$/ or
        $ThisLine =~ /^Startup finished in / or
        $ThisLine =~ /^Stopped / or
        $ThisLine =~ /^Switching root\.$/ or
+       $ThisLine =~ /: Succeeded\.$/ or
        $ThisLine =~ /^Successfully loaded SELinux policy in / or
+       $ThisLine =~ /already active, refusing\./ or
+       $ThisLine =~ /^Failed to propagate agent release message:/ or
        $ThisLine =~ /: Supervising process .* which is not our child\. We'll most likely not notice when it exits\.$/ or
-       $ThisLine =~ /^systemd (\d+) running in system mode/ or
+       $ThisLine =~ /: Got notification message from PID \d+, but reception is disabled\./ or
+       $ThisLine =~ /: Got notification message from PID \d+, but reception only permitted for main PID \d+/ or
+       $ThisLine =~ /Cannot find unit for notify message of PID \d+/ or
+       $ThisLine =~ /^systemd .* running in system mode/ or
+       # This is preceeded by a more descriptive message
+       $ThisLine =~ /^This usually indicates unclean termination of a previous run, or service implementation deficiencies\.$/ or
+       $ThisLine =~ /Transaction is destructive\./ or
        $ThisLine =~ /^Unit .* is bound to inactive unit .*\. Stopping, too\./ or
-       $ThisLine =~ /^Unit .* is not needed anymore\. Stopping\./ or
+       $ThisLine =~ /Unit (.* is )?not needed anymore\. Stopping\./ or
+       $ThisLine =~ /State '(stop-sigterm|stop-final-sigterm)' timed out\. Killing\./ or
+       $ThisLine =~ /: Start(-pre)? operation timed out\. Terminating\./ or
+       $ThisLine =~ /hold-?off time over, scheduling restart\./ or
+       $ThisLine =~ /Service has no hold-off time.*, scheduling restart\./ or
+       $ThisLine =~ /Service Restart.* expired, scheduling restart\./ or
+       $ThisLine =~ /Scheduled restart job, restart counter is at .*\./ or
+       $ThisLine =~ /Stopping timed out\. Killing\./ or
+       $ThisLine =~ /^Timed out waiting for/ or
+       $ThisLine =~ /: Watchdog timeout/ or
+       $ThisLine =~ /^Dependency failed for / or
+       $ThisLine =~ /Processes still around after .*SIGKILL\./ or
        $ThisLine =~ /^Unmounted / or
+       $ThisLine =~ /: Unit is bound to inactive unit / or
+       $ThisLine =~ /[Hh]ardware watchdog / or
+       # This is now trapped as a Slow Start error
+       #$ThisLine =~ /PID file .* not readable \(yet\?\) after start/ or
+       $ThisLine =~ /Failed to read PID from file / or
        # Units can depend on files that do not exist
-       $ThisLine =~ /^Cannot add dependency job for unit .*, ignoring: Unit .* failed to load: No such file or directory\.$/ or
+       $ThisLine =~ /Cannot add dependency job(:? for unit .*)?, ignoring: Unit (:?.* failed to load: No such file or directory|not found)\.$/ or
        # https://bugs.freedesktop.org/show_bug.cgi?id=90386
-       $ThisLine =~ /^Device .* appeared twice with different sysfs paths .* and / or
+       $ThisLine =~ /Dev(ice)? .* appeared twice with different sysfs paths .* and / or
        # Inactive units are sometimes reloaded
        $ThisLine =~ /^Unit .* cannot be reloaded because it is inactive\.$/ or
-       # https://bugzilla.redhat.com/show_bug.cgi?id=1293941
-       $ThisLine =~ /^Configuration file \/usr\/lib\/systemd\/system\/auditd\.service is marked world-inaccessible/ or
-       # https://bugzilla.redhat.com/show_bug.cgi?id=1301182
-       $ThisLine =~ /^Configuration file \/usr\/lib\/systemd\/system\/wpa_supplicant\.service is marked executable/ or
+       $ThisLine =~ /^.*: Unit cannot be reloaded because it is inactive\.$/ or
+       $ThisLine =~ / is not active\.$/ or
        # https://bugzilla.redhat.com/show_bug.cgi?id=1306452 
-       $ThisLine =~ /^tmp\.mount: Directory \/tmp to mount over is not empty, mounting anyway\.$/ or
-       $ThisLine =~ /^Received SIGRTMIN\+2[01] from PID \d+ \(plymouthd\)\.$/ or
+       $ThisLine =~ /^[^ ]*\.mount: Directory \/[^ ]* to mount over is not empty, mounting anyway\.$/ or
+       # A known issue - reported by multiple distributions
+       $ThisLine =~ /^user\@\d+\.service: Failed at step CGROUP spawning \/usr\/lib\/systemd\/systemd: No such file or directory$/ or
+       $ThisLine =~ /^Received SIGRTMIN\+2[01] from PID \d+ \((?:plymouthd|n\/a)\)\.$/ or
        # https://bugzilla.redhat.com/show_bug.cgi?id=1072368
-       $ThisLine =~ /^Received SIGRTMIN\+24 from PID \d+ \(kill\)\.$/ or
+       $ThisLine =~ /^Received SIGRTMIN\+24 from PID \d+ \((?:kill|n\/a)\)\.$/ or
+       $ThisLine =~ /: Killing process \d+ \(.*\) with signal SIG.+\.$/ or
        $ThisLine =~ /^Removed slice / or
-       $ThisLine =~ /^user-runtime-dir@[0-9]+\.service: Unit not needed anymore. Stopping.$/ or
-       $ThisLine =~ /^user@[0-9]+\.service: Killing process [0-9]+ \(systemctl\) with signal SIGKILL\.$/ or
-       $ThisLine =~ /^Closed D-Bus User Message Bus Socket.$/ or
-       $ThisLine =~ /^pam_unix\(systemd-user:session\): session (?:opened|closed) for user/       
+       $ThisLine =~ /^pam_unix\(systemd-user:session\): session (?:opened|closed) for user/ or
+       $ThisLine =~ /Adding .* random time\.$/ or
+       # https://bugzilla.redhat.com/show_bug.cgi?id=1890632
+       $ThisLine =~ /Not generating service for XDG autostart .*,/ or
+       $ThisLine =~ /gnome-systemd-autostart-condition not found/ or
+       $ThisLine =~ /kde-systemd-start-condition not found/ or
+       $ThisLine =~ /Unknown key name .* in section 'Desktop Entry'/ or
+       # These happen on every shutdown - downgraded to debug message in systemd v235
+       # https://github.com/systemd/systemd/issues/6777
+       $ThisLine =~ /^Failed to propagate agent release message: (?:Connection reset by peer|Transport endpoint is not connected)/ or
+       $ThisLine =~ /^cgroup compatibility translation between legacy and unified hierarchy settings activated\. See cgroup-compat debug messages for details\.$/ or
+       $ThisLine =~ /^.*\.socket: Socket service .* already active/
    ) {
       # Ignore these
+   } elsif (my ($service,$reason) = ($ThisLine =~ /^Configuration file ([^ ]*) is ([^.]*)\./)) {
+      $ConfigError{$reason}{$service}++;
+   } elsif (my ($service,$reason) = ($ThisLine =~ /^\[(.*)\] (Support for option .* has been removed) and it is ignored/)) {
+      $ConfigError{$reason}{$service}++;
+   } elsif (my ($service,$reason) = ($ThisLine =~ /^\[?([^\]:]+(?::\d+)?)[\]:]? (Unknown .* in section '.*')/)) {
+      $ConfigError{$reason}{$service}++;
    } elsif (my ($service) = ($ThisLine =~ /^Unit (.*) entered failed state\.$/)) {
       $Failed{$service}++;
+   } elsif (my ($service) = ($ThisLine =~ /^(.*): Failed to execute command/)) {
+      $Failed{$service}++;
+   } elsif (my ($service) = ($ThisLine =~ /^(.*): Unit entered failed state\.$/)) {
+      $Failed{$service}++;
+   } elsif (my ($service) = ($ThisLine =~ /^(.*) failed with error code \d+\.$/)) {
+      $Failed{$service}++;
+   } elsif (my ($service) = ($ThisLine =~ /^(.*): Start request repeated too quickly\.$/)) {
+      $Failed{$service}++;
+   } elsif (my ($service) = ($ThisLine =~ /^(.*): Job .* timed out\.$/)) {
+      $Failed{$service}++;
+   } elsif (my ($service) = ($ThisLine =~ /^(.*): Job .* failed with result .*$/)) {
+      $Failed{$service}++;
+   } elsif (my ($service) = ($ThisLine =~ /^Failed (unmounting .*)\.$/)) {
+      $Failed{$service}++;
+   } elsif (my ($service) = ($ThisLine =~ /^Failed to (listen on .*)\.$/)) {
+      $Failed{$service}++;
    } elsif (my ($target) = ($ThisLine =~ /^Reached target (.*)\.$/)) {
       $Target{$target}++;
       $LastTarget = $target;
@@ -99,29 +185,81 @@
       $Started{$service}++;
    } elsif (my ($service) = ($ThisLine =~ /^Reloaded (.*)\.$/)) {
       $Reloaded{$service}++;
+   } elsif (my ($service) = ($ThisLine =~ /^Deactivated (.*)\.$/)) {
+      $Deactivated{$service}++;
    } elsif ($ThisLine eq "Reexecuting.") {
-      $Reexecuted++;
+      $Reexecuted++ if $Detail;
    } elsif ($ThisLine =~ /^Time has been changed$/) {
       $TimeChanged++;
    } elsif (my ($slice) = ($ThisLine =~ /^Created slice (.*)\.$/)) {
       $Slice{$slice}++;
+   } elsif (my ($name) = ($ThisLine =~ /^Condition check resulted in (.*) being skipped\.$/)) {
+      $Skipped{$name}++ if $Detail;
+   } elsif (my ($pidfile) = ($ThisLine =~ /^PID file (.*) not readable \(yet\?\) after start\.$/)) {
+      $Slow{$pidfile}++;
+   } elsif (my ($pidfile) = ($ThisLine =~ /: Can't open PID file (.*) \(yet\?\) after start:/)) {
+      $Slow{$pidfile}++;
+   } elsif (my ($Service, $Exe) = ($ThisLine =~ /^(.*): Found left-over process \d+ \((.*)\) in control group while starting unit\. Ignoring\.$/)) {
+      $LeftOver{"$Service:$Exe"}++ unless "$Service:$Exe" =~/^$Ignore_leftover$/i;
    } else {
       $OtherList{$ThisLine}++;
    }
 }
 
+if (keys %ConfigError) {
+    print "Configuration errors:\n";
+    foreach my $reason (sort {$a cmp $b} keys %ConfigError) {
+	my $tot = 0;
+	print "   $reason";
+	foreach my $service (sort {$a cmp $b} keys %{$ConfigError{$reason}}) {
+	    $tot += $ConfigError{$reason}{$service};
+	    if ($Detail >= 10) {
+		print "\n      $service: $ConfigError{$reason}{$service} Time(s)";
+	    }
+	}
+	if ($Detail < 10) {
+	    print ": $tot Time(s)"
+	}
+	print "\n";
+    }
+    print "\n";
+}
+
+# Because we set Failed in multiple locations, cleanup once here
+foreach my $item (keys %Failed) {
+   delete $Failed{$item} if ($item =~ /^$Ignore_failed$/i);
+}
+
 if (keys %Failed) {
    print "ERROR: Failed state:\n";
    foreach my $item (sort {$a cmp $b} keys %Failed) {
-      print "        $item $Failed{$item} Time(s)\n";
+      print "        $item: $Failed{$item} Time(s)\n";
    }
    print "\n";
 }
 
-if ($Reexecuted && $Detail) {
+# Detail >= 1
+if ($Reexecuted) {
    print "Reexecuted systemd: $Reexecuted Time(s)\n\n";
 }
 
+if (keys %Skipped) {
+   print "Condition check resulted in the following being skipped:\n";
+   foreach my $item (sort {$a cmp $b} keys %Skipped) {
+      print "        $item: $Skipped{$item} Time(s)\n";
+   }
+   print "\n";
+}
+
+if (keys %LeftOver) {
+   print "Warning: Found left-over process in control group while starting unit:\n";
+   foreach my $item (sort {$a cmp $b} keys %LeftOver) {
+      my ($service, $exe) = split(":", $item);
+      print "        $service($exe): $LeftOver{$item} Time(s)\n";
+   }
+   print "\n";
+}
+
 if (keys %Target && $Detail > 3) {
    print "Reached target $LastTarget: $Target{$LastTarget} Time(s)";
    if ($Detail > 10) {
@@ -151,6 +289,14 @@
    print "\n";
 }
 
+if (keys %Slow && $Detail > 3) {
+   print "Slow to start:\n";
+   foreach my $pidfile (sort {$a cmp $b} keys %Slow) {
+      print "    $pidfile: $Slow{$pidfile} Time(s)\n";
+   }
+   print "\n";
+}
+
 if (keys %Reloaded && $Detail > 5) {
    print "Reloaded:\n";
    foreach my $item (sort {$a cmp $b} keys %Reloaded) {
@@ -159,8 +305,16 @@
    print "\n";
 }
 
+if (keys %Deactivated && $Detail > 5) {
+   print "Deactivated:\n";
+   foreach my $item (sort {$a cmp $b} keys %Deactivated) {
+      print "    $item: $Deactivated{$item} Time(s)\n";
+   }
+   print "\n";
+}
+
 if ($TimeChanged && $Detail > 5) {
-   print "Time Changed $TimeChanged Time(s)\n\n";
+   print "Time Changed: $TimeChanged Time(s)\n\n";
 }
 
 if (keys %UserSession && $Detail > 3) {
@@ -182,7 +336,7 @@
 if (keys %Slice && $Detail > 5) {
    print "Slices created:\n";
    foreach my $slice (sort {$a cmp $b} keys %Slice) {
-      print "    $slice $Slice{$slice} Time(s)\n";
+      print "    $slice: $Slice{$slice} Time(s)\n";
    }
     print "\n";
 }