#!/usr/bin/perl
$^W=1;
use strict;

use MIME::Base64;	# for PEM to DER format conversion
use Crypt::X509;	# to parse X509 cert
# for canonicalizing IP addresses:
use Regexp::Common qw /net/; # /$RE{net}{IPv4}/
use Regexp::IPv6 qw($IPv6_re); # /^$IPv6_re$/
use NetAddr::IP::Util qw(inet_aton inet_ntoa ipv6_aton ipv6_ntoa);
use bignum;

# This program is intended to parse nmap output (stdout+stderr)
# from stuff approximately like:
# $ hosts='some space separated list of host(s)'
# $ ports='some comma separated list of port(s)'
# and either:
# $ TZ=GMT0 nmap -v -Pn -r -sT -p "$ports" --resolve-all --script=ssl-cert $hosts 2>&1
# $ TZ=GMT0 nmap -v -6 -Pn -r -sT -p "$ports" --resolve-all --script=ssl-cert $hosts 2>&1
# or:
# $ TZ=GMT0 nmap -Pn -r -sT -p "$ports" --resolve-all --script=ssl-cert $hosts 2>&1
# $ TZ=GMT0 nmap -6 -Pn -r -sT -p "$ports" --resolve-all --script=ssl-cert $hosts 2>&1
# and produce a higher-level summary in a certain format.
# Note that the program may need changes if/when/as nmap output format changes.

# vi(1) :se tabstop=4

# Ask for the plain "C" locale (ye plain olde ASCII) to avoid unpleasant
# surprises.
# NOTE(review): this only assigns the environment variable; whether that
# has any effect on this already-running process is not evident from this
# file - confirm.
$ENV{"LC_ALL"}='C'; # ye plain olde ASCII - avoid unpleasant surprises

# The most recently seen "Nmap scan report for ..." line.  nmap may emit the
# host/IP in a separate, earlier record than the port/cert details, so
# handle_record() saves it here and prepends it to a following record that
# lacks one.
my $host_info='';
# Hash we'll use to stash our summary resultant data.  Layout, as built by
# handle_record():
#   $h{$expires}{$SANorCN}{$IP}{$port}{$host} = undef
#   $h{$expires}{$SANorCN}{''} = count of host entries below that SANorCN
my %h=();	# Hash we'll use to stash our summary resultant data.

# Cache of serial numbers already extracted from full certs.
# key: cert in PEM format, value: canonicalized serial number
# (lowercase hex, no leading 0x):
my %cert_serial;

# Does the single argument look like an IP address?
# Returns 1 when the whole argument matches either the IPv4 pattern
# ($RE{net}{IPv4}) or the IPv6 pattern ($IPv6_re); returns undef otherwise,
# and also when called with anything other than exactly one argument.
sub isip{
	return undef unless @_ == 1;
	my $candidate = $_[0];
	if(
		$candidate =~ /\A$RE{net}{IPv4}\z/
		or
		$candidate =~ /\A$IPv6_re\z/
	){
		return 1;
	}
	return undef;
}

# Internet Protocol address to canonical text format.
# Takes exactly one argument (dies otherwise).  The argument is first tried
# as IPv4 (inet_aton/inet_ntoa round trip), then as IPv6
# (ipv6_aton/ipv6_ntoa round trip).  Returns the round-tripped text, or
# undef if neither conversion accepted the argument.
sub ipc{
	@_ == 1 or die "$0: bad call to sub \&ipc(), aborting";
	my $text = $_[0];
	my $canonical;
	if(defined(my $packed4 = inet_aton($text))){
		$canonical = inet_ntoa($packed4);
	}elsif(defined(my $packed6 = ipv6_aton($text))){
		$canonical = ipv6_ntoa($packed6);
	}
	return $canonical;
}

# IP address to binary: returns the string '0b' followed by the bits of the
# packed address as '0'/'1' characters.
# The argument is canonicalized with ipc() first; the packed form comes from
# inet_aton() when that accepts it, otherwise from ipv6_aton().
sub ip2b{
	my $canonical = ipc($_[0]);
	my $packed = inet_aton($canonical);
	$packed = ipv6_aton($canonical) unless defined($packed);
	return '0b' . unpack('B*', $packed);
}

# sort() comparator (reads the package variables $a and $b) that orders IP
# addresses: every IPv4 address before every IPv6 address, and numerically
# by address value within each family.
#
# Operands that isip() does not recognize are ordered before all IP
# addresses, and by plain string comparison among themselves.  (Returning 0
# for them, as was done earlier, made such an operand "equal" to every
# address, which is an inconsistent ordering and leaves sort()'s result
# undefined.)
#
# Dies if an operand passes isip() but is accepted by neither inet_aton()
# nor ipv6_aton() after canonicalization.
sub ipsort{
	my $a_is_ip = isip($a);
	my $b_is_ip = isip($b);
	if(!$a_is_ip or !$b_is_ip){
		return(-1) if !$a_is_ip and $b_is_ip;
		return(1) if $a_is_ip and !$b_is_ip;
		return($a cmp $b);	# neither is an IP
	};

	my $a_is_v4 = defined(inet_aton(ipc($a)));
	my $b_is_v4 = defined(inet_aton(ipc($b)));
	# whatever isn't IPv4 must at least be IPv6:
	if(!$a_is_v4 and !defined(ipv6_aton(ipc($a)))){
		die "$0: internal failure in ipsort, aborting";
	};
	if(!$b_is_v4 and !defined(ipv6_aton(ipc($b)))){
		die "$0: internal failure in ipsort, aborting";
	};

	# different families: IPv4 first
	return(-1) if $a_is_v4 and !$b_is_v4;
	return(1) if !$a_is_v4 and $b_is_v4;

	# Same family: compare the addresses as numbers.  ip2b() returns a
	# '0b...' bit string; the "+ 0" relies on the file-level "use bignum"
	# to turn that into an arbitrary precision integer.
	return(ip2b($a) + 0 <=> ip2b($b) + 0);
};

# Process our record(s) (including possible need to split and recursion),
# folding any certificate found into the file-level summary hash %h.
#
# Arguments: one or more record strings (paragraphs of nmap stdout+stderr).
#   Each argument is cleaned up IN PLACE (the loop variable aliases @_), so
#   the caller's variable is modified.
# Results are accumulated in the file-level %h, %cert_serial and $host_info;
#   the return value is not meaningful.
# Records that cannot be brought into the expected canonical form are
#   reported with warn() and skipped.
# Dies if the IP address taken from the record is not recognized by isip().
#
# A record holding more than one cert is split into "header + first cert"
# and "header + remainder", and both parts are handled by recursion.
sub handle_record{
	for (@_){

		# Change any Microsoft DOS/Windows line endings to Unix/Linux:
		s/\r\n/\n/g;
		# Get rid of uninteresting:
		s/^Failed to resolve .*$//gm;
		s/^PORT +STATE +SERVICE$//gm;
		s/^Warning: Hostname .* resolves, but not to any IPv[46] .*$//gm;
		s/^rDNS record for .*$//gm;
		s/^Host is up.*$//gm;
		s/^\| Not valid before: .*$//gm;
		s/^Other addresses for .*$//gm;
		s/^Starting Nmap .*$//gm;
		s/^Nmap done: .*$//gm;
		s/^NSE: Loaded 1 scripts for scanning\.$//gm;
		s/^NSE: Script Pre-scanning\.$//gm;
		s/^Initiating NSE at .*$//gm;
		s/^Completed NSE at .*, .*s elapsed$//gm;
		s/^Initiating Parallel DNS resolution of .* host.*\. at .*$//gm;
		s/^Completed Parallel DNS resolution of .* host.*\. at .*, .*s elapsed$//gm;
		s/^Increasing send delay for .*$//gm;
		s/^Initiating Connect Scan at .*$//gm;
		s/^Scanning .*$//gm;
		s/^Discovered open port .*$//gm;
		s/^Completed Connect Scan at .*, .*s elapsed .*$//gm;
		s/^NSE: Script scanning .*\.$//gm;
		s/^NSE: Script Post-scanning\.$//gm;
		s/^NSE Timing: About .*?% done; ETC: .* \(.*? remaining\)$//gm;
		s/^Read data files from: .*$//gm;
		s/^Not shown: [1-9]\d* (?:closed|filtered) ports?$//gm;
		s/^Completed Connect Scan against .*$//gm;
		s/^Connect Scan Timing: About .*$//gm;
		# squash empty lines the above may have created:
		s/\n{2,}/\n/g;

		# Strip any leading/trailing line endings from record:
		s/\A\n+//;
		s/\n+\z//;

		# done if we've got nothing left for this record:
		next if /\A\z/;

		# Handle host(s) information.
		#
		# If our record has host info, and nothing else, save it,
		# we may need it for following record.
		if(/\ANmap scan report for .*\z/){
			$host_info=$_;
			next;
		}
		#
		# If our record is missing host info and we earlier saved such,
		# prepend it.
		$_=$host_info . "\n" . $_ if
			! /^Nmap scan report for .*$/m and $host_info ne '';
		#
		# If all went well, at this point we should have information
		# on exactly one host on the current record we're processing,
		# save it (in case we didn't earlier), and check we have only
		# one on this record.
		if(
			/
				\A.*?
				# first match in record:
				^Nmap\ scan\ report\ for\ [^\n]*$
				.*
				# last and >=2nd match in record, if present::
				^(Nmap\ scan\ report\ for\ [^\n]*)$
			/msx
		){
			# Multiple matches
			# Save last match, in case we need it later:
			$host_info=$1;
			# kick record out as exception:
			warn(
				"$0: WARNING, multiple matches for host information ",
				"in record (skipping record):\n",
				$_,
				"\n",
			);
			next;
		}elsif(
			/
				\A.*?
				# first match in record:
				^(Nmap\ scan\ report\ for\ [^\n]*)$
			/msx
		){
			# We have exactly one match from the record, save it,
			# in case we'd not earlier done so:
			$host_info=$1;
		}else{
			# zero matches, kick record out as exception:
			warn(
				"$0: WARNING, zero matches for host information ",
				"in record (skipping record):\n",
				$_,
				"\n",
			);
			next;
		}

		# We're only interested in where we found certs,
		# those will have an expiration, if we don't see expiration
		# in the record, skip:
		/^\| Not valid after:  /m or next;

		# If we have more than one cert in record, we'll want to split
		# that into multiple records
		if(
			/
				^\|\ Not\ valid\ after:\ \ [^\n]*$
				.*
				^\|\ Not\ valid\ after:\ \ [^\n]*$
			/msx
		){
			if(
				/
					\A
					# record header bits (before ^\|):
					(.*?)
					^
					# first cert:
					(
						\|
						.*?
						^\|_[^\n]*
					)
					$
					# remainder:
					(.*)
					\z
				/msx
			){
				my $header=$1;
				my $cert=$2;
				my $remainder=$3;
				# strip leading and trailing newlines:
				$header =~ s/\A\n*(.*?)\n*\z/$1/s;
				$cert =~ s/\A\n*(.*?)\n*\z/$1/s;
				$remainder =~ s/\A\n*(.*?)\n*\z/$1/s;
				# complain and skip if any of 'em are empty:
				if($header eq ''){
					warn("$0: WARNING: record missing header, skipping:\n$_\n");
					next;
				}
				if($cert eq ''){
					warn("$0: WARNING: record missing cert, skipping:\n$_\n");
					next;
				}
				if($remainder eq ''){
					warn("$0: WARNING: record missing remainder, skipping:\n$_\n");
					next;
				}
				# if we made it here, we successfully split
				$cert = "$header\n$cert";
				$remainder = "$header\n$remainder";
				# To understand recursion, you must first understand recursion:
				handle_record($cert,$remainder);
				# ^^ Processes as record, header and first cert bits,
				# then process as record, head and remainder (stuff after first cert).
				# Record was split and handled by recursion,
				# so here we're done with our pre-split record:
				next;
			}
			else{
				# multiple certs, but failed to make expected split:
				warn(
					"$0: WARNING: failed to split certs on record ",
					"(skipping record):\n$_\n",
				);
				next;
			}
		};

		# strip uninteresting tail (after cert) bits:
		while(
			s/(?:\n+|^)[1-9][0-9]*\/tcp [^\n]*\n*\z//
		){
		};

		# If we have certs on multiple ports on same host,
		# we may have earlier put too many ports in our header
		# information before the cert.  If that's the case, we
		# only want the one immediately preceding the cert:
		s/
			\A
				(Nmap\ scan\ report\ for\ [^\n]*\n)
				(?:^[1-9][0-9]*\/tcp [^\n]*\n)*
				(
					^[1-9][0-9]*\/tcp [^\n]*\n
					^\|
					.*
				)
			\z
		/$1$2/msx;

		# If our input is essentially as expected (may need to alter
		# this program if things have significantly changed), our record
		# should now be in expected canonical form.
		# Check that record is in expected canonical form,
		# if so, ensure it's saved,
		# if not, complain and skip.
		# (Captures are taken directly by list assignment, rather than
		# read from $1.. after a negated match.)
		my ($host_line,$host_ip,$port)=
			/
				\A

				# host, port, cert info, in that order, neither more, nor less:

				(Nmap\ scan\ report\ for\ ([^\n]*))\n	# host info
				^([1-9][0-9]*)\/tcp\ [^\n]*\n		# port info
				# cert info:
				(?:
					(?:^\|\ [^\n]*\n)+
					\|_
					(?:
						SHA-1:(?:\ [0-9a-f]{4}){10}
						|
						-----END\ CERTIFICATE-----
					)
				)

				\z

			/mx
		;
		if(!defined($host_line)){
			warn (
				"$0: WARNING: failed to find record in canonical form ",
				"(skipping):\n$_\n",
			);
			next;
		};
		$host_info=$host_line; # In case we need it a bit later.
		# $host_ip: IP(/+host?) portion, $port: our TCP port.

		# parse IP(/+host?) portion, which is either "IP" or "host (IP)":
		my ($name_or_ip,$paren_ip)=
			$host_ip =~ /\A([^() \t]+)(?: \(([0-9a-f.:]+)\))?\z/;
		if(!defined($name_or_ip)){
			warn (
				"$0: WARNING: failed to find IP(/+host?) in canonical form ",
				"(skipping record):\n$_\n",
			);
			next;
		};
		my $host='';
		my $IP;
		if(defined($paren_ip)){
			# "host (IP)"
			$host = $name_or_ip;
			$IP = $paren_ip;
		}else{
			# bare "IP"
			$IP = $name_or_ip;
		};

		my $CN='';
		$CN=$1 if /\A.*?^\| ssl-cert: Subject: commonName=([^\n]+)$/ms;
		$CN =~ s/\/.*\z//; # strip non-name bits (e.g. organization)
		my $SAN='';
		$SAN=$1 if /\A.*?^\| Subject Alternative Name: ([^\n]+)$/ms;
		$SAN =~ s/, /,/g;
		$SAN =~ s/\A(?:DNS|IP Address)://;
		$SAN =~ s/,(?:DNS|IP Address):/,/g;
		if('' eq "$CN$SAN"){
			warn (
				"$0: WARNING: found neither CN nor SAN ",
				"(skipping record):\n$_\n",
			);
			next;
		};

		my $expires='';
		$expires=$1 if /\A.*?^\| Not valid after:  ([^\n]*)$/ms;
		if('' eq $expires){
			warn (
				"$0: WARNING: failed to determine expiration ",
				"(skipping record):\n$_\n",
			);
			next;
		};
		# Tag zone as Z (UTC/GMT0), as nmap doesn't mark zone, and scan
		# should've been done using GMT0/Z:
		$expires =~ s/[^Z]\z/$&Z/;

		# multiple full certs?
		# (Needs /m so ^ and $ match at line boundaries inside the record,
		# and /s so .* can span the lines between the two headers; without
		# them this check could never match.)
		if(
			/
					^\|\ -----BEGIN\ CERTIFICATE-----$
					.*
					^\|\ -----BEGIN\ CERTIFICATE-----$
			/msx
		){
			warn (
				"$0: WARNING: found multiple full cert (headers), ",
				"skipping record:\n$_\n",
			);
			next;
		}
		my $cert_pem;
		# NOTE(review): $serial is determined (and cached in %cert_serial)
		# but is not yet used in the summary built below.
		my $serial=undef;
		# do we have full cert to process?
		if(
			/
				(
					^\|\ -----BEGIN\ CERTIFICATE-----$
					.+?
					^\|_-----END\ CERTIFICATE-----
				)
				\z
			/msx
		){
			$cert_pem=$1;
			# strip nmap's script output prefixes ("| " and "|_"):
			$cert_pem =~ s/^\| //gm;
			$cert_pem =~ s/^\|_(-----END\ CERTIFICATE-----)\z/$1/m;
			if(exists $cert_serial{$cert_pem}){
				# have serial from cert from earlier, use that:
				$serial=$cert_serial{$cert_pem};
			}else{
				# get serial
				my $cert_pem_body='';
				if(
					$cert_pem
					=~
					/
						^-----BEGIN\ CERTIFICATE-----$
						\n*(.+?)\n*
						^-----END\ CERTIFICATE-----
						\z
					/msx
				){
					$cert_pem_body=$1;
				}else{
					warn(
						"$0: failed to find body of cert, ",
						"skipping record:\n$_\n",
					);
					next;
				}

				# convert to DER format:
				my $cert_der=MIME::Base64::decode($cert_pem_body);

				my $decoded=Crypt::X509->new(cert => $cert_der);
				# Don't trust fields of a cert that failed to parse:
				if($decoded->error){
					warn(
						"$0: failed to parse cert (",
						$decoded->error,
						"), skipping record:\n$_\n",
					);
					next;
				}
				$serial=$decoded->serial;	# decimal
				{
					use Math::BigInt;
					$serial=Math::BigInt->new($serial);
					$serial=$serial->as_hex;
				}
				$serial =~ s/^0x//;	# hex without leading 0x
				if($serial !~ /\A[\da-f]+\z/){
					warn(
						"$0: serial failed to match expected format, ",
						"skipping record:\n$_\n",
					);
					next;
				}
				# save cert & serial:
				$cert_serial{$cert_pem}=$serial;
			}
		}

		# Shouldn't have whitespace within most any of our gathered
		# lowest level pieces - check that (but $CN can have space(s)).
		if("$IP$host$port$SAN$expires" =~ /[ \t\r\n]/){
			warn(
				"$0: WARNING, unexpected whitespace, skipping record:\n",
				$_,
				"\n",
				"on account of:\n",
				"\$IP=$IP\$host=$host\$port=$port\$SAN=$SAN\$expires=$expires\n",
			);
			next;
		};
		if("$CN" =~ /[\t\r\n]/){
			warn(
				"$0: WARNING, unexpected whitespace, skipping record:\n",
				$_,
				"\n",
				"on account of:\n",
				"\$CN=$CN\n",
			);
			next;
		};

		# At this point we've got, from our record:
		# $IP - IP address
		# may have (supplied?/resolved?) name as $host (or empty)
		# $port - TCP port
		# at least $CN or $SAN (one but not both may be empty)
		# $expires - expiration (should be in ISO format)
		# may have $serial

		# Let's do some more intelligent processing of the data we care
		# about.

		my $SANorCN; # preferably SAN data, if we lack that, CN data
		if($SAN ne ''){
			$SANorCN=$SAN;	# have SAN data, use it (preferred)
			$SANorCN ="\L$SANorCN"; # lowercase
			# let's sort and canonicalize so equivalent SAN data will match
			my %SAN=();
			for my $s (split(/,/,$SANorCN,-1)){
				# if it's an IP address, put it in canonical form.
				if(isip($s)){
					my $ip=ipc($s);
					$s=$ip if defined($ip);
				};
				$SAN{$s}=undef if $s ne ''; # unique non-null as hash keys
			};
			$SANorCN=join(',',
				# ideally for domains we'd sort TLD on down,
				# and IPs in network order, but we may have
				# either or both, so we just do a simple string sort
				sort keys %SAN,
			);
		}else{
			$SANorCN=$CN;	# no SAN data, just use CN data
			# let's canonicalize so equivalent SAN data will match
			$SANorCN ="\L$SANorCN"; # lowercase
			# CN is just simple string, so nothing to sort here
		};


		# let's canonicalize $IP
		if(isip($IP)){
			my $ip=ipc($IP);
			$IP=$ip if defined($ip);
		}else{
			die("$0: failed to canonicalize IP address: $IP, aborting")
		};

		# Let's build up our hash in useful intelligent way.
		# We'll layer its data in our sort priority ordering:
		#   $h{$expires}{$SANorCN}{$IP}{$port}{$host}=undef
		# note that $host can be empty string, and that's valid hash key.
		# $h{$expires}{$SANorCN}{''} counts the host entries below
		# $h{$expires}{$SANorCN}; it's bumped only when a new
		# IP/port/host combination is added (intermediate levels are
		# autovivified as needed).
		my $group=($h{$expires}{$SANorCN} ||= {});
		if(!exists($group->{$IP}{$port}{$host})){
			$group->{$IP}{$port}{$host}=undef;
			++$group->{''};
		};
	}
}

{
	# Paragraph mode: a "record" is whatever sits between runs of one or
	# more consecutive empty lines.
	local $/ = '';

	# Feed every record from our file argument(s) - or, lacking such,
	# from STDIN (- can also be used to specify STDIN) - to the record
	# handler, one at a time:
	while(defined(my $record = <>)){
		handle_record($record);
	}
}

# Output our summarized data from our hash, in sorted order
# (expiration, then SAN/CN, then IP, then port, then host).
# output key:
print(
	"expires SAN_or_CN:\n",
	"IP port [host]\n...\n\n",
	"expires IP port [host] SANorCN\n",
);
for my $expires (sort keys %h){
	for my $SANorCN (sort keys %{$h{$expires}}){
		my $group = $h{$expires}{$SANorCN};
		# The '' key holds the count of host entries in this group:
		# two or more get a shared header line followed by one line per
		# entry, a singleton gets everything on one line.
		my $grouped = $group->{''} >= 2;
		print("\n$expires $SANorCN:\n") if $grouped;
		# Drop our counter key BEFORE sorting: it isn't an IP address, and
		# handing it to the ipsort comparator could upset the ordering of
		# the real addresses.
		my @ips = sort ipsort grep { $_ ne '' } keys %{$group};
		for my $IP (@ips){
			for my $port (sort {$a+0<=>$b+0} keys %{$group->{$IP}}){
				for my $host (sort keys %{$group->{$IP}{$port}}){
					# host is optional (empty string when unknown)
					my @fields = ($IP, $port);
					push(@fields, $host) if $host ne '';
					if($grouped){
						print(join(' ', @fields), "\n");
					}else{
						print(
							"\n",
							join(' ', $expires, @fields, $SANorCN),
							"\n",
						);
					};
				};
			};
		};
	};
};
