#!/bin/sh
# -*- tab-width: 4 -*- ;; Emacs
# vi: set noexpandtab  :: Vi/ViM
############################################################ IDENT(1)
#
# $Title: Script to report on SSD health $
# $Copyright: 2017-2019 Devin Teske. All rights reserved. $
# $FrauBSD: ssd_report/ssd_report 2019-09-12 19:07:16 -0700 freebsdfrau $
#
############################################################ GLOBALS

# Version of this script in ident(1) keyword form
VERSION='$Version: 0.4 $'

# Name this script was invoked as, with any directory part removed
pgm=${0##*/}

# Host and kernel names; values already present in the environment are kept
: "${HOSTNAME:=$( hostname )}" "${UNAME_s:=$( uname -s )}"

#
# Exit codes used throughout the script
#
SUCCESS=0 FAILURE=1

#
# Output mode switches set from the command line (empty means off)
#
JSON= # -j
PERL= # -p

############################################################ FUNCTIONS

# usage
#
# Write a brief usage summary to stderr, then terminate the script with the
# failure exit status. This function never returns to its caller.
#
usage()
{
	local row="\t%-10s %s\n"
	{
		printf "Usage: %s [-jpv]\n" "$pgm"
		printf "OPTIONS:\n"
		# printf re-applies the row format to each option/text pair
		printf "$row" \
			"-j" "Enable JSON output." \
			"-p" "Enable Perl output." \
			"-v" "Print version and exit."
	} >&2
	exit $FAILURE
}

# device_info device_name
#
# Print a one-line description of the named disk on stdout, in the form
# " Model=..., FwRev=..., SerialNo=...". A leading /dev/ on device_name is
# optional.
#
# Which tool supplies the data is decided once, when this file is read, by
# taking the first of these commands that is installed:
#
#	smart		each run of whitespace in a value becomes "_"
#	smartctl	same treatment as smart
#	hdparm		lines of "hdparm -i" output that contain "Model="
#			are printed exactly as they are
#
# When none of them is installed, device_info prints nothing and returns
# $FAILURE.
#
if type smart > /dev/null 2>&1; then
device_info()
{
	local dev="/dev/${1#/dev/}"
	smart --info "$dev" 2> /dev/null | awk '
	# Turn each run of whitespace in str into a single underscore
	function squash(str) {
		gsub(/[[:space:]]+/, "_", str)
		return str
	}
	sub(/^Device[[:space:]]+/, "")   { model = squash($0) }
	sub(/^Serial[[:space:]]+/, "")   { serialno = squash($0) }
	sub(/^Revision[[:space:]]+/, "") { fwrev = squash($0) }
	END {
		printf " Model=%s, FwRev=%s, SerialNo=%s\n",
			model, fwrev, serialno
	}'
}
elif type smartctl > /dev/null 2>&1; then
device_info()
{
	local dev="/dev/${1#/dev/}"
	smartctl -a "$dev" 2> /dev/null | awk '
	# Turn each run of whitespace in str into a single underscore
	function squash(str) {
		gsub(/[[:space:]]+/, "_", str)
		return str
	}
	sub(/^Device Model:[[:space:]]*/, "")     { model = squash($0) }
	sub(/^Serial Number:[[:space:]]*/, "")    { serialno = squash($0) }
	sub(/^Firmware Version:[[:space:]]*/, "") { fwrev = squash($0) }
	END {
		printf " Model=%s, FwRev=%s, SerialNo=%s\n",
			model, fwrev, serialno
	}'
}
elif type hdparm > /dev/null 2>&1; then
device_info()
{
	local dev="/dev/${1#/dev/}"
	hdparm -i "$dev" 2> /dev/null | awk '/Model=/'
}
else
device_info()
{
	return $FAILURE
}
fi

# smart_attrs device_name
#
# The device_name argument can begin with /dev/ or not (up to you). Prints
# a series of shell statements (one NAME=integer per line) that can be
# evaluated to set the following. Only attributes present in the tool output
# produce a statement, so callers should preset defaults before the eval:
#
#	VARIABLE_NAME		DESCRIPTION
# 	lifetime		Attribute 202 or 233 depending on manufacturer
# 	writes_host		Attribute 247; number pages written by the host
# 	writes_ftl		Attribute 248; number pages written by the FTL*
# 	hours			Attribute 9; number of powered-on hours
# 	wear_leveling_count	Attribute 173 or 177; wear leveling count
# 	samsung_lifetime	Attribute 173 or 177; normalized value
# 	total_lbas_written	Attribute 241
#
# * The FTL is the Flash Translation Layer (FTL); a component of the SSD
# controller that maps Logical Block Addresses (LBAs) from host to Physical
# Block Addresses (PBAs). This is helpful in calculating write amplification.
#
# How this information is acquired depends on whether you have "smart"
# (preferred if available; Linux/FreeBSD), or "smartctl" (FreeBSD/Linux).
# With neither installed, nothing is printed and $FAILURE is returned.
#
if type smart > /dev/null 2>&1; then # FreeBSD/Linux
smart_attrs()
{
	smart -t "/dev/${1#/dev/}" | awk '
	BEGIN {
		# attr[] maps an attribute ID to the shell variable it
		# feeds; val[] holds, per variable, the number of the
		# input column whose contents become the value
		val[attr[173] = attr[177] = "wear_leveling_count"] = 7
		val[attr[202] = attr[233] = "lifetime"] = 5
		val[attr[241] = "total_lbas_written"] = 7
		val[attr[247] = "writes_host"] = 7
		val[attr[248] = "writes_ftl"] = 7
		val[attr[9] = "hours"] = 7
	}
	# The attribute ID is in column 2; lines with an unmapped ID yield
	# an empty var and are skipped
	var = attr[at = $2] {
		_[var] = $val[var]

		#
		# For Samsung SSDs, attribute 173/177 normalized value
		# is documented as percentage remaining P/E cycles
		#
		if (at == 173 || at == 177) _["samsung_lifetime"] = $5
	}
	END { for (var in _) printf "%s=%u\n", var, _[var] }
	' # END-QUOTE
}
elif type smartctl > /dev/null 2>&1; then # FreeBSD/Linux
smart_attrs()
{
	smartctl -A "/dev/${1#/dev/}" | awk '
	BEGIN {
		# attr[] maps an attribute ID to the shell variable it
		# feeds; val[] holds, per variable, the number of the
		# input column whose contents become the value
		val[attr[173] = attr[177] = "wear_leveling_count"] = 10
		val[attr[202] = attr[233] = "lifetime"] = 4
		val[attr[241] = "total_lbas_written"] = 10
		val[attr[247] = "writes_host"] = 10
		val[attr[248] = "writes_ftl"] = 10
		val[attr[9] = "hours"] = 10
	}
	# The attribute ID is in column 1; lines with an unmapped ID yield
	# an empty var and are skipped
	var = attr[at = $1] {
		_[var] = $val[var]

		#
		# For Samsung SSDs, attribute 173/177 normalized value
		# is documented as percentage remaining P/E cycles.
		# smartctl -A prints the normalized VALUE in column 4
		# (column 5 is WORST), the same column used for lifetime
		#
		if (at == 173 || at == 177) _["samsung_lifetime"] = $4
	}
	END { for (var in _) printf "%s=%u\n", var, _[var] }
	' # END-QUOTE
}
else
smart_attrs()
{
	return $FAILURE
}
fi

# user_capacity device_name
#
# Print the size of the named disk in bytes on stdout. A leading /dev/ on
# device_name is optional. The exit status is non-zero when no size was
# found. The data source is chosen once from $UNAME_s: the kern.geom.conftxt
# sysctl on *BSD, lsblk(8) output everywhere else.
#
case "$UNAME_s" in
*BSD)
user_capacity()
{
	local name="${1#/dev/}"
	sysctl -n kern.geom.conftxt | awk -v disk="$name" '
		$2 != "DISK" || $3 != disk { next }
		{ mediasize = $4; print mediasize; exit }
		END { exit (mediasize == "") }
	' # END-QUOTE
}
;;
*)
user_capacity()
{
	local name="${1#/dev/}"
	lsblk -b | awk -v disk="$name" '
		NR == 1 { next } # Skip header
		$1 == disk { mediasize = $4; print mediasize; exit }
		END { exit (mediasize == "") }
	' # END-QUOTE
}
;;
esac

# user2phys bytes
#
# Print the smallest power of two that is not less than bytes; an omitted or
# empty argument counts as 0, for which 1 is printed. The main loop uses the
# result as the physical capacity of a Samsung SSD when it estimates write
# amplification.
#
user2phys()
{
	local want="${1:-0}" size=1
	while [ "$size" -lt "$want" ]; do
		size=$(( 2 * $size ))
	done
	echo "$size"
}

# setvar $var_to_set [$value]
#
# Implement setvar for shells unlike FreeBSD sh(1); the function is defined
# only when the running shell has no setvar of its own.
#
# Assigns $value, taken literally, to the variable named by $var_to_set. With
# only a name given the variable is unset; with no arguments nothing happens.
# Returns 2 (after a message on stderr) for an invalid variable name and
# $FAILURE for too many arguments.
#
if ! type setvar > /dev/null 2>&1; then
setvar()
{
        [ $# -gt 0 ] || return ${SUCCESS:-0}
        local __setvar_var_to_set="$1" __setvar_right="$2" __setvar_left=
        case $# in
        1) unset "$__setvar_var_to_set"
           return $? ;;
        2) : fall through ;;
        *) printf "setvar: too many arguments\n" >&2
           return ${FAILURE:-1}
        esac
        # The name is handed to eval below, so accept nothing but a plain
        # identifier (non-empty, no leading digit, no other characters)
        case "$__setvar_var_to_set" in ""|[0-9]*|*[!0-9A-Za-z_]*)
                printf "setvar: %s: bad variable name\n" \
                        "$__setvar_var_to_set" >&2
                return 2
        esac
        # Rewrite each single quote in the value as '\'' so that the value
        # survives being wrapped in single quotes for the eval
        while case "$__setvar_right" in *\'*) : ;; *) false ; esac
        do
                __setvar_left="$__setvar_left${__setvar_right%%\'*}'\\''"
                __setvar_right="${__setvar_right#*\'}"
        done
        __setvar_left="$__setvar_left$__setvar_right"
        eval "$__setvar_var_to_set='$__setvar_left'"
}
fi

# f_replaceall $string $find $replace [$var_to_set]
#
# Replace all occurrences of $find in $string with $replace. If $var_to_set is
# either missing or NULL, the result is produced on standard out for capturing
# in a sub-shell (which is less recommended due to performance degradation);
# otherwise the result is stored in the named variable using setvar.
#
# $find is used as a shell pattern, not a literal string, so a bracket
# expression such as "[[:space:]]" matches any single whitespace character.
# A NULL $find (or any pattern that can only match nothing) leaves $string
# unchanged.
#
# To replace newlines or a sequence containing the newline character, pass a
# literal newline as `\n' is not supported.
#
f_replaceall()
{
        local __left="" __right="$1" __rest=
        local __find="$2" __replace="$3" __var_to_set="$4"
        while :; do
                case "$__right" in
                *$__find*) __rest="${__right#*$__find}" ;;
                *) break
                esac
                # A match that consumes no text would never terminate
                [ "$__rest" != "$__right" ] || break
                __left="$__left${__right%%$__find*}$__replace"
                __right="$__rest"
        done
        __left="$__left$__right"
        if [ "$__var_to_set" ]; then
                setvar "$__var_to_set" "$__left"
        else
                # printf rather than echo so results such as "-n" survive
                printf '%s\n' "$__left"
        fi
}

############################################################ MAIN

node="$HOSTNAME"

#
# Parse the command line: -j and -p each select an output format and cancel
# the other (so the one given last wins), -v prints the version number and
# exits, and anything else ends in usage
#
while getopts jpv flag; do
	if [ "$flag" = j ]; then
		PERL= JSON=1
	elif [ "$flag" = p ]; then
		JSON= PERL=1
	elif [ "$flag" = v ]; then
		# Reduce "$Version: N $" to just "N"
		VERSION="${VERSION#*: }"
		VERSION="${VERSION% \$}"
		echo "$VERSION"
		exit $SUCCESS
	else
		usage # NOTREACHED
	fi
done
shift $(( $OPTIND - 1 ))

#
# Collect the names of the physical disks to examine: the kern.disks sysctl
# on FreeBSD, otherwise the first column of every lsblk(8) line whose sixth
# column reads "disk"
#
if [ "$UNAME_s" = FreeBSD ]; then
	disks=$( sysctl -n kern.disks )
else
	disks=$( lsblk | awk '$6 == "disk" { print $1 }' )
fi

#
# Describe the OS
#
# Sets $release to an OS name immediately followed by its major version
# number (for example a name such as "CentOS" followed by "7"), and $version
# to that number alone.
#
case "$UNAME_s" in
Linux)
	if [ -e /etc/centos-release ]; then
		version=$( cat /etc/centos-release )
		release=CentOS
	elif [ -e /etc/redhat-release ]; then
		version=$( cat /etc/redhat-release )
		release=RedHat
	elif [ -e /etc/os-release ]; then
		# Read the file in sub-shells so that its variables cannot
		# overwrite ours (it may set VERSION, which this script also
		# uses). Clearing the names first keeps our own VERSION from
		# being mistaken for the OS version when the file sets none;
		# VERSION_ID is the fallback in that case.
		version=$( unset VERSION VERSION_ID
			. /etc/os-release
			printf '%s\n' "${VERSION:-$VERSION_ID}" )
		release=$( unset NAME
			. /etc/os-release
			printf '%s\n' "${NAME%%[[:space:]]*}" )
	else
		version="${UNAME_r:-$( uname -r )}"
		release="$UNAME_s"
	fi
	# Keep only the first run of digits: drop everything before the
	# first digit, then everything from the first non-digit onward
	version="${version#"${version%%[0-9]*}"}"
	version="${version%%[!0-9]*}"
	release=$release$version
	;;
*)
	version="${UNAME_r:-$( uname -r )}"
	# Leading digits only; "!" is the portable negation in sh patterns
	version="${version%%[!0-9]*}"
	release="$UNAME_s$version"
esac

#
# Operate on each disk device
#
# Pick the printf format for one report line. Every variant takes the same
# thirteen arguments in the same order; JSON is chosen whenever $JSON is
# non-empty, then Perl when $PERL is, and plain text otherwise.
case "${JSON:+j}${PERL:+p}" in
j*)
	fmt='{"node":"%s","device":"%s","OSRel":"%s",'
	fmt="$fmt"'"Model":"%s","FwRev":"%s","SerialNo":"%s",'
	fmt="$fmt"'"LifeRemain":"%s%%","status":"%s","WriteAmp":"%s",'
	fmt="$fmt"'"PwrDays":"%s","EstDaysToFail":"%s",'
	fmt="$fmt"'"WritesHost":"%s","WritesFTL":"%s"}'
	;;
p)
	fmt='{"node"=>"%s","device"=>"%s","OSRel"=>"%s",'
	fmt="$fmt"'"Model"=>"%s","FwRev"=>"%s","SerialNo"=>"%s",'
	fmt="$fmt"'"LifeRemain"=>"%s%%","status"=>"%s","WriteAmp"=>"%s",'
	fmt="$fmt"'"PwrDays"=>"%s","EstDaysToFail"=>"%s",'
	fmt="$fmt"'"WritesHost"=>"%s","WritesFTL"=>"%s"}'
	;;
*)
	fmt='%s %s OSRel=%s Model=%s FwRev=%s SerialNo=%s LifeRemain=%s%% (%s)'
	fmt="$fmt"' WriteAmp=%s PwrDays=%s EstDaysToFail=%s'
	fmt="$fmt"' WritesHost=%s WritesFTL=%s'
	;;
esac
# Print one report line per recognized SSD. $disks is expanded unquoted on
# purpose so that each whitespace-separated name becomes one iteration.
for device in $disks; do
	#
	# Get disk device model
	#
	device_name=$( device_info "/dev/$device" ) || continue

	#
	# Determine the type of disk
	# NB: Skip non-SSD device models
	#
	case "$device_name" in
	*Crucial*|*Micron*|*MICRON*) device_type=Micron ;;
	*SAMSUNG*|*Samsung*) device_type=Samsung ;;
	*INTEL*SSD*) device_type=Intel ;;
	*) continue
	esac

	#
	# Reset everything the two evals below may set, so that a value the
	# current disk does not report is never inherited from the previous
	# disk and the arithmetic further down always sees a number
	#
	lifetime=0 writes_host=0 writes_ftl=0 hours=0 days_to_fail=
	samsung_lifetime= wear_leveling_count=0 total_lbas_written=0
	Model= FwRev= SerialNo= write_amp=

	#
	# Get device identity and smart attributes
	#
	# The first eval turns " Model=a, FwRev=b, SerialNo=c" into the
	# assignments Model="a" FwRev="b" SerialNo="c".
	# NOTE(review): the values come from the drive and are evaluated
	# inside double quotes; a value containing '"', '$' or '`' would be
	# interpreted by the shell. Confirm this is acceptable.
	#
	eval "$( echo "${device_name# }" | awk 'gsub(/, /, "\n")||1' |
		awk 'sub(/=/, "&\"")&&sub(/$/,"\""){}1' )"
	eval "$( smart_attrs "/dev/$device" )"

	#
	# Determine the number of remaining hours until failure
	# NB: lifetime is the percentage of life remaining, so the powered-on
	# hours so far correspond to (100 - lifetime) percent of total life
	#
	case "$device_type" in
	Samsung) lifetime="${samsung_lifetime:-$lifetime}" ;;
	esac
	if [ "$lifetime" -lt 100 ]; then
		total_hours_to_fail=$(( ($hours * 100) / (100 - $lifetime) ))
		hours_to_fail=$(( $total_hours_to_fail - $hours ))
		days_to_fail=$(( $hours_to_fail / 24 ))
	fi

	#
	# Calculate write amplification
	#
	case "$device_type" in
	Micron)
		# Micron documents this in their SMART Attribute Reference
		#
		# https://www.micron.com/~/media/documents/products/
		# 	technical-note/solid-state-storage/
		# 	tnfd22_client_ssd_smart_attributes.pdf
		#
		#        (Attrib_247 + Attrib_248)
		# WAF = ---------------------------
		#                Attrib_247
		#
		write_amp=$( awk -v host="$writes_host" -v ftl="$writes_ftl" '
		END { printf "%4.2f\n", (host + ftl) / (host != 0 ? host : 1) }
		' /dev/null ) ;;
	Samsung)
		# Samsung documents this in article published May 20, 2016:
		#
		# "Estimating SSD Endurance With SMART Attributes"
		# https://insights.samsung.com/2016/05/20/
		# 	estimating-ssd-endurance-with-smart-attributes/
		#
		#        Attrib_177 * Capacity
		# WAF = -----------------------
		#          Attrib_241 * 512
		#
		# NB: On some models, wear_leveling_count is Attrib_173
		#
		user_cap=$( user_capacity "$device" )
		phys_cap=$( user2phys "$user_cap" )
		writes_ftl=$(( $wear_leveling_count * $phys_cap ))
		writes_host=$(( $total_lbas_written * 512 ))
		write_amp=$( awk -v host="$writes_host" -v ftl="$writes_ftl" '
		END { printf "%4.2f\n", ftl / (host != 0 ? host : 1) }
		' /dev/null ) ;;
	Intel) write_amp=N/A
	esac

	#
	# Determine drive status (OK/WARN/CRIT)
	#
	good_bad=OK pwr_days=$(( $hours / 24 ))
	if [ "$lifetime" -lt 10 ]; then
		good_bad=CRIT
	elif [ "$lifetime" -lt 25 ]; then
		good_bad=WARN
	fi

	#
	# Sanitize fields
	# NB: Whitespace becomes "_" so every field stays a single word
	#
	f_replaceall "$node"         "[[:space:]]" _ node
	f_replaceall "$release"      "[[:space:]]" _ release
	f_replaceall "$device"       "[[:space:]]" _ device
	f_replaceall "$Model"        "[[:space:]]" _ Model
	f_replaceall "$FwRev"        "[[:space:]]" _ FwRev
	f_replaceall "$SerialNo"     "[[:space:]]" _ SerialNo
	f_replaceall "$lifetime"     "[[:space:]]" _ lifetime
	f_replaceall "$good_bad"     "[[:space:]]" _ good_bad
	f_replaceall "$write_amp"    "[[:space:]]" _ write_amp
	f_replaceall "$pwr_days"     "[[:space:]]" _ pwr_days
	f_replaceall "$days_to_fail" "[[:space:]]" _ days_to_fail
	f_replaceall "$writes_host"  "[[:space:]]" _ writes_host
	f_replaceall "$writes_ftl"   "[[:space:]]" _ writes_ftl

	#
	# Print drive report
	#
	printf "$fmt\n" "$node" "$device" "$release" "$Model" "$FwRev" \
		"$SerialNo" "$lifetime" "$good_bad" "$write_amp" "$pwr_days" \
		"${days_to_fail:-N/A}" "$writes_host" "$writes_ftl"
done

exit $SUCCESS

################################################################################
# END
################################################################################
