Networking

Unix and Linux network configuration. Multiple network interfaces. Bridged NICs. High-availability network configurations.

Applications

Reviews of the latest Unix and Linux software. Helpful tips for application support admins. Automating application support.

Data

Disk partitioning, filesystems, directories, and files. Volume management, logical volumes, HA filesystems. Backups and disaster recovery.

Monitoring

Distributed server monitoring. Server performance and capacity planning. Monitoring applications, network status and user activity.

Commands & Shells

Cool Unix shell commands and options. Command-line tools and applications. Things every Unix sysadmin needs to know.

Home » Commands & Shells, Featured

Simple Statistical Functions in Shell

Submitted on June 3, 2015 – 12:01 pm

Here’s a small collection of basic statistical functions you can use in your shell scripts. This may come in handy when analyzing disk space, system performance, etc.

Average

# Arithmetic mean of the numbers read from stdin, rounded to a whole number.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain decimal number
#          (optional leading minus) are ignored, including blank lines.
# Outputs: the mean formatted with "%.0f" on stdout.
# Returns: non-zero, printing nothing, when the input holds no numbers.
avg() {
	awk '
	  $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
		total += $1
		n++
	  }
	  END {
		# no values: avoid the division by zero and report failure instead
		if (n == 0) exit 1
		printf("%.0f\n", total / n)
	  }'
}

Minimum
# Smallest number read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain decimal number
#          (optional leading minus) are ignored, including blank lines.
# Outputs: the smallest value, printed as it appeared in the input.
# Returns: non-zero, printing nothing, when the input holds no numbers.
min() {
	awk '
	  $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
		# track the minimum directly; works in any awk (no asort needed)
		if (n++ == 0 || $1 + 0 < lowest + 0) lowest = $1
	  }
	  END {
		if (n == 0) exit 1
		print lowest
	  }'
}

Maximum
# Largest number read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain decimal number
#          (optional leading minus) are ignored, including blank lines.
# Outputs: the largest value, printed as it appeared in the input.
# Returns: non-zero, printing nothing, when the input holds no numbers.
max() {
	awk '
	  $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
		# track the maximum directly; works in any awk (no asort needed)
		if (n++ == 0 || $1 + 0 > highest + 0) highest = $1
	  }
	  END {
		if (n == 0) exit 1
		print highest
	  }'
}

Median
# Median of the numbers read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain decimal number
#          (optional leading minus) are ignored, including blank lines.
# Outputs: the middle value for an odd count, or the mean of the two middle
#          values for an even count.
# Returns: non-zero, printing nothing, when the input holds no numbers.
median() {
	# The awk stage depends on receiving values in ascending numeric order;
	# LC_ALL=C keeps the ordering independent of the locale.
	LC_ALL=C sort -n | awk '
	  $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
		v[n++] = $1
	  }
	  END {
		if (n == 0) exit 1
		if (n % 2 == 1) {
		  print v[(n - 1) / 2]
		} else {
		  print (v[n / 2 - 1] + v[n / 2]) / 2
		}
	  }'
}

Sum
# Sum of the numbers read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain decimal number
#          (optional leading minus) are ignored, including blank lines.
# Outputs: the total on stdout; 0 when the input holds no numbers.
sum() {
	awk '
	  $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
		total += $1
	  }
	  END {
		# "+ 0" forces a numeric 0 when nothing was added
		print total + 0
	  }'
}

Count
# Number of numeric values read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain decimal number
#          (optional leading minus) are ignored, including blank lines.
# Outputs: the count on stdout; 0 when the input holds no numbers.
count() {
	awk '
	  $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
		n++
	  }
	  END {
		# "+ 0" forces a numeric 0 when nothing matched
		print n + 0
	  }'
}

Maximum difference
# Largest difference between consecutive values (next minus previous), taken
# in input order, for the numbers read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain decimal number
#          (optional leading minus) are ignored, including blank lines.
# Outputs: the largest delta in awk number format (for example 0.5); it may
#          be negative when the series only decreases.
# Returns: non-zero, printing nothing, when fewer than two numbers are read.
maxdelta() {
	# One awk pass: no shell globals are touched and nothing is forked per line.
	awk '
	  BEGIN { OFMT = "%.15g" }
	  $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
		if (seen++) {
		  delta = $1 - prev
		  if (pairs++ == 0 || delta > best) best = delta
		}
		prev = $1
	  }
	  END {
		if (pairs == 0) exit 1
		print best
	  }'
}

Minimum difference
# Smallest difference between consecutive values (next minus previous), taken
# in input order, for the numbers read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain decimal number
#          (optional leading minus) are ignored, including blank lines.
# Outputs: the smallest delta in awk number format (for example 0.5); it is
#          negative when the series decreases anywhere.
# Returns: non-zero, printing nothing, when fewer than two numbers are read.
mindelta() {
	# One awk pass: no shell globals are touched and nothing is forked per line.
	awk '
	  BEGIN { OFMT = "%.15g" }
	  $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
		if (seen++) {
		  delta = $1 - prev
		  if (pairs++ == 0 || delta < best) best = delta
		}
		prev = $1
	  }
	  END {
		if (pairs == 0) exit 1
		print best
	  }'
}

Average difference
# Mean difference between consecutive values (next minus previous), taken in
# input order, for the numbers read from stdin, rounded to a whole number.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain decimal number
#          (optional leading minus) are ignored, including blank lines.
# Outputs: the mean delta formatted with "%.0f" on stdout.
# Returns: non-zero, printing nothing, when fewer than two numbers are read.
avgdelta() {
	# One awk pass: no shell globals are touched and nothing is forked per line.
	awk '
	  $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
		if (seen++) {
		  total += $1 - prev
		  pairs++
		}
		prev = $1
	  }
	  END {
		# fewer than two values means no deltas: avoid dividing by zero
		if (pairs == 0) exit 1
		printf("%.0f\n", total / pairs)
	  }'
}

Standard Deviation
# Population standard deviation (divisor n) of the numbers read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a number (optional sign,
#          optional exponent) are ignored, including blank lines.
# Outputs: the standard deviation formatted with "%.2f" on stdout.
# Returns: non-zero, printing nothing, when the input holds no numbers.
stddev() {
	awk '
	  $1 ~ /^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$/ {
		n++
		total += $1
		squares += $1 * $1
	  }
	  END {
		if (n == 0) exit 1
		mean = total / n
		# plain multiplication: the "**" operator is not POSIX awk
		variance = squares / n - mean * mean
		# rounding can push a zero variance slightly below zero
		if (variance < 0) variance = 0
		printf("%.2f\n", sqrt(variance))
	  }'
}

Linear Regression
# Least-squares straight-line fit of (x, y) pairs read from stdin.
# Input:   one pair per line, separated by spaces, tabs and/or commas.
#          Leading and trailing blanks are ignored. Lines that do not hold
#          exactly two numeric fields (headers, blank lines) are skipped.
# Outputs: three lines on stdout: Slope, Intercept and R-Squared, each
#          formatted with "%.2f". R-Squared is printed as "nan" when every
#          y value is identical, because it is then undefined (0/0).
# Returns: non-zero with a message on stderr when there are no pairs or when
#          every x value is identical (the slope is undefined).
fitline() {
	awk '
	BEGIN {
		FS = "[ ,\t]+"
		number = "^[-+]?([0-9]+\\.?[0-9]*|\\.[0-9]+)([eE][-+]?[0-9]+)?$"
	}
	{
		# trim blanks so an indented line does not gain an empty first field
		gsub(/^[ \t]+|[ \t]+$/, "")
	}
	NF == 2 && $1 ~ number && $2 ~ number {
		# index by the pair counter, not NR, so skipped lines leave no holes
		num++
		x[num] = $1
		y[num] = $2
		x_sum += $1
		y_sum += $2
		xy_sum += $1 * $2
		x2_sum += $1 * $1
	}
	END {
		if (num == 0) {
			print "fitline: no x,y pairs in input" > "/dev/stderr"
			exit 1
		}
		spread = num * x2_sum - x_sum * x_sum
		if (spread == 0) {
			print "fitline: all x values are equal; slope is undefined" > "/dev/stderr"
			exit 1
		}
		mean_x = x_sum / num
		mean_y = y_sum / num
		slope = (num * xy_sum - x_sum * y_sum) / spread
		inter = mean_y - slope * mean_x
		for (i = 1; i <= num; i++) {
			dev = y[i] - mean_y
			res = y[i] - (slope * x[i] + inter)
			ss_total += dev * dev
			ss_residual += res * res
		}
		printf("Slope      :  %.2f\n", slope)
		printf("Intercept  :  %.2f\n", inter)
		if (ss_total == 0) {
			printf("R-Squared  :  nan\n")
		} else {
			printf("R-Squared  :  %.2f\n", 1 - (ss_residual / ss_total))
		}
	}'
}

Greatest common divisor
# Greatest common divisor of two or more integers (Euclidean algorithm).
# Arguments: $1 $2 [...] - decimal integers; signs are ignored.
# Outputs:   the non-negative GCD on stdout (gcd 0 0 prints 0).
# Returns:   0 on success; 2 with a message on stderr when fewer than two
#            arguments are given or an argument is not an integer.
gcd() {
	local -r int_re='^[-+]?[0-9]+$'
	local dividend=0 divisor remainder arg

	if (( $# < 2 )); then
		printf 'usage: gcd INTEGER INTEGER...\n' >&2
		return 2
	fi

	for arg in "$@"; do
		if [[ ! $arg =~ $int_re ]]; then
			printf 'gcd: not an integer: %s\n' "$arg" >&2
			return 2
		fi
		# Drop the sign; 10# keeps values such as 08 from being read as octal.
		divisor=$(( 10#${arg#[-+]} ))
		# Fold each argument into the running result: gcd(0, n) is n.
		while (( divisor != 0 )); do
			remainder=$(( dividend % divisor ))
			dividend=$divisor
			divisor=$remainder
		done
	done

	printf '%s\n' "$dividend"
}

Least common multiple
# Least common multiple of one or more integers.
# Arguments: $1 [...] - decimal integers; signs are ignored. Any number of
#            arguments is accepted.
# Outputs:   the non-negative LCM on stdout; 0 if any argument is 0.
# Returns:   0 on success; 2 with a message on stderr when no arguments are
#            given or an argument is not an integer.
lcm() {
	local -r int_re='^[-+]?[0-9]+$'
	local result=1 value arg x y r

	if (( $# == 0 )); then
		printf 'usage: lcm INTEGER...\n' >&2
		return 2
	fi

	for arg in "$@"; do
		if [[ ! $arg =~ $int_re ]]; then
			printf 'lcm: not an integer: %s\n' "$arg" >&2
			return 2
		fi
		# Drop the sign; 10# keeps values such as 08 from being read as octal.
		value=$(( 10#${arg#[-+]} ))
		if (( value == 0 || result == 0 )); then
			# A zero operand forces the result to 0; keep validating the rest.
			result=0
			continue
		fi
		# Euclidean algorithm: x ends up as gcd(result, value).
		x=$result
		y=$value
		while (( y != 0 )); do
			r=$(( x % y ))
			x=$y
			y=$r
		done
		# Divide before multiplying to keep the intermediate value small.
		result=$(( result / x * value ))
	done

	printf '%s\n' "$result"
}

 

Print Friendly, PDF & Email

Leave a Reply