# Simple Statistical Functions in Shell
#
# Here’s a small collection of basic statistical functions you can use in your shell scripts. This may come in handy when analyzing disk space, system performance, etc.

# Average
#######################################
# Print the arithmetic mean of the numbers read from stdin, rounded to the
# nearest integer.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain signed decimal
#          (blank lines, text, ...) are ignored.
# Outputs: the mean formatted with "%.0f".
# Returns: 1 (printing nothing) when no numeric line was read.
#######################################
avg() {
  awk '
    # Signed decimals only; unlike "[0-9]*" this rejects empty fields.
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      total += $1
      n++
    }
    END {
      if (n == 0) exit 1    # no data: avoid dividing by zero
      printf("%.0f\n", total / n)
    }'
}

# Minimum
#######################################
# Print the smallest number read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain signed decimal are
#          ignored.
# Outputs: the minimum, spelled exactly as it appeared in the input.
# Returns: 1 (printing nothing) when no numeric line was read.
#######################################
min() {
  # A single pass keeps this portable: no pre-sort and no gawk-only asort().
  awk '
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      # "+ 0" forces a numeric (not string) comparison.
      if (n == 0 || $1 + 0 < low + 0) low = $1
      n++
    }
    END {
      if (n == 0) exit 1
      print low
    }'
}

# Maximum
#######################################
# Print the largest number read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain signed decimal are
#          ignored.
# Outputs: the maximum, spelled exactly as it appeared in the input.
# Returns: 1 (printing nothing) when no numeric line was read.
#######################################
max() {
  # A single pass keeps this portable: no pre-sort and no gawk-only asort().
  awk '
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      # "+ 0" forces a numeric (not string) comparison.
      if (n == 0 || $1 + 0 > high + 0) high = $1
      n++
    }
    END {
      if (n == 0) exit 1
      print high
    }'
}

# Median
#######################################
# Print the median of the numbers read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain signed decimal are
#          ignored.
# Outputs: the middle value for an odd count, or the mean of the two middle
#          values for an even count.
# Returns: 1 (printing nothing) when no numeric line was read.
#######################################
median() {
  # LC_ALL=C pins "." as the decimal point so the numeric sort does not
  # depend on the caller's locale.
  LC_ALL=C sort -n | awk '
    BEGIN { OFMT = "%.15g" }    # do not truncate the computed midpoint
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      v[++n] = $1               # input arrives in ascending order
    }
    END {
      if (n == 0) exit 1
      if (n % 2 == 1) {
        print v[(n + 1) / 2]
      } else {
        mid = (v[n / 2] + v[n / 2 + 1]) / 2
        print mid
      }
    }'
}

# Sum
#######################################
# Print the sum of the numbers read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain signed decimal are
#          ignored.
# Outputs: the total; 0 when no numeric line was read.
#######################################
sum() {
  awk '
    # The default OFMT ("%.6g") would print 1000000.5 as 1e+06.
    BEGIN { OFMT = "%.15g" }
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      total += $1
    }
    END {
      print total + 0    # "+ 0" prints 0 rather than "" for empty input
    }'
}

# Count
#######################################
# Print how many numeric lines were read from stdin.
# Input:   one value per line; only the first whitespace-separated field is
#          used. A line counts when that field is a plain signed decimal;
#          blank lines and text are not counted.
# Outputs: the count; 0 when no numeric line was read.
#######################################
count() {
  awk '
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      n++
    }
    END {
      print n + 0    # "+ 0" prints 0 rather than "" for empty input
    }'
}

# Maximum difference
#######################################
# Print the largest difference between consecutive numbers read from stdin.
# Each difference is "current value - previous value", in input order.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain signed decimal are
#          ignored.
# Outputs: the largest difference (it may be negative).
# Returns: 1 (printing nothing) when fewer than two numeric lines were read.
#######################################
maxdelta() {
  # One awk pass: no shell arrays leaked into the caller, no bc fork per
  # element, and negative differences are kept.
  awk '
    BEGIN { OFMT = "%.15g" }
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      if (seen) {
        d = $1 - prev
        if (n == 0 || d > best) best = d
        n++
      }
      prev = $1
      seen = 1
    }
    END {
      if (n == 0) exit 1
      print best
    }'
}

# Minimum difference
#######################################
# Print the smallest difference between consecutive numbers read from stdin.
# Each difference is "current value - previous value", in input order.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain signed decimal are
#          ignored.
# Outputs: the smallest difference (it may be negative).
# Returns: 1 (printing nothing) when fewer than two numeric lines were read.
#######################################
mindelta() {
  # One awk pass: no shell arrays leaked into the caller, no bc fork per
  # element, and negative differences are kept.
  awk '
    BEGIN { OFMT = "%.15g" }
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      if (seen) {
        d = $1 - prev
        if (n == 0 || d < best) best = d
        n++
      }
      prev = $1
      seen = 1
    }
    END {
      if (n == 0) exit 1
      print best
    }'
}

# Average difference
#######################################
# Print the mean difference between consecutive numbers read from stdin,
# rounded to the nearest integer.
# Each difference is "current value - previous value", in input order.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a plain signed decimal are
#          ignored.
# Outputs: the mean difference formatted with "%.0f".
# Returns: 1 (printing nothing) when fewer than two numeric lines were read.
#######################################
avgdelta() {
  # One awk pass: no shell arrays leaked into the caller, no bc fork per
  # element, and negative differences are included in the mean.
  awk '
    $1 ~ /^-?([0-9]+\.?[0-9]*|\.[0-9]+)$/ {
      if (seen) {
        total += $1 - prev
        n++
      }
      prev = $1
      seen = 1
    }
    END {
      if (n == 0) exit 1    # no differences: avoid dividing by zero
      printf("%.0f\n", total / n)
    }'
}

# Standard Deviation
#######################################
# Print the population standard deviation (divisor N) of the numbers read
# from stdin, with two decimals.
# Input:   one value per line; only the first whitespace-separated field is
#          used. Lines whose first field is not a number (optional sign,
#          decimals, optional exponent) are ignored.
# Outputs: the standard deviation formatted with "%.2f".
# Returns: 1 (printing nothing) when no numeric line was read.
#######################################
stddev() {
  # Running mean / sum of squared deviations (Welford update). Unlike
  # sumsq/N - mean^2 this cannot go slightly negative through rounding, so
  # sqrt() never yields NaN. It also avoids the non-POSIX "**" operator.
  awk '
    $1 ~ /^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$/ {
      n++
      delta = $1 - mean
      mean += delta / n
      m2 += delta * ($1 - mean)
    }
    END {
      if (n == 0) exit 1    # no data: avoid dividing by zero
      printf("%.2f\n", sqrt(m2 / n))
    }'
}

# Linear Regression
#######################################
# Fit a least-squares line y = slope * x + intercept to (x, y) pairs read
# from stdin and report slope, intercept and R-squared.
# Input:   one pair per line, separated by spaces, tabs and/or commas.
#          Lines that do not consist of exactly two numbers (headers, blank
#          lines, ...) are ignored.
# Outputs: three lines, "Slope : S", "Intercept : I", "R-Squared : R",
#          each value formatted with "%.2f".
# Returns: 1, with a message on stderr, when fewer than two points were read
#          or when all x values are equal (the slope is undefined).
#######################################
fitline() {
  awk '
    BEGIN {
      FS = "[ ,\t]+"
      number = "^[-+]?([0-9]+\\.?[0-9]*|\\.[0-9]+)([eE][-+]?[0-9]+)?$"
    }
    {
      # With a regex FS, leading/trailing blanks would create empty fields.
      # Editing $0 makes awk split the record again.
      sub(/^[ \t]+/, "")
      sub(/[ \t\r]+$/, "")
    }
    NF == 2 && $1 ~ number && $2 ~ number {
      # Index by our own counter, not NR: skipped lines must not leave
      # holes in the arrays walked in END.
      n++
      x[n] = $1 + 0
      y[n] = $2 + 0
      x_sum += $1
      y_sum += $2
    }
    END {
      if (n < 2) {
        print "fitline: need at least two data points" | "cat 1>&2"
        exit 1
      }
      mean_x = x_sum / n
      mean_y = y_sum / n
      # Centered sums are numerically steadier than mean(xy) - mean(x)mean(y).
      for (i = 1; i <= n; i++) {
        dx = x[i] - mean_x
        dy = y[i] - mean_y
        sxx += dx * dx
        sxy += dx * dy
        ss_total += dy * dy
      }
      if (sxx == 0) {
        print "fitline: all x values are equal; slope is undefined" | "cat 1>&2"
        exit 1
      }
      slope = sxy / sxx
      inter = mean_y - slope * mean_x
      for (i = 1; i <= n; i++) {
        err = y[i] - (slope * x[i] + inter)
        ss_residual += err * err
      }
      # All y equal: the horizontal fit is exact, so report 1 instead of
      # dividing by zero.
      r2 = (ss_total == 0) ? 1 : 1 - ss_residual / ss_total
      printf("Slope : %.2f\n", slope)
      printf("Intercept : %.2f\n", inter)
      printf("R-Squared : %.2f\n", r2)
    }'
}

# Greatest common divisor
#######################################
# Print the greatest common divisor of one or more integers.
# Arguments: $1 ... - integers (an optional sign is accepted and ignored;
#                     leading zeros are read as decimal, not octal)
# Outputs:   the non-negative GCD; gcd(n, 0) is |n| and gcd(0, 0) is 0.
# Returns:   2, with a message on stderr, when called without arguments or
#            with a non-integer argument.
#######################################
gcd() {
  local result=0 value remainder

  if (( $# == 0 )); then
    printf 'usage: gcd INTEGER [INTEGER ...]\n' >&2
    return 2
  fi

  for value in "$@"; do
    if [[ ! ${value} =~ ^[-+]?[0-9]+$ ]]; then
      printf 'gcd: not an integer: %s\n' "${value}" >&2
      return 2
    fi
    # Drop the sign and force base 10 so "08" is not parsed as octal.
    value=$(( 10#${value#[-+]} ))
    # Euclid: fold the next argument into the running result.
    while (( value != 0 )); do
      remainder=$(( result % value ))
      result=${value}
      value=${remainder}
    done
  done

  printf '%s\n' "${result}"
}

# Least common multiple
#######################################
# Print the least common multiple of one or more integers.
# Arguments: $1 ... - integers, any number of them (an optional sign is
#                     accepted and ignored; leading zeros are read as
#                     decimal, not octal)
# Outputs:   the non-negative LCM; 0 if any argument is 0.
# Returns:   2, with a message on stderr, when called without arguments or
#            with a non-integer argument.
# Note:      uses shell integer arithmetic, so a result beyond the shell's
#            integer range wraps around.
#######################################
lcm() {
  local result=1 value a b t

  if (( $# == 0 )); then
    printf 'usage: lcm INTEGER [INTEGER ...]\n' >&2
    return 2
  fi

  for value in "$@"; do
    if [[ ! ${value} =~ ^[-+]?[0-9]+$ ]]; then
      printf 'lcm: not an integer: %s\n' "${value}" >&2
      return 2
    fi
    # Drop the sign and force base 10 so "08" is not parsed as octal.
    value=$(( 10#${value#[-+]} ))
    if (( value == 0 || result == 0 )); then
      result=0
      continue
    fi
    # gcd(result, value) by Euclid; afterwards it is held in a.
    a=${result}
    b=${value}
    while (( b != 0 )); do
      t=$(( a % b ))
      a=${b}
      b=${t}
    done
    # lcm(x, y) = x / gcd(x, y) * y; dividing first keeps intermediates small.
    result=$(( result / a * value ))
  done

  printf '%s\n' "${result}"
}

