Add Least Squares slope function 13/16213/3
author Tim Rault <tim.rault@cengn.ca>
Wed, 29 Jun 2016 14:54:55 +0000 (10:54 -0400)
committer Tim Rault <tim.rault@cengn.ca>
Wed, 6 Jul 2016 14:26:51 +0000 (10:26 -0400)
Added a math module in utilities that contains a slope function
able to compute the slope of the best Least Squares curve fit
given a series of [x,y] values.

Implemented a test harness for this math module in the tests/utilities
section.

Change-Id: If4d63af092d0904b2269c5ee0991e18ab84533c0
JIRA: STORPERF-54
JIRA: STORPERF-55
JIRA: STORPERF-51
Signed-off-by: Tim Rault <tim.rault@cengn.ca>
storperf/tests/utilities/math.py [new file with mode: 0644]
storperf/utilities/math.py [new file with mode: 0644]

diff --git a/storperf/tests/utilities/math.py b/storperf/tests/utilities/math.py
new file mode 100644 (file)
index 0000000..c78538d
--- /dev/null
@@ -0,0 +1,66 @@
+##############################################################################
+# Copyright (c) 2016 CENGN and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+import unittest
+from storperf.utilities.math import math
+
+class MathTest(unittest.TestCase):
+    """
+    Unit tests for math.slope, the least squares slope helper.
+
+    Non-empty series produce floating-point results, so they are compared
+    with assertAlmostEqual: an exact comparison would fail on a harmless
+    rounding difference (for instance if the order of the arithmetic in
+    the implementation changes).
+    """
+
+    def setUp(self):
+        unittest.TestCase.setUp(self)
+
+    def test_slope_empty_series(self):
+        # The empty series is the one case with an exact expected value.
+        expected = 0
+        actual = math.slope([])
+        self.assertEqual(expected, actual)
+
+    def test_slope_integer_series(self):
+        expected = 1.4
+        actual = math.slope([[1, 6], [2, 5], [3, 7], [4, 10]])
+        self.assertAlmostEqual(expected, actual)
+
+    def test_slope_decimal_series(self):
+        expected = 1.4
+        actual = math.slope([[1.0, 6.0], [2.0, 5.0], [3.0, 7.0], [4.0, 10.0]])
+        self.assertAlmostEqual(expected, actual)
+
+    def test_slope_decimal_integer_mix(self):
+        expected = 1.4
+        actual = math.slope([[1.0, 6], [2, 5.0], [3, 7], [4.0, 10]])
+        self.assertAlmostEqual(expected, actual)
+
+    def test_slope_negative_y_series(self):
+        expected = 2
+        actual = math.slope([[1.0, -2], [2, 2], [3, 2]])
+        self.assertAlmostEqual(expected, actual)
+
+    def test_slope_negative_x_series(self):
+        # Same y values as the integer series, x shifted by -25: a shift
+        # along x must not change the slope.
+        expected = 1.4
+        actual = math.slope([[-24, 6.0], [-23, 5], [-22, 7.0], [-21, 10]])
+        self.assertAlmostEqual(expected, actual)
+
+    def test_slope_out_of_order_series(self):
+        # Same points as the integer series, in a different order.
+        expected = 1.4
+        actual = math.slope([[2, 5.0], [4, 10], [3.0, 7], [1, 6]])
+        self.assertAlmostEqual(expected, actual)
+
+    def test_slope_0_in_y(self):
+        expected = -0.5
+        actual = math.slope([[15.5, 1], [16.5, 0], [17.5, 0]])
+        self.assertAlmostEqual(expected, actual)
+
+    def test_slope_0_in_x(self):
+        expected = 1.4
+        actual = math.slope([[0, 6.0], [1, 5], [2, 7], [3, 10]])
+        self.assertAlmostEqual(expected, actual)
+
+    def test_slope_0_in_x_and_y(self):
+        expected = 1.5
+        actual = math.slope([[0.0, 0], [1, 1], [2, 3]])
+        self.assertAlmostEqual(expected, actual)
diff --git a/storperf/utilities/math.py b/storperf/utilities/math.py
new file mode 100644 (file)
index 0000000..3b124cd
--- /dev/null
@@ -0,0 +1,52 @@
+##############################################################################
+# Copyright (c) 2016 CENGN and others.
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+##############################################################################
+
+class math(object):
+    """Namespace class grouping numeric helper functions."""
+
+    @staticmethod
+    def slope(data_series):
+        """
+        Return the slope of the least squares line fitted to a data series.
+
+        This implements the linear least squares algorithm described in
+        https://en.wikipedia.org/wiki/Linear_least_squares_(mathematics)
+        for m equations (one per data point) and 2 unknowns: the
+        y-intercept beta1 and the slope beta2 of the fitted line
+        y = beta1 + beta2 * x. Only beta2 is computed, by solving the
+        normal equations (2 equations, 2 unknowns) directly.
+
+        :param data_series: sequence of m [x, y] pairs, i.e.
+            [[x1, y1], [x2, y2], ..., [xm, ym]], where x represents the
+            time and y the volume performance variable being tested
+            (e.g. IOPS, latency...). Each pair may be a list or a tuple.
+            The series is only read, never modified.
+        :return: 0 for an empty series, otherwise the slope as a float.
+        :raises ZeroDivisionError: when the normal equations are singular
+            and the slope is therefore undefined, e.g. for a single data
+            point or when all the points share the same x value.
+        """
+        m = len(data_series)
+        if m == 0:
+            # In the particular case of an empty data series
+            return 0
+
+        sum_xi = 0.0
+        sum_xi_sq = 0.0
+        sum_yi_xi = 0.0
+        sum_yi = 0.0
+        for point in data_series:
+            # Convert a local copy of x to float so that the final division
+            # is a true division even for all-integer input on Python 2.
+            # Writing the float back into data_series would modify the
+            # caller's data and fail on tuples.
+            xi = float(point[0])
+            yi = point[1]
+
+            sum_xi += xi
+            sum_xi_sq += xi ** 2
+            sum_yi_xi += xi * yi
+            sum_yi += yi
+
+        numerator = sum_yi * sum_xi - m * sum_yi_xi
+        denominator = sum_xi ** 2 - m * sum_xi_sq
+        if denominator == 0:
+            # Same exception type a plain division would raise, with a
+            # message that explains the cause.
+            raise ZeroDivisionError(
+                "slope is undefined: the data series needs at least two "
+                "points with different x values")
+
+        # If the y-intercept is needed later, the first normal equation
+        # gives: beta1 = (sum_yi - beta2 * sum_xi) / m
+        return numerator / denominator  # beta2, the slope
+
+