Skip to content

Commit e48aa88

Browse files
committed
gh-149244: Support iterator inputs in covariance, correlation, and linear_regression
1 parent f2c7c0d commit e48aa88

4 files changed

Lines changed: 62 additions & 6 deletions

File tree

Doc/library/statistics.rst

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -716,8 +716,8 @@ However, for reading convenience, most of the examples show sorted sequences.
716716
Return the sample covariance of two inputs *x* and *y*. Covariance
717717
is a measure of the joint variability of two inputs.
718718

719-
Both inputs must be of the same length (no less than two), otherwise
720-
:exc:`StatisticsError` is raised.
719+
Both inputs must be iterables of the same length (no less
720+
than two), otherwise :exc:`StatisticsError` is raised.
721721

722722
Examples:
723723

@@ -753,8 +753,9 @@ However, for reading convenience, most of the examples show sorted sequences.
753753
continuous data that doesn't meet the linear proportion requirement for
754754
Pearson's correlation coefficient.
755755

756-
Both inputs must be of the same length (no less than two), and need
757-
not to be constant, otherwise :exc:`StatisticsError` is raised.
756+
Both inputs must be iterables of the same length (no less
757+
than two), and must not be constant; otherwise :exc:`StatisticsError`
758+
is raised.
758759

759760
Example with `Kepler's laws of planetary motion
760761
<https://en.wikipedia.org/wiki/Kepler's_laws_of_planetary_motion>`_:
@@ -802,8 +803,8 @@ However, for reading convenience, most of the examples show sorted sequences.
802803
(it is equal to the difference between predicted and actual values
803804
of the dependent variable).
804805

805-
Both inputs must be of the same length (no less than two), and
806-
the independent variable *x* cannot be constant;
806+
Both inputs must be iterables of the same length (no less
807+
than two), and the independent variable *x* cannot be constant;
807808
otherwise a :exc:`StatisticsError` is raised.
808809

809810
For example, we can use the `release dates of the Monty

Lib/statistics.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,8 @@ def covariance(x, y, /):
673673
674674
"""
675675
# https://en.wikipedia.org/wiki/Covariance
676+
x = list(x)
677+
y = list(y)
676678
n = len(x)
677679
if len(y) != n:
678680
raise StatisticsError('covariance requires that both inputs have same number of data points')
@@ -710,6 +712,8 @@ def correlation(x, y, /, *, method='linear'):
710712
"""
711713
# https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
712714
# https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient
715+
x = list(x)
716+
y = list(y)
713717
n = len(x)
714718
if len(y) != n:
715719
raise StatisticsError('correlation requires that both inputs have same number of data points')
@@ -781,6 +785,8 @@ def linear_regression(x, y, /, *, proportional=False):
781785
782786
"""
783787
# https://en.wikipedia.org/wiki/Simple_linear_regression
788+
x = list(x)
789+
y = list(y)
784790
n = len(x)
785791
if len(y) != n:
786792
raise StatisticsError('linear regression requires that both inputs have same number of data points')

Lib/test/test_statistics.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2842,6 +2842,32 @@ def test_correlation_spearman(self):
28422842
with self.assertRaises(ValueError):
28432843
statistics.correlation(reading, mathematics, method='bad_method')
28442844

2845+
def test_iterator_inputs(self):
2846+
x = [1, 2, 3, 4, 5, 6, 7, 8, 9]
2847+
y = [1, 2, 3, 1, 2, 3, 1, 2, 3]
2848+
expected_cov = statistics.covariance(x, y)
2849+
expected_cor = statistics.correlation(x, y)
2850+
# iter() inputs should give same results as list inputs
2851+
self.assertAlmostEqual(statistics.covariance(iter(x), iter(y)), expected_cov)
2852+
self.assertAlmostEqual(statistics.correlation(iter(x), iter(y)), expected_cor)
2853+
# generator expressions should also work
2854+
self.assertAlmostEqual(
2855+
statistics.covariance((v for v in x), (v for v in y)), expected_cov
2856+
)
2857+
self.assertAlmostEqual(
2858+
statistics.correlation((v for v in x), (v for v in y)), expected_cor
2859+
)
2860+
# ranked method should also accept iterators
2861+
expected_ranked = statistics.correlation(x, y, method='ranked')
2862+
self.assertAlmostEqual(
2863+
statistics.correlation(iter(x), iter(y), method='ranked'), expected_ranked
2864+
)
2865+
# mismatched lengths should still raise StatisticsError
2866+
with self.assertRaises(statistics.StatisticsError):
2867+
statistics.covariance(iter([1, 2, 3]), iter([1, 2]))
2868+
with self.assertRaises(statistics.StatisticsError):
2869+
statistics.correlation(iter([1, 2, 3]), iter([1, 2]))
2870+
28452871
class TestLinearRegression(unittest.TestCase):
28462872

28472873
def test_constant_input_error(self):
@@ -2881,6 +2907,26 @@ def test_float_output(self):
28812907
self.assertTrue(isinstance(slope, float))
28822908
self.assertTrue(isinstance(intercept, float))
28832909

2910+
def test_iterator_inputs(self):
2911+
x = [1, 2, 3, 4, 5]
2912+
y = [2, 4, 6, 8, 10]
2913+
expected = statistics.linear_regression(x, y)
2914+
# iter() inputs should give same results as list inputs
2915+
result = statistics.linear_regression(iter(x), iter(y))
2916+
self.assertAlmostEqual(result.slope, expected.slope)
2917+
self.assertAlmostEqual(result.intercept, expected.intercept)
2918+
# generator expressions should also work
2919+
result = statistics.linear_regression((v for v in x), (v for v in y))
2920+
self.assertAlmostEqual(result.slope, expected.slope)
2921+
self.assertAlmostEqual(result.intercept, expected.intercept)
2922+
# proportional=True should also accept iterators
2923+
expected_prop = statistics.linear_regression(x, y, proportional=True)
2924+
result_prop = statistics.linear_regression(
2925+
iter(x), iter(y), proportional=True
2926+
)
2927+
self.assertAlmostEqual(result_prop.slope, expected_prop.slope)
2928+
self.assertEqual(result_prop.intercept, 0.0)
2929+
28842930
class TestNormalDist:
28852931

28862932
# General note on precision: The pdf(), cdf(), and overlap() methods
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`statistics.covariance`, :func:`statistics.correlation`, and
2+
:func:`statistics.linear_regression` now accept any iterable input,
3+
consistent with other functions in the :mod:`statistics` module.

0 commit comments

Comments
 (0)