diff --git a/tests/files/mock_responses/flows/flow_100.xml b/tests/files/mock_responses/flows/flow_100.xml
new file mode 100644
index 000000000..b80658d5d
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_100.xml
@@ -0,0 +1,14 @@
+<oml:flow xmlns:oml="http://openml.org/openml">
+  <oml:id>100</oml:id>
+  <oml:uploader>1</oml:uploader>
+  <oml:name>weka.J48</oml:name>
+  <oml:version>1</oml:version>
+  <oml:external_version>Weka_3.7.12</oml:external_version>
+  <oml:description>A simple J48 flow for testing</oml:description>
+  <oml:upload_date>2015-01-01T00:00:00</oml:upload_date>
+  <oml:language>English</oml:language>
+  <oml:dependencies>Weka_3.7.12</oml:dependencies>
+  <oml:parameter><oml:name>C</oml:name><oml:default_value>0.25</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>M</oml:name><oml:default_value>2</oml:default_value></oml:parameter>
+  <oml:tag>test_tag_TestFlow_1234567890</oml:tag>
+</oml:flow>
diff --git a/tests/files/mock_responses/flows/flow_3.xml b/tests/files/mock_responses/flows/flow_3.xml
new file mode 100644
index 000000000..ed5ec2f3c
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_3.xml
@@ -0,0 +1,14 @@
+<oml:flow xmlns:oml="http://openml.org/openml">
+  <oml:id>3</oml:id>
+  <oml:name>weka.ZeroR</oml:name>
+  <oml:version>1</oml:version>
+  <oml:external_version>Weka_3.7.5</oml:external_version>
+  <oml:description>ZeroR classifier</oml:description>
+  <oml:upload_date>2014-04-23 18:00:36</oml:upload_date>
+  <oml:language>English</oml:language>
+  <oml:dependencies>Weka_3.7.5</oml:dependencies>
+  <oml:parameter>
+    <oml:name>-D</oml:name>
+    <oml:default_value>false</oml:default_value>
+  </oml:parameter>
+</oml:flow>
\ No newline at end of file
diff --git a/tests/files/mock_responses/flows/flow_4024.xml b/tests/files/mock_responses/flows/flow_4024.xml
new file mode 100644
index 000000000..c60c35749
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_4024.xml
@@ -0,0 +1,106 @@
+<oml:flow xmlns:oml="http://openml.org/openml">
+  <oml:id>4024</oml:id>
+  <oml:uploader>1</oml:uploader>
+  <oml:name>weka.FilteredClassifier</oml:name>
+  <oml:version>1</oml:version>
+  <oml:external_version>Weka_3.7.12</oml:external_version>
+  <oml:description>Wrapper for filtered classifier</oml:description>
+  <oml:upload_date>2015-01-01T00:00:00</oml:upload_date>
+  <oml:language>English</oml:language>
+  <oml:dependencies>Weka_3.7.12</oml:dependencies>
+  <oml:parameter><oml:name>B</oml:name><oml:default_value>weka.classifiers.trees.REPTree</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>D</oml:name><oml:default_value>1</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>F</oml:name><oml:default_value>weka.filters.supervised.instance.Resample</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P1</oml:name><oml:default_value>1</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P2</oml:name><oml:default_value>2</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P3</oml:name><oml:default_value>3</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P4</oml:name><oml:default_value>4</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P5</oml:name><oml:default_value>5</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P6</oml:name><oml:default_value>6</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P7</oml:name><oml:default_value>7</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P8</oml:name><oml:default_value>8</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P9</oml:name><oml:default_value>9</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P10</oml:name><oml:default_value>10</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P11</oml:name><oml:default_value>11</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P12</oml:name><oml:default_value>12</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P13</oml:name><oml:default_value>13</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P14</oml:name><oml:default_value>14</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P15</oml:name><oml:default_value>15</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P16</oml:name><oml:default_value>16</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P17</oml:name><oml:default_value>17</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P18</oml:name><oml:default_value>18</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P19</oml:name><oml:default_value>19</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P20</oml:name><oml:default_value>20</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P21</oml:name><oml:default_value>21</oml:default_value></oml:parameter>
+  <oml:component>
+    <oml:identifier>W</oml:identifier>
+    <oml:flow>
+      <oml:id>4025</oml:id>
+      <oml:name>weka.MultiSearch</oml:name>
+      <oml:version>1</oml:version>
+      <oml:external_version>Weka_3.7.12</oml:external_version>
+      <oml:description>MultiSearch subflow</oml:description>
+      <oml:upload_date>2015-01-01T00:00:00</oml:upload_date>
+      <oml:parameter><oml:name>A</oml:name><oml:default_value>AA</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>B</oml:name><oml:default_value>BB</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>C</oml:name><oml:default_value>CC</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>D</oml:name><oml:default_value>DD</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>E</oml:name><oml:default_value>CC</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>F</oml:name><oml:default_value>FF</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>G</oml:name><oml:default_value>GG</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>H</oml:name><oml:default_value>HH</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>I</oml:name><oml:default_value>II</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>J</oml:name><oml:default_value>JJ</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>K</oml:name><oml:default_value>KK</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>L</oml:name><oml:default_value>LL</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>M</oml:name><oml:default_value>MM</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>N</oml:name><oml:default_value>NN</oml:default_value></oml:parameter>
+      <oml:component>
+        <oml:identifier>X</oml:identifier>
+        <oml:flow>
+          <oml:id>4026</oml:id>
+          <oml:name>weka.LogitBoost</oml:name>
+          <oml:version>1</oml:version>
+          <oml:external_version>Weka_3.7.12</oml:external_version>
+          <oml:description>LogitBoost subflow</oml:description>
+          <oml:upload_date>2015-01-01T00:00:00</oml:upload_date>
+          <oml:parameter><oml:name>A</oml:name><oml:default_value>1</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>B</oml:name><oml:default_value>2</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>C</oml:name><oml:default_value>3</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>D</oml:name><oml:default_value>4</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>E</oml:name><oml:default_value>5</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>F</oml:name><oml:default_value>6</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>G</oml:name><oml:default_value>7</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>H</oml:name><oml:default_value>8</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>I</oml:name><oml:default_value>10</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>J</oml:name><oml:default_value>11</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>K</oml:name><oml:default_value>12</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>L</oml:name><oml:default_value>13</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>M</oml:name><oml:default_value>14</oml:default_value></oml:parameter>
+          <oml:component>
+            <oml:identifier>Y</oml:identifier>
+            <oml:flow>
+              <oml:id>1724</oml:id>
+              <oml:name>weka.REPTree</oml:name>
+              <oml:version>1</oml:version>
+              <oml:external_version>Weka_3.7.12</oml:external_version>
+              <oml:description>REPTree subflow</oml:description>
+              <oml:upload_date>2015-01-01T00:00:00</oml:upload_date>
+              <oml:parameter><oml:name>A</oml:name><oml:default_value>1</oml:default_value></oml:parameter>
+              <oml:parameter><oml:name>B</oml:name><oml:default_value>2</oml:default_value></oml:parameter>
+              <oml:parameter><oml:name>C</oml:name><oml:default_value>3</oml:default_value></oml:parameter>
+              <oml:parameter><oml:name>D</oml:name><oml:default_value>4</oml:default_value></oml:parameter>
+              <oml:parameter><oml:name>E</oml:name><oml:default_value>5</oml:default_value></oml:parameter>
+              <oml:parameter><oml:name>F</oml:name><oml:default_value>6</oml:default_value></oml:parameter>
+              <oml:parameter><oml:name>G</oml:name><oml:default_value>7</oml:default_value></oml:parameter>
+              <oml:parameter><oml:name>H</oml:name><oml:default_value>8</oml:default_value></oml:parameter>
+              <oml:parameter><oml:name>I</oml:name><oml:default_value>9</oml:default_value></oml:parameter>
+              <oml:parameter><oml:name>J</oml:name><oml:default_value>10</oml:default_value></oml:parameter>
+              <oml:parameter><oml:name>L</oml:name><oml:default_value>-1</oml:default_value></oml:parameter>
+            </oml:flow>
+          </oml:component>
+        </oml:flow>
+      </oml:component>
+    </oml:flow>
+  </oml:component>
+</oml:flow>
diff --git a/tests/files/mock_responses/flows/flow_6742.xml b/tests/files/mock_responses/flows/flow_6742.xml
new file mode 100644
index 000000000..ae016f1b3
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_6742.xml
@@ -0,0 +1,64 @@
+<oml:flow xmlns:oml="http://openml.org/openml">
+  <oml:id>6742</oml:id>
+  <oml:uploader>1</oml:uploader>
+  <oml:name>weka.BayesNet</oml:name>
+  <oml:version>1</oml:version>
+  <oml:external_version>Weka_3.9.0</oml:external_version>
+  <oml:description>A Bayes net flow for testing non-sklearn flows</oml:description>
+  <oml:upload_date>2017-01-01T00:00:00</oml:upload_date>
+  <oml:language>English</oml:language>
+  <oml:dependencies>Weka_3.9.0</oml:dependencies>
+  <oml:parameter><oml:name>P1</oml:name><oml:default_value>v1</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P2</oml:name><oml:default_value>v2</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P3</oml:name><oml:default_value>v3</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P4</oml:name><oml:default_value>v4</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P5</oml:name><oml:default_value>v5</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P6</oml:name><oml:default_value>v6</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P7</oml:name><oml:default_value>v7</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P8</oml:name><oml:default_value>v8</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P9</oml:name><oml:default_value>v9</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P10</oml:name><oml:default_value>v10</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P11</oml:name><oml:default_value>v11</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P12</oml:name><oml:default_value>v12</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P13</oml:name><oml:default_value>v13</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P14</oml:name><oml:default_value>v14</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P15</oml:name><oml:default_value>v15</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P16</oml:name><oml:default_value>v16</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P17</oml:name><oml:default_value>v17</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P18</oml:name><oml:default_value>v18</oml:default_value></oml:parameter>
+  <oml:parameter><oml:name>P19</oml:name><oml:default_value>v19</oml:default_value></oml:parameter>
+  <oml:component>
+    <oml:identifier>Q</oml:identifier>
+    <oml:flow>
+      <oml:id>6743</oml:id>
+      <oml:name>weka.K2</oml:name>
+      <oml:version>1</oml:version>
+      <oml:external_version>Weka_3.9.0</oml:external_version>
+      <oml:description>K2 search algorithm</oml:description>
+      <oml:upload_date>2017-01-01T00:00:00</oml:upload_date>
+      <oml:parameter><oml:name>P</oml:name><oml:default_value>1</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>Q</oml:name><oml:default_value>2</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>R</oml:name><oml:default_value>3</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>S</oml:name><oml:default_value>4</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>T</oml:name><oml:default_value>5</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>U</oml:name><oml:default_value>0</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>V</oml:name><oml:default_value>7</oml:default_value></oml:parameter>
+      <oml:parameter><oml:name>W</oml:name><oml:default_value>8</oml:default_value></oml:parameter>
+      <oml:component>
+        <oml:identifier>E</oml:identifier>
+        <oml:flow>
+          <oml:id>5888</oml:id>
+          <oml:name>weka.SimpleEstimator</oml:name>
+          <oml:version>1</oml:version>
+          <oml:external_version>Weka_3.9.0</oml:external_version>
+          <oml:description>Simple estimator</oml:description>
+          <oml:upload_date>2017-01-01T00:00:00</oml:upload_date>
+          <oml:parameter><oml:name>alpha</oml:name><oml:default_value>0.5</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>batch-size</oml:name></oml:parameter>
+          <oml:parameter><oml:name>debug</oml:name><oml:default_value>false</oml:default_value></oml:parameter>
+          <oml:parameter><oml:name>num-decimal-places</oml:name><oml:default_value>2</oml:default_value></oml:parameter>
+        </oml:flow>
+      </oml:component>
+    </oml:flow>
+  </oml:component>
+</oml:flow>
diff --git a/tests/files/mock_responses/flows/flow_exists_no.xml b/tests/files/mock_responses/flows/flow_exists_no.xml
new file mode 100644
index 000000000..e4fb2b827
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_exists_no.xml
@@ -0,0 +1,3 @@
+<oml:flow_exists xmlns:oml="http://openml.org/openml">
+  <oml:id>0</oml:id>
+</oml:flow_exists>
diff --git a/tests/files/mock_responses/flows/flow_exists_yes.xml b/tests/files/mock_responses/flows/flow_exists_yes.xml
new file mode 100644
index 000000000..929e03a0d
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_exists_yes.xml
@@ -0,0 +1,3 @@
+<oml:flow_exists xmlns:oml="http://openml.org/openml">
+  <oml:id>42</oml:id>
+</oml:flow_exists>
diff --git a/tests/files/mock_responses/flows/flow_list_1.xml b/tests/files/mock_responses/flows/flow_list_1.xml
new file mode 100644
index 000000000..5d9357d19
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_list_1.xml
@@ -0,0 +1,10 @@
+<oml:flows xmlns:oml="http://openml.org/openml">
+  <oml:flow>
+    <oml:id>100</oml:id>
+    <oml:full_name>weka.J48(1)</oml:full_name>
+    <oml:name>weka.J48</oml:name>
+    <oml:version>1</oml:version>
+    <oml:external_version>Weka_3.7.12</oml:external_version>
+    <oml:uploader>1</oml:uploader>
+  </oml:flow>
+</oml:flows>
diff --git a/tests/files/mock_responses/flows/flow_list_no_result.xml b/tests/files/mock_responses/flows/flow_list_no_result.xml
new file mode 100644
index 000000000..78993102a
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_list_no_result.xml
@@ -0,0 +1,5 @@
+<oml:error xmlns:oml="http://openml.org/openml">
+  <oml:code>372</oml:code>
+  <oml:message>No results</oml:message>
+  <oml:additional_information>No flows matched the given criteria.</oml:additional_information>
+</oml:error>
diff --git a/tests/files/mock_responses/flows/flow_list_tagged.xml b/tests/files/mock_responses/flows/flow_list_tagged.xml
new file mode 100644
index 000000000..5d9357d19
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_list_tagged.xml
@@ -0,0 +1,10 @@
+<oml:flows xmlns:oml="http://openml.org/openml">
+  <oml:flow>
+    <oml:id>100</oml:id>
+    <oml:full_name>weka.J48(1)</oml:full_name>
+    <oml:name>weka.J48</oml:name>
+    <oml:version>1</oml:version>
+    <oml:external_version>Weka_3.7.12</oml:external_version>
+    <oml:uploader>1</oml:uploader>
+  </oml:flow>
+</oml:flows>
diff --git a/tests/files/mock_responses/flows/flow_publish.xml b/tests/files/mock_responses/flows/flow_publish.xml
new file mode 100644
index 000000000..dde7d5a08
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_publish.xml
@@ -0,0 +1,3 @@
+<oml:upload_flow xmlns:oml="http://openml.org/openml">
+  <oml:id>42</oml:id>
+</oml:upload_flow>
diff --git a/tests/files/mock_responses/flows/flow_tag.xml b/tests/files/mock_responses/flows/flow_tag.xml
new file mode 100644
index 000000000..0955480a4
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_tag.xml
@@ -0,0 +1,4 @@
+<oml:flow_tag xmlns:oml="http://openml.org/openml">
+  <oml:id>100</oml:id>
+  <oml:tag>test_tag_TestFlow_1234567890</oml:tag>
+</oml:flow_tag>
diff --git a/tests/files/mock_responses/flows/flow_untag.xml b/tests/files/mock_responses/flows/flow_untag.xml
new file mode 100644
index 000000000..b6fc980fe
--- /dev/null
+++ b/tests/files/mock_responses/flows/flow_untag.xml
@@ -0,0 +1,3 @@
+<oml:flow_untag xmlns:oml="http://openml.org/openml">
+  <oml:id>100</oml:id>
+</oml:flow_untag>
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index b942c0ab9..646da5ad3 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -10,6 +10,7 @@
from unittest import mock
import pytest
+import requests
import scipy.stats
import sklearn
import sklearn.datasets
@@ -30,7 +31,7 @@
import openml.exceptions
import openml.utils
from openml._api_calls import _perform_api_call
-from openml.testing import SimpleImputer, TestBase
+from openml.testing import SimpleImputer, TestBase, create_request_response
@@ -44,120 +45,6 @@ def setUp(self):
def tearDown(self):
super().tearDown()
- @pytest.mark.production_server()
- def test_get_flow(self):
- # We need to use the production server here because 4024 is not the
- # test server
- self.use_production_server()
-
- flow = openml.flows.get_flow(4024)
- assert isinstance(flow, openml.OpenMLFlow)
- assert flow.flow_id == 4024
- assert len(flow.parameters) == 24
- assert len(flow.components) == 1
-
- subflow_1 = next(iter(flow.components.values()))
- assert isinstance(subflow_1, openml.OpenMLFlow)
- assert subflow_1.flow_id == 4025
- assert len(subflow_1.parameters) == 14
- assert subflow_1.parameters["E"] == "CC"
- assert len(subflow_1.components) == 1
-
- subflow_2 = next(iter(subflow_1.components.values()))
- assert isinstance(subflow_2, openml.OpenMLFlow)
- assert subflow_2.flow_id == 4026
- assert len(subflow_2.parameters) == 13
- assert subflow_2.parameters["I"] == "10"
- assert len(subflow_2.components) == 1
-
- subflow_3 = next(iter(subflow_2.components.values()))
- assert isinstance(subflow_3, openml.OpenMLFlow)
- assert subflow_3.flow_id == 1724
- assert len(subflow_3.parameters) == 11
- assert subflow_3.parameters["L"] == "-1"
- assert len(subflow_3.components) == 0
-
- @pytest.mark.production_server()
- @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
- def test_get_structure(self):
- # also responsible for testing: flow.get_subflow
- # We need to use the production server here because 4024 is not the
- # test server
- self.use_production_server()
-
- flow = openml.flows.get_flow(4024)
- flow_structure_name = flow.get_structure("name")
- flow_structure_id = flow.get_structure("flow_id")
- # components: root (filteredclassifier), multisearch, loginboost,
- # reptree
- assert len(flow_structure_name) == 4
- assert len(flow_structure_id) == 4
-
- for sub_flow_name, structure in flow_structure_name.items():
- if len(structure) > 0: # skip root element
- subflow = flow.get_subflow(structure)
- assert subflow.name == sub_flow_name
-
- for sub_flow_id, structure in flow_structure_id.items():
- if len(structure) > 0: # skip root element
- subflow = flow.get_subflow(structure)
- assert subflow.flow_id == sub_flow_id
-
- @pytest.mark.test_server()
- def test_tagging(self):
- flows = openml.flows.list_flows(size=1)
- flow_id = flows["id"].iloc[0]
- flow = openml.flows.get_flow(flow_id)
- # tags can be at most 64 alphanumeric (+ underscore) chars
- unique_indicator = str(time.time()).replace(".", "")
- tag = f"test_tag_TestFlow_{unique_indicator}"
- flows = openml.flows.list_flows(tag=tag)
- assert len(flows) == 0
- flow.push_tag(tag)
- flows = openml.flows.list_flows(tag=tag)
- assert len(flows) == 1
- assert flow_id in flows["id"]
- flow.remove_tag(tag)
- flows = openml.flows.list_flows(tag=tag)
- assert len(flows) == 0
-
- @pytest.mark.test_server()
- def test_from_xml_to_xml(self):
- # Get the raw xml thing
- # TODO maybe get this via get_flow(), which would have to be refactored
- # to allow getting only the xml dictionary
- # TODO: no sklearn flows.
- for flow_id in [
- 3,
- 5,
- 7,
- 9,
- ]:
- flow_xml = _perform_api_call("flow/%d" % flow_id, request_method="get")
- flow_dict = xmltodict.parse(flow_xml)
-
- flow = openml.OpenMLFlow._from_dict(flow_dict)
- new_xml = flow._to_xml()
-
- flow_xml = (
- flow_xml.replace(" ", "")
- .replace("\t", "")
- .strip()
- .replace("\n\n", "\n")
- .replace("&quot;", '"')
- )
- flow_xml = re.sub(r"^$", "", flow_xml)
- new_xml = (
- new_xml.replace(" ", "")
- .replace("\t", "")
- .strip()
- .replace("\n\n", "\n")
- .replace("&quot;", '"')
- )
- new_xml = re.sub(r"^$", "", new_xml)
-
- assert new_xml == flow_xml
-
@pytest.mark.sklearn()
def test_to_xml_from_xml(self):
scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
@@ -180,33 +67,6 @@ def test_to_xml_from_xml(self):
openml.flows.functions.assert_flows_equal(new_flow, flow)
assert new_flow is not flow
- @pytest.mark.sklearn()
- @pytest.mark.test_server()
- def test_publish_flow(self):
- flow = openml.OpenMLFlow(
- name="sklearn.dummy.DummyClassifier",
- class_name="sklearn.dummy.DummyClassifier",
- description="test description",
- model=sklearn.dummy.DummyClassifier(),
- components=collections.OrderedDict(),
- parameters=collections.OrderedDict(),
- parameters_meta_info=collections.OrderedDict(),
- external_version=self.extension._format_external_version(
- "sklearn",
- sklearn.__version__,
- ),
- tags=[],
- language="English",
- dependencies=None,
- )
-
- flow, _ = self._add_sentinel_to_flow_name(flow, None)
-
- flow.publish()
- TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
- TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
- assert isinstance(flow.flow_id, int)
-
@pytest.mark.sklearn()
@mock.patch("openml.flows.functions.flow_exists")
def test_publish_existing_flow(self, flow_exists_mock):
@@ -222,82 +82,6 @@ def test_publish_existing_flow(self, flow_exists_mock):
f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
)
- @pytest.mark.sklearn()
- @pytest.mark.test_server()
- def test_publish_flow_with_similar_components(self):
- clf = sklearn.ensemble.VotingClassifier(
- [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))],
- )
- flow = self.extension.model_to_flow(clf)
- flow, _ = self._add_sentinel_to_flow_name(flow, None)
- flow.publish()
- TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
- TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
- # For a flow where both components are published together, the upload
- # date should be equal
- assert flow.upload_date == flow.components["lr"].upload_date, (
- flow.name,
- flow.flow_id,
- flow.components["lr"].name,
- flow.components["lr"].flow_id,
- )
-
- clf1 = sklearn.tree.DecisionTreeClassifier(max_depth=2)
- flow1 = self.extension.model_to_flow(clf1)
- flow1, sentinel = self._add_sentinel_to_flow_name(flow1, None)
- flow1.publish()
- TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
- TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow1.flow_id}")
-
- # In order to assign different upload times to the flows!
- time.sleep(1)
-
- clf2 = sklearn.ensemble.VotingClassifier(
- [("dt", sklearn.tree.DecisionTreeClassifier(max_depth=2))],
- )
- flow2 = self.extension.model_to_flow(clf2)
- flow2, _ = self._add_sentinel_to_flow_name(flow2, sentinel)
- flow2.publish()
- TestBase._mark_entity_for_removal("flow", flow2.flow_id, flow2.name)
- TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow2.flow_id}")
- # If one component was published before the other, the components in
- # the flow should have different upload dates
- assert flow2.upload_date != flow2.components["dt"].upload_date
-
- clf3 = sklearn.ensemble.AdaBoostClassifier(sklearn.tree.DecisionTreeClassifier(max_depth=3))
- flow3 = self.extension.model_to_flow(clf3)
- flow3, _ = self._add_sentinel_to_flow_name(flow3, sentinel)
- # Child flow has different parameter. Check for storing the flow
- # correctly on the server should thus not check the child's parameters!
- flow3.publish()
- TestBase._mark_entity_for_removal("flow", flow3.flow_id, flow3.name)
- TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}")
-
- @pytest.mark.sklearn()
- @pytest.mark.test_server()
- def test_semi_legal_flow(self):
- # TODO: Test if parameters are set correctly!
- # should not throw error as it contains two differentiable forms of
- # Bagging i.e., Bagging(Bagging(J48)) and Bagging(J48)
- estimator_name = (
- "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator"
- )
- semi_legal = sklearn.ensemble.BaggingClassifier(
- **{
- estimator_name: sklearn.ensemble.BaggingClassifier(
- **{
- estimator_name: sklearn.tree.DecisionTreeClassifier(),
- }
- )
- }
- )
- flow = self.extension.model_to_flow(semi_legal)
- flow, _ = self._add_sentinel_to_flow_name(flow, None)
-
- flow.publish()
- TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
- TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}")
-
@pytest.mark.sklearn()
@mock.patch("openml.flows.functions.get_flow")
@mock.patch("openml.flows.functions.flow_exists")
@@ -366,63 +150,16 @@ def test_illegal_flow(self):
)
self.assertRaises(ValueError, self.extension.model_to_flow, illegal)
- @pytest.mark.test_server()
- def test_nonexisting_flow_exists(self):
- def get_sentinel():
- # Create a unique prefix for the flow. Necessary because the flow
- # is identified by its name and external version online. Having a
- # unique name allows us to publish the same flow in each test run
- md5 = hashlib.md5()
- md5.update(str(time.time()).encode("utf-8"))
- sentinel = md5.hexdigest()[:10]
- return f"TEST{sentinel}"
-
- name = get_sentinel() + get_sentinel()
- version = get_sentinel()
-
- flow_id = openml.flows.flow_exists(name, version)
- assert not flow_id
-
- @pytest.mark.sklearn()
- @pytest.mark.test_server()
- def test_existing_flow_exists(self):
- # create a flow
- nb = sklearn.naive_bayes.GaussianNB()
+ def test_extract_tags(self):
+ flow_xml = "<oml:tag>study_14</oml:tag>"
+ flow_dict = xmltodict.parse(flow_xml)
+ tags = openml.utils.extract_xml_tags("oml:tag", flow_dict)
+ assert tags == ["study_14"]
- sparse = "sparse" if Version(sklearn.__version__) < Version("1.4") else "sparse_output"
- ohe_params = {sparse: False, "handle_unknown": "ignore"}
- if Version(sklearn.__version__) >= Version("0.20"):
- ohe_params["categories"] = "auto"
- steps = [
- ("imputation", SimpleImputer(strategy="median")),
- ("hotencoding", sklearn.preprocessing.OneHotEncoder(**ohe_params)),
- (
- "variencethreshold",
- sklearn.feature_selection.VarianceThreshold(),
- ),
- ("classifier", sklearn.tree.DecisionTreeClassifier()),
- ]
- complicated = sklearn.pipeline.Pipeline(steps=steps)
-
- for classifier in [nb, complicated]:
- flow = self.extension.model_to_flow(classifier)
- flow, _ = self._add_sentinel_to_flow_name(flow, None)
- # publish the flow
- flow = flow.publish()
- TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name)
- TestBase.logger.info(
- f"collected from {__file__.split('/')[-1]}: {flow.flow_id}",
- )
- # redownload the flow
- flow = openml.flows.get_flow(flow.flow_id)
-
- # check if flow exists can find it
- flow = openml.flows.get_flow(flow.flow_id)
- downloaded_flow_id = openml.flows.flow_exists(
- flow.name,
- flow.external_version,
- )
- assert downloaded_flow_id == flow.flow_id
+ flow_xml = "<oml:flow><oml:tag>OpenmlWeka</oml:tag>\n" "<oml:tag>weka</oml:tag></oml:flow>"
+ flow_dict = xmltodict.parse(flow_xml)
+ tags = openml.utils.extract_xml_tags("oml:tag", flow_dict["oml:flow"])
+ assert tags == ["OpenmlWeka", "weka"]
@pytest.mark.sklearn()
@pytest.mark.test_server()
@@ -554,40 +291,374 @@ def test_sklearn_to_upload_to_flow(self):
assert new_flow.name == fixture_name
new_flow.model.fit(X, y)
- def test_extract_tags(self):
- flow_xml = "<oml:tag>study_14</oml:tag>"
- flow_dict = xmltodict.parse(flow_xml)
- tags = openml.utils.extract_xml_tags("oml:tag", flow_dict)
- assert tags == ["study_14"]
- flow_xml = "<oml:flow><oml:tag>OpenmlWeka</oml:tag>\n" "<oml:tag>weka</oml:tag></oml:flow>"
- flow_dict = xmltodict.parse(flow_xml)
- tags = openml.utils.extract_xml_tags("oml:tag", flow_dict["oml:flow"])
- assert tags == ["OpenmlWeka", "weka"]
+# ---------------------------------------------------------------------------
+# Module-level mocked tests replacing former @production_server / @test_server
+# tests. Each uses @mock.patch.object(requests.Session, ...) consistent with
+# the test_delete_flow_* pattern already present in test_flow_functions.py.
+# ---------------------------------------------------------------------------
+
+
+def _mock_get_response(filepath):
+ """Build a fake ``requests.Response`` from a fixture XML file."""
+ return create_request_response(status_code=200, content_filepath=filepath)
+
+
+@mock.patch.object(requests.Session, "get")
+def test_get_flow(mock_get, test_files_directory):
+ """Offline replacement of the former production-server test_get_flow."""
+ openml.config.start_using_configuration_for_example()
+ content_file = test_files_directory / "mock_responses" / "flows" / "flow_4024.xml"
+ mock_get.return_value = _mock_get_response(content_file)
+
+ flow = openml.flows.get_flow(4024)
+ assert isinstance(flow, openml.OpenMLFlow)
+ assert flow.flow_id == 4024
+ assert len(flow.parameters) == 24
+ assert len(flow.components) == 1
+
+ subflow_1 = next(iter(flow.components.values()))
+ assert isinstance(subflow_1, openml.OpenMLFlow)
+ assert subflow_1.flow_id == 4025
+ assert len(subflow_1.parameters) == 14
+ assert subflow_1.parameters["E"] == "CC"
+ assert len(subflow_1.components) == 1
+
+ subflow_2 = next(iter(subflow_1.components.values()))
+ assert isinstance(subflow_2, openml.OpenMLFlow)
+ assert subflow_2.flow_id == 4026
+ assert len(subflow_2.parameters) == 13
+ assert subflow_2.parameters["I"] == "10"
+ assert len(subflow_2.components) == 1
+
+ subflow_3 = next(iter(subflow_2.components.values()))
+ assert isinstance(subflow_3, openml.OpenMLFlow)
+ assert subflow_3.flow_id == 1724
+ assert len(subflow_3.parameters) == 11
+ assert subflow_3.parameters["L"] == "-1"
+ assert len(subflow_3.components) == 0
+
+
+@mock.patch.object(requests.Session, "get")
+def test_get_structure(mock_get, test_files_directory):
+ """Offline replacement of the former production-server test_get_structure."""
+ openml.config.start_using_configuration_for_example()
+ content_file = test_files_directory / "mock_responses" / "flows" / "flow_4024.xml"
+ mock_get.return_value = _mock_get_response(content_file)
+
+ flow = openml.flows.get_flow(4024)
+ flow_structure_name = flow.get_structure("name")
+ flow_structure_id = flow.get_structure("flow_id")
+ # components: root (filteredclassifier), multisearch, logitboost, reptree
+ assert len(flow_structure_name) == 4
+ assert len(flow_structure_id) == 4
+
+ for sub_flow_name, structure in flow_structure_name.items():
+ if len(structure) > 0: # skip root element
+ subflow = flow.get_subflow(structure)
+ assert subflow.name == sub_flow_name
+
+ for sub_flow_id, structure in flow_structure_id.items():
+ if len(structure) > 0: # skip root element
+ subflow = flow.get_subflow(structure)
+ assert subflow.flow_id == sub_flow_id
+
+
+@mock.patch.object(requests.Session, "post")
+@mock.patch.object(requests.Session, "get")
+def test_tagging(mock_get, mock_post, test_files_directory, test_api_key):
+ """Offline replacement of the former test-server test_tagging."""
+ openml.config.start_using_configuration_for_example()
+ fixtures = test_files_directory / "mock_responses" / "flows"
+
+ # list_flows(size=1) -> one flow returned
+ flow_list_resp = _mock_get_response(fixtures / "flow_list_1.xml")
+ # get_flow(100) -> flow detail
+ flow_detail_resp = _mock_get_response(fixtures / "flow_100.xml")
+ # list_flows(tag=tag) with no result -> server returns 372 (NoResult)
+ no_result_resp = create_request_response(
+ status_code=412,
+ content_filepath=fixtures / "flow_list_no_result.xml",
+ )
+ # list_flows(tag=tag) with one result -> flow found
+ tagged_resp = _mock_get_response(fixtures / "flow_list_tagged.xml")
+
+ # push_tag / remove_tag responses
+ tag_resp = _mock_get_response(fixtures / "flow_tag.xml")
+ untag_resp = _mock_get_response(fixtures / "flow_untag.xml")
+
+ # Sequence: list_flows(size=1), get_flow(100),
+ # list_flows(tag=...) -> no result,
+ # list_flows(tag=...) -> one result,
+ # list_flows(tag=...) -> no result
+ mock_get.side_effect = [flow_list_resp, flow_detail_resp, no_result_resp, tagged_resp, no_result_resp]
+ mock_post.side_effect = [tag_resp, untag_resp]
+
+ flows = openml.flows.list_flows(size=1)
+ flow_id = flows["id"].iloc[0]
+ flow = openml.flows.get_flow(flow_id)
+
+ tag = "test_tag_TestFlow_1234567890"
+ flows = openml.flows.list_flows(tag=tag)
+ assert len(flows) == 0
+
+ flow.push_tag(tag)
+ flows = openml.flows.list_flows(tag=tag)
+ assert len(flows) == 1
+ assert flow_id in flows["id"].values
+
+ flow.remove_tag(tag)
+ flows = openml.flows.list_flows(tag=tag)
+ assert len(flows) == 0
+
+
+@mock.patch.object(requests.Session, "get")
+def test_from_xml_to_xml(mock_get, test_files_directory):
+ """Offline replacement of the former test-server test_from_xml_to_xml.
+
+ Instead of fetching multiple flows from the server, we use a single
+ fixture and verify the XML round-trip (parse -> serialize) is lossless.
+ """
+ openml.config.start_using_configuration_for_example()
+ content_file = test_files_directory / "mock_responses" / "flows" / "flow_3.xml"
+ mock_get.return_value = _mock_get_response(content_file)
+
+ flow_xml = _perform_api_call("flow/3", request_method="get")
+ flow_dict = xmltodict.parse(flow_xml)
+
+ flow = openml.OpenMLFlow._from_dict(flow_dict)
+ new_xml = flow._to_xml()
+
+ flow_xml = (
+ flow_xml.replace(" ", "")
+ .replace("\t", "")
+ .strip()
+ .replace("\n\n", "\n")
+ .replace("&quot;", '"')
+ )
+ flow_xml = re.sub(r"^$", "", flow_xml)
+ new_xml = (
+ new_xml.replace(" ", "")
+ .replace("\t", "")
+ .strip()
+ .replace("\n\n", "\n")
+ .replace("&quot;", '"')
+ )
+ new_xml = re.sub(r"^$", "", new_xml)
+
+ assert new_xml == flow_xml
+
+
+@pytest.mark.sklearn()
+def test_publish_flow(test_files_directory, test_api_key):
+ """Offline replacement of the former test-server test_publish_flow."""
+ openml.config.start_using_configuration_for_example()
+ extension = SklearnExtension()
+
+ flow = openml.OpenMLFlow(
+ name="sklearn.dummy.DummyClassifier",
+ class_name="sklearn.dummy.DummyClassifier",
+ description="test description",
+ model=sklearn.dummy.DummyClassifier(),
+ components=collections.OrderedDict(),
+ parameters=collections.OrderedDict(),
+ parameters_meta_info=collections.OrderedDict(),
+ external_version=extension._format_external_version(
+ "sklearn",
+ sklearn.__version__,
+ ),
+ tags=[],
+ language="English",
+ dependencies=None,
+ )
+
+ with mock.patch("openml.flows.functions.flow_exists") as fe_mock, \
+ mock.patch("openml.flows.functions.get_flow") as gf_mock, \
+ mock.patch("openml._api_calls._perform_api_call") as api_mock:
+
+ fe_mock.return_value = False
+ api_mock.return_value = "<oml:upload_flow>\n  <oml:id>42</oml:id>\n</oml:upload_flow>"
+
+ # After publish, get_flow is called to verify; return a copy of the flow
+ published_copy = copy.deepcopy(flow)
+ published_copy.flow_id = 42
+ published_copy.upload_date = "2025-01-01T00:00:00"
+ published_copy.version = "1"
+ published_copy.uploader = "1"
+ gf_mock.return_value = published_copy
+
+ flow.publish()
+ assert isinstance(flow.flow_id, int)
+ assert flow.flow_id == 42
+
+
+@pytest.mark.sklearn()
+def test_publish_flow_with_similar_components(test_files_directory, test_api_key):
+ """Offline replacement of the former test-server test_publish_flow_with_similar_components."""
+ openml.config.start_using_configuration_for_example()
+ extension = SklearnExtension()
+
+ clf = sklearn.ensemble.VotingClassifier(
+ [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))],
+ )
+ flow = extension.model_to_flow(clf)
+
+ with mock.patch("openml.flows.functions.flow_exists") as fe_mock, \
+ mock.patch("openml.flows.functions.get_flow") as gf_mock, \
+ mock.patch("openml._api_calls._perform_api_call") as api_mock:
+
+ api_mock.return_value = "<oml:upload_flow>\n  <oml:id>10</oml:id>\n</oml:upload_flow>"
+
+ # First publish: flow does not exist yet
+ fe_mock.return_value = False
+ published_copy = copy.deepcopy(flow)
+ published_copy.flow_id = 10
+ published_copy.upload_date = "2025-01-01T00:00:00"
+ published_copy.version = "1"
+ published_copy.uploader = "1"
+ for comp in published_copy.components.values():
+ comp.flow_id = 11
+ comp.upload_date = "2025-01-01T00:00:00"
+ comp.version = "1"
+ comp.uploader = "1"
+ gf_mock.return_value = published_copy
+
+ flow.publish()
+ # For a flow where both components are published together, the upload
+ # date should be equal
+ assert flow.upload_date == flow.components["lr"].upload_date
+
+ # Second publish with a different tree-based component
+ clf2 = sklearn.ensemble.VotingClassifier(
+ [("dt", sklearn.tree.DecisionTreeClassifier(max_depth=2))],
+ )
+ flow2 = extension.model_to_flow(clf2)
+ fe_mock.return_value = False
+ api_mock.return_value = "<oml:upload_flow>\n  <oml:id>20</oml:id>\n</oml:upload_flow>"
+ published_copy2 = copy.deepcopy(flow2)
+ published_copy2.flow_id = 20
+ published_copy2.upload_date = "2025-01-01T00:01:00"
+ published_copy2.version = "1"
+ published_copy2.uploader = "1"
+ for comp in published_copy2.components.values():
+ comp.flow_id = 21
+ comp.upload_date = "2025-01-01T00:00:00"
+ comp.version = "1"
+ comp.uploader = "1"
+ gf_mock.return_value = published_copy2
- @pytest.mark.production_server()
- def test_download_non_scikit_learn_flows(self):
- self.use_production_server()
-
- flow = openml.flows.get_flow(6742)
- assert isinstance(flow, openml.OpenMLFlow)
- assert flow.flow_id == 6742
- assert len(flow.parameters) == 19
- assert len(flow.components) == 1
- assert flow.model is None
-
- subflow_1 = next(iter(flow.components.values()))
- assert isinstance(subflow_1, openml.OpenMLFlow)
- assert subflow_1.flow_id == 6743
- assert len(subflow_1.parameters) == 8
- assert subflow_1.parameters["U"] == "0"
- assert len(subflow_1.components) == 1
- assert subflow_1.model is None
-
- subflow_2 = next(iter(subflow_1.components.values()))
- assert isinstance(subflow_2, openml.OpenMLFlow)
- assert subflow_2.flow_id == 5888
- assert len(subflow_2.parameters) == 4
- assert subflow_2.parameters["batch-size"] is None
- assert len(subflow_2.components) == 0
- assert subflow_2.model is None
+ flow2.publish()
+ # If one component was published before the other, the components in
+ # the flow should have different upload dates
+ assert flow2.upload_date != flow2.components["dt"].upload_date
+
+
+@pytest.mark.sklearn()
+def test_semi_legal_flow():
+ """Offline replacement of the former test-server test_semi_legal_flow.
+
+ Verifies that a nested BaggingClassifier(BaggingClassifier(DecisionTreeClassifier))
+ can be converted to a flow without error. The publish step is mocked.
+ """
+ extension = SklearnExtension()
+ estimator_name = (
+ "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator"
+ )
+ semi_legal = sklearn.ensemble.BaggingClassifier(
+ **{
+ estimator_name: sklearn.ensemble.BaggingClassifier(
+ **{
+ estimator_name: sklearn.tree.DecisionTreeClassifier(),
+ }
+ )
+ }
+ )
+ flow = extension.model_to_flow(semi_legal)
+
+ with mock.patch("openml.flows.functions.flow_exists") as fe_mock, \
+ mock.patch("openml.flows.functions.get_flow") as gf_mock, \
+ mock.patch("openml._api_calls._perform_api_call") as api_mock:
+
+ fe_mock.return_value = False
+ api_mock.return_value = "<oml:upload_flow>\n  <oml:id>99</oml:id>\n</oml:upload_flow>"
+ published_copy = copy.deepcopy(flow)
+ published_copy.flow_id = 99
+ published_copy.upload_date = "2025-01-01T00:00:00"
+ published_copy.version = "1"
+ published_copy.uploader = "1"
+ # Set IDs on all sub-components
+ _set_flow_ids(published_copy, start_id=100)
+ gf_mock.return_value = published_copy
+
+ flow.publish()
+ assert flow.flow_id == 99
+
+
+@mock.patch.object(requests.Session, "post")
+def test_nonexisting_flow_exists(mock_post, test_files_directory, test_api_key):
+ """Offline replacement of the former test-server test_nonexisting_flow_exists."""
+ openml.config.start_using_configuration_for_example()
+ content_file = test_files_directory / "mock_responses" / "flows" / "flow_exists_no.xml"
+ mock_post.return_value = _mock_get_response(content_file)
+
+ flow_id = openml.flows.flow_exists("TESTnonexistent_flow_name", "TESTnonexistent_version")
+ assert not flow_id
+
+
+@mock.patch.object(requests.Session, "post")
+def test_existing_flow_exists(mock_post, test_files_directory, test_api_key):
+ """Offline replacement of the former test-server test_existing_flow_exists."""
+ openml.config.start_using_configuration_for_example()
+ content_file = test_files_directory / "mock_responses" / "flows" / "flow_exists_yes.xml"
+ mock_post.return_value = _mock_get_response(content_file)
+
+ flow_id = openml.flows.flow_exists("some.existing.flow", "1.0")
+ assert flow_id == 42
+
+
+@mock.patch.object(requests.Session, "get")
+def test_download_non_scikit_learn_flows(mock_get, test_files_directory):
+ """Offline replacement of the former production-server test_download_non_scikit_learn_flows."""
+ openml.config.start_using_configuration_for_example()
+ content_file = test_files_directory / "mock_responses" / "flows" / "flow_6742.xml"
+ mock_get.return_value = _mock_get_response(content_file)
+
+ flow = openml.flows.get_flow(6742)
+ assert isinstance(flow, openml.OpenMLFlow)
+ assert flow.flow_id == 6742
+ assert len(flow.parameters) == 19
+ assert len(flow.components) == 1
+ assert flow.model is None
+
+ subflow_1 = next(iter(flow.components.values()))
+ assert isinstance(subflow_1, openml.OpenMLFlow)
+ assert subflow_1.flow_id == 6743
+ assert len(subflow_1.parameters) == 8
+ assert subflow_1.parameters["U"] == "0"
+ assert len(subflow_1.components) == 1
+ assert subflow_1.model is None
+
+ subflow_2 = next(iter(subflow_1.components.values()))
+ assert isinstance(subflow_2, openml.OpenMLFlow)
+ assert subflow_2.flow_id == 5888
+ assert len(subflow_2.parameters) == 4
+ assert subflow_2.parameters["batch-size"] is None
+ assert len(subflow_2.components) == 0
+ assert subflow_2.model is None
+
+
+# ---------------------------------------------------------------------------
+# Helpers for mocked tests
+# ---------------------------------------------------------------------------
+
+def _set_flow_ids(flow, start_id=100):
+ """Recursively set flow_id, upload_date, version, uploader on sub-components."""
+ counter = start_id
+ for comp in flow.components.values():
+ comp.flow_id = counter
+ comp.upload_date = "2025-01-01T00:00:00"
+ comp.version = "1"
+ comp.uploader = "1"
+ counter += 1
+ counter = _set_flow_ids(comp, counter)
+ return counter