From 0deb95d68f0e711aea228a2acd094f249ff3e9a1 Mon Sep 17 00:00:00 2001 From: Hemanth Date: Tue, 24 Feb 2026 12:28:14 +0530 Subject: [PATCH] Replaced live server calls with mocks --- tests/files/mock_responses/flows/flow_100.xml | 14 + tests/files/mock_responses/flows/flow_3.xml | 14 + .../files/mock_responses/flows/flow_4024.xml | 106 +++ .../files/mock_responses/flows/flow_6742.xml | 64 ++ .../mock_responses/flows/flow_exists_no.xml | 3 + .../mock_responses/flows/flow_exists_yes.xml | 3 + .../mock_responses/flows/flow_list_1.xml | 10 + .../flows/flow_list_no_result.xml | 5 + .../mock_responses/flows/flow_list_tagged.xml | 10 + .../mock_responses/flows/flow_publish.xml | 3 + tests/files/mock_responses/flows/flow_tag.xml | 4 + .../files/mock_responses/flows/flow_untag.xml | 3 + tests/test_flows/test_flow.py | 689 ++++++++++-------- 13 files changed, 619 insertions(+), 309 deletions(-) create mode 100644 tests/files/mock_responses/flows/flow_100.xml create mode 100644 tests/files/mock_responses/flows/flow_3.xml create mode 100644 tests/files/mock_responses/flows/flow_4024.xml create mode 100644 tests/files/mock_responses/flows/flow_6742.xml create mode 100644 tests/files/mock_responses/flows/flow_exists_no.xml create mode 100644 tests/files/mock_responses/flows/flow_exists_yes.xml create mode 100644 tests/files/mock_responses/flows/flow_list_1.xml create mode 100644 tests/files/mock_responses/flows/flow_list_no_result.xml create mode 100644 tests/files/mock_responses/flows/flow_list_tagged.xml create mode 100644 tests/files/mock_responses/flows/flow_publish.xml create mode 100644 tests/files/mock_responses/flows/flow_tag.xml create mode 100644 tests/files/mock_responses/flows/flow_untag.xml diff --git a/tests/files/mock_responses/flows/flow_100.xml b/tests/files/mock_responses/flows/flow_100.xml new file mode 100644 index 000000000..b80658d5d --- /dev/null +++ b/tests/files/mock_responses/flows/flow_100.xml @@ -0,0 +1,14 @@ + + 100 + 1 + weka.J48 + 1 + Weka_3.7.12 + 
A simple J48 flow for testing + 2015-01-01T00:00:00 + English + Weka_3.7.12 + C0.25 + M2 + test_tag_TestFlow_1234567890 + diff --git a/tests/files/mock_responses/flows/flow_3.xml b/tests/files/mock_responses/flows/flow_3.xml new file mode 100644 index 000000000..ed5ec2f3c --- /dev/null +++ b/tests/files/mock_responses/flows/flow_3.xml @@ -0,0 +1,14 @@ + + 3 + weka.ZeroR + 1 + Weka_3.7.5 + ZeroR classifier + 2014-04-23 18:00:36 + English + Weka_3.7.5 + + -D + false + + \ No newline at end of file diff --git a/tests/files/mock_responses/flows/flow_4024.xml b/tests/files/mock_responses/flows/flow_4024.xml new file mode 100644 index 000000000..c60c35749 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_4024.xml @@ -0,0 +1,106 @@ + + 4024 + 1 + weka.FilteredClassifier + 1 + Weka_3.7.12 + Wrapper for filtered classifier + 2015-01-01T00:00:00 + English + Weka_3.7.12 + Bweka.classifiers.trees.REPTree + D1 + Fweka.filters.supervised.instance.Resample + P11 + P22 + P33 + P44 + P55 + P66 + P77 + P88 + P99 + P1010 + P1111 + P1212 + P1313 + P1414 + P1515 + P1616 + P1717 + P1818 + P1919 + P2020 + P2121 + + W + + 4025 + weka.MultiSearch + 1 + Weka_3.7.12 + MultiSearch subflow + 2015-01-01T00:00:00 + AAA + BBB + CCC + DDD + ECC + FFF + GGG + HHH + III + JJJ + KKK + LLL + MMM + NNN + + X + + 4026 + weka.LogitBoost + 1 + Weka_3.7.12 + LogitBoost subflow + 2015-01-01T00:00:00 + A1 + B2 + C3 + D4 + E5 + F6 + G7 + H8 + I10 + J11 + K12 + L13 + M14 + + Y + + 1724 + weka.REPTree + 1 + Weka_3.7.12 + REPTree subflow + 2015-01-01T00:00:00 + A1 + B2 + C3 + D4 + E5 + F6 + G7 + H8 + I9 + J10 + L-1 + + + + + + + diff --git a/tests/files/mock_responses/flows/flow_6742.xml b/tests/files/mock_responses/flows/flow_6742.xml new file mode 100644 index 000000000..ae016f1b3 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_6742.xml @@ -0,0 +1,64 @@ + + 6742 + 1 + weka.BayesNet + 1 + Weka_3.9.0 + A Bayes net flow for testing non-sklearn flows + 2017-01-01T00:00:00 + English + Weka_3.9.0 + 
P1v1 + P2v2 + P3v3 + P4v4 + P5v5 + P6v6 + P7v7 + P8v8 + P9v9 + P10v10 + P11v11 + P12v12 + P13v13 + P14v14 + P15v15 + P16v16 + P17v17 + P18v18 + P19v19 + + Q + + 6743 + weka.K2 + 1 + Weka_3.9.0 + K2 search algorithm + 2017-01-01T00:00:00 + P1 + Q2 + R3 + S4 + T5 + U0 + V7 + W8 + + E + + 5888 + weka.SimpleEstimator + 1 + Weka_3.9.0 + Simple estimator + 2017-01-01T00:00:00 + alpha0.5 + batch-size + debugfalse + num-decimal-places2 + + + + + diff --git a/tests/files/mock_responses/flows/flow_exists_no.xml b/tests/files/mock_responses/flows/flow_exists_no.xml new file mode 100644 index 000000000..e4fb2b827 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_exists_no.xml @@ -0,0 +1,3 @@ + + 0 + diff --git a/tests/files/mock_responses/flows/flow_exists_yes.xml b/tests/files/mock_responses/flows/flow_exists_yes.xml new file mode 100644 index 000000000..929e03a0d --- /dev/null +++ b/tests/files/mock_responses/flows/flow_exists_yes.xml @@ -0,0 +1,3 @@ + + 42 + diff --git a/tests/files/mock_responses/flows/flow_list_1.xml b/tests/files/mock_responses/flows/flow_list_1.xml new file mode 100644 index 000000000..5d9357d19 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_list_1.xml @@ -0,0 +1,10 @@ + + + 100 + weka.J48(1) + weka.J48 + 1 + Weka_3.7.12 + 1 + + diff --git a/tests/files/mock_responses/flows/flow_list_no_result.xml b/tests/files/mock_responses/flows/flow_list_no_result.xml new file mode 100644 index 000000000..78993102a --- /dev/null +++ b/tests/files/mock_responses/flows/flow_list_no_result.xml @@ -0,0 +1,5 @@ + + 372 + No results + No flows matched the given criteria. 
+ diff --git a/tests/files/mock_responses/flows/flow_list_tagged.xml b/tests/files/mock_responses/flows/flow_list_tagged.xml new file mode 100644 index 000000000..5d9357d19 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_list_tagged.xml @@ -0,0 +1,10 @@ + + + 100 + weka.J48(1) + weka.J48 + 1 + Weka_3.7.12 + 1 + + diff --git a/tests/files/mock_responses/flows/flow_publish.xml b/tests/files/mock_responses/flows/flow_publish.xml new file mode 100644 index 000000000..dde7d5a08 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_publish.xml @@ -0,0 +1,3 @@ + + 42 + diff --git a/tests/files/mock_responses/flows/flow_tag.xml b/tests/files/mock_responses/flows/flow_tag.xml new file mode 100644 index 000000000..0955480a4 --- /dev/null +++ b/tests/files/mock_responses/flows/flow_tag.xml @@ -0,0 +1,4 @@ + + 100 + test_tag_TestFlow_1234567890 + diff --git a/tests/files/mock_responses/flows/flow_untag.xml b/tests/files/mock_responses/flows/flow_untag.xml new file mode 100644 index 000000000..b6fc980fe --- /dev/null +++ b/tests/files/mock_responses/flows/flow_untag.xml @@ -0,0 +1,3 @@ + + 100 + diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index b942c0ab9..646da5ad3 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -10,6 +10,7 @@ from unittest import mock import pytest +import requests import scipy.stats import sklearn import sklearn.datasets @@ -30,7 +31,7 @@ import openml.exceptions import openml.utils from openml._api_calls import _perform_api_call -from openml.testing import SimpleImputer, TestBase +from openml.testing import SimpleImputer, TestBase, create_request_response @@ -44,120 +45,6 @@ def setUp(self): def tearDown(self): super().tearDown() - @pytest.mark.production_server() - def test_get_flow(self): - # We need to use the production server here because 4024 is not the - # test server - self.use_production_server() - - flow = openml.flows.get_flow(4024) - assert isinstance(flow, 
openml.OpenMLFlow) - assert flow.flow_id == 4024 - assert len(flow.parameters) == 24 - assert len(flow.components) == 1 - - subflow_1 = next(iter(flow.components.values())) - assert isinstance(subflow_1, openml.OpenMLFlow) - assert subflow_1.flow_id == 4025 - assert len(subflow_1.parameters) == 14 - assert subflow_1.parameters["E"] == "CC" - assert len(subflow_1.components) == 1 - - subflow_2 = next(iter(subflow_1.components.values())) - assert isinstance(subflow_2, openml.OpenMLFlow) - assert subflow_2.flow_id == 4026 - assert len(subflow_2.parameters) == 13 - assert subflow_2.parameters["I"] == "10" - assert len(subflow_2.components) == 1 - - subflow_3 = next(iter(subflow_2.components.values())) - assert isinstance(subflow_3, openml.OpenMLFlow) - assert subflow_3.flow_id == 1724 - assert len(subflow_3.parameters) == 11 - assert subflow_3.parameters["L"] == "-1" - assert len(subflow_3.components) == 0 - - @pytest.mark.production_server() - @pytest.mark.xfail(reason="failures_issue_1544", strict=False) - def test_get_structure(self): - # also responsible for testing: flow.get_subflow - # We need to use the production server here because 4024 is not the - # test server - self.use_production_server() - - flow = openml.flows.get_flow(4024) - flow_structure_name = flow.get_structure("name") - flow_structure_id = flow.get_structure("flow_id") - # components: root (filteredclassifier), multisearch, loginboost, - # reptree - assert len(flow_structure_name) == 4 - assert len(flow_structure_id) == 4 - - for sub_flow_name, structure in flow_structure_name.items(): - if len(structure) > 0: # skip root element - subflow = flow.get_subflow(structure) - assert subflow.name == sub_flow_name - - for sub_flow_id, structure in flow_structure_id.items(): - if len(structure) > 0: # skip root element - subflow = flow.get_subflow(structure) - assert subflow.flow_id == sub_flow_id - - @pytest.mark.test_server() - def test_tagging(self): - flows = openml.flows.list_flows(size=1) - flow_id 
= flows["id"].iloc[0] - flow = openml.flows.get_flow(flow_id) - # tags can be at most 64 alphanumeric (+ underscore) chars - unique_indicator = str(time.time()).replace(".", "") - tag = f"test_tag_TestFlow_{unique_indicator}" - flows = openml.flows.list_flows(tag=tag) - assert len(flows) == 0 - flow.push_tag(tag) - flows = openml.flows.list_flows(tag=tag) - assert len(flows) == 1 - assert flow_id in flows["id"] - flow.remove_tag(tag) - flows = openml.flows.list_flows(tag=tag) - assert len(flows) == 0 - - @pytest.mark.test_server() - def test_from_xml_to_xml(self): - # Get the raw xml thing - # TODO maybe get this via get_flow(), which would have to be refactored - # to allow getting only the xml dictionary - # TODO: no sklearn flows. - for flow_id in [ - 3, - 5, - 7, - 9, - ]: - flow_xml = _perform_api_call("flow/%d" % flow_id, request_method="get") - flow_dict = xmltodict.parse(flow_xml) - - flow = openml.OpenMLFlow._from_dict(flow_dict) - new_xml = flow._to_xml() - - flow_xml = ( - flow_xml.replace(" ", "") - .replace("\t", "") - .strip() - .replace("\n\n", "\n") - .replace("&quot;", '"') - ) - flow_xml = re.sub(r"^$", "", flow_xml) - new_xml = ( - new_xml.replace(" ", "") - .replace("\t", "") - .strip() - .replace("\n\n", "\n") - .replace("&quot;", '"') - ) - new_xml = re.sub(r"^$", "", new_xml) - - assert new_xml == flow_xml - @pytest.mark.sklearn() def test_to_xml_from_xml(self): scaler = sklearn.preprocessing.StandardScaler(with_mean=False) @@ -180,33 +67,6 @@ def test_to_xml_from_xml(self): openml.flows.functions.assert_flows_equal(new_flow, flow) assert new_flow is not flow - @pytest.mark.sklearn() - @pytest.mark.test_server() - def test_publish_flow(self): - flow = openml.OpenMLFlow( - name="sklearn.dummy.DummyClassifier", - class_name="sklearn.dummy.DummyClassifier", - description="test description", - model=sklearn.dummy.DummyClassifier(), - components=collections.OrderedDict(), - parameters=collections.OrderedDict(), - 
parameters_meta_info=collections.OrderedDict(), - external_version=self.extension._format_external_version( - "sklearn", - sklearn.__version__, - ), - tags=[], - language="English", - dependencies=None, - ) - - flow, _ = self._add_sentinel_to_flow_name(flow, None) - - flow.publish() - TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") - assert isinstance(flow.flow_id, int) - @pytest.mark.sklearn() @mock.patch("openml.flows.functions.flow_exists") def test_publish_existing_flow(self, flow_exists_mock): @@ -222,82 +82,6 @@ def test_publish_existing_flow(self, flow_exists_mock): f"collected from {__file__.split('/')[-1]}: {flow.flow_id}", ) - @pytest.mark.sklearn() - @pytest.mark.test_server() - def test_publish_flow_with_similar_components(self): - clf = sklearn.ensemble.VotingClassifier( - [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))], - ) - flow = self.extension.model_to_flow(clf) - flow, _ = self._add_sentinel_to_flow_name(flow, None) - flow.publish() - TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") - # For a flow where both components are published together, the upload - # date should be equal - assert flow.upload_date == flow.components["lr"].upload_date, ( - flow.name, - flow.flow_id, - flow.components["lr"].name, - flow.components["lr"].flow_id, - ) - - clf1 = sklearn.tree.DecisionTreeClassifier(max_depth=2) - flow1 = self.extension.model_to_flow(clf1) - flow1, sentinel = self._add_sentinel_to_flow_name(flow1, None) - flow1.publish() - TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow1.flow_id}") - - # In order to assign different upload times to the flows! 
- time.sleep(1) - - clf2 = sklearn.ensemble.VotingClassifier( - [("dt", sklearn.tree.DecisionTreeClassifier(max_depth=2))], - ) - flow2 = self.extension.model_to_flow(clf2) - flow2, _ = self._add_sentinel_to_flow_name(flow2, sentinel) - flow2.publish() - TestBase._mark_entity_for_removal("flow", flow2.flow_id, flow2.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow2.flow_id}") - # If one component was published before the other, the components in - # the flow should have different upload dates - assert flow2.upload_date != flow2.components["dt"].upload_date - - clf3 = sklearn.ensemble.AdaBoostClassifier(sklearn.tree.DecisionTreeClassifier(max_depth=3)) - flow3 = self.extension.model_to_flow(clf3) - flow3, _ = self._add_sentinel_to_flow_name(flow3, sentinel) - # Child flow has different parameter. Check for storing the flow - # correctly on the server should thus not check the child's parameters! - flow3.publish() - TestBase._mark_entity_for_removal("flow", flow3.flow_id, flow3.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow3.flow_id}") - - @pytest.mark.sklearn() - @pytest.mark.test_server() - def test_semi_legal_flow(self): - # TODO: Test if parameters are set correctly! 
- # should not throw error as it contains two differentiable forms of - # Bagging i.e., Bagging(Bagging(J48)) and Bagging(J48) - estimator_name = ( - "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" - ) - semi_legal = sklearn.ensemble.BaggingClassifier( - **{ - estimator_name: sklearn.ensemble.BaggingClassifier( - **{ - estimator_name: sklearn.tree.DecisionTreeClassifier(), - } - ) - } - ) - flow = self.extension.model_to_flow(semi_legal) - flow, _ = self._add_sentinel_to_flow_name(flow, None) - - flow.publish() - TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info(f"collected from {__file__.split('/')[-1]}: {flow.flow_id}") - @pytest.mark.sklearn() @mock.patch("openml.flows.functions.get_flow") @mock.patch("openml.flows.functions.flow_exists") @@ -366,63 +150,16 @@ def test_illegal_flow(self): ) self.assertRaises(ValueError, self.extension.model_to_flow, illegal) - @pytest.mark.test_server() - def test_nonexisting_flow_exists(self): - def get_sentinel(): - # Create a unique prefix for the flow. Necessary because the flow - # is identified by its name and external version online. 
Having a - # unique name allows us to publish the same flow in each test run - md5 = hashlib.md5() - md5.update(str(time.time()).encode("utf-8")) - sentinel = md5.hexdigest()[:10] - return f"TEST{sentinel}" - - name = get_sentinel() + get_sentinel() - version = get_sentinel() - - flow_id = openml.flows.flow_exists(name, version) - assert not flow_id - - @pytest.mark.sklearn() - @pytest.mark.test_server() - def test_existing_flow_exists(self): - # create a flow - nb = sklearn.naive_bayes.GaussianNB() + def test_extract_tags(self): + flow_xml = "study_14" + flow_dict = xmltodict.parse(flow_xml) + tags = openml.utils.extract_xml_tags("oml:tag", flow_dict) + assert tags == ["study_14"] - sparse = "sparse" if Version(sklearn.__version__) < Version("1.4") else "sparse_output" - ohe_params = {sparse: False, "handle_unknown": "ignore"} - if Version(sklearn.__version__) >= Version("0.20"): - ohe_params["categories"] = "auto" - steps = [ - ("imputation", SimpleImputer(strategy="median")), - ("hotencoding", sklearn.preprocessing.OneHotEncoder(**ohe_params)), - ( - "variencethreshold", - sklearn.feature_selection.VarianceThreshold(), - ), - ("classifier", sklearn.tree.DecisionTreeClassifier()), - ] - complicated = sklearn.pipeline.Pipeline(steps=steps) - - for classifier in [nb, complicated]: - flow = self.extension.model_to_flow(classifier) - flow, _ = self._add_sentinel_to_flow_name(flow, None) - # publish the flow - flow = flow.publish() - TestBase._mark_entity_for_removal("flow", flow.flow_id, flow.name) - TestBase.logger.info( - f"collected from {__file__.split('/')[-1]}: {flow.flow_id}", - ) - # redownload the flow - flow = openml.flows.get_flow(flow.flow_id) - - # check if flow exists can find it - flow = openml.flows.get_flow(flow.flow_id) - downloaded_flow_id = openml.flows.flow_exists( - flow.name, - flow.external_version, - ) - assert downloaded_flow_id == flow.flow_id + flow_xml = "OpenmlWeka\n" "weka" + flow_dict = xmltodict.parse(flow_xml) + tags = 
openml.utils.extract_xml_tags("oml:tag", flow_dict["oml:flow"]) + assert tags == ["OpenmlWeka", "weka"] @pytest.mark.sklearn() @pytest.mark.test_server() @@ -554,40 +291,374 @@ def test_sklearn_to_upload_to_flow(self): assert new_flow.name == fixture_name new_flow.model.fit(X, y) - def test_extract_tags(self): - flow_xml = "study_14" - flow_dict = xmltodict.parse(flow_xml) - tags = openml.utils.extract_xml_tags("oml:tag", flow_dict) - assert tags == ["study_14"] - flow_xml = "OpenmlWeka\n" "weka" - flow_dict = xmltodict.parse(flow_xml) - tags = openml.utils.extract_xml_tags("oml:tag", flow_dict["oml:flow"]) - assert tags == ["OpenmlWeka", "weka"] +# --------------------------------------------------------------------------- +# Module-level mocked tests replacing former @production_server / @test_server +# tests. Most patch requests.Session.get/post (the test_delete_flow_* pattern in +# test_flow_functions.py); the publish tests patch openml functions directly. +# --------------------------------------------------------------------------- + + +def _mock_get_response(filepath): + """Build a fake HTTP 200 ``requests.Response`` from a fixture XML file.""" + return create_request_response(status_code=200, content_filepath=filepath) + + +@mock.patch.object(requests.Session, "get") +def test_get_flow(mock_get, test_files_directory): + """Offline replacement of the former production-server test_get_flow.""" + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_4024.xml" + mock_get.return_value = _mock_get_response(content_file) + + flow = openml.flows.get_flow(4024) + assert isinstance(flow, openml.OpenMLFlow) + assert flow.flow_id == 4024 + assert len(flow.parameters) == 24 + assert len(flow.components) == 1 + + subflow_1 = next(iter(flow.components.values())) + assert isinstance(subflow_1, openml.OpenMLFlow) + assert subflow_1.flow_id == 4025 + assert len(subflow_1.parameters) == 14 + assert 
subflow_1.parameters["E"] == "CC" + assert len(subflow_1.components) == 1 + + subflow_2 = next(iter(subflow_1.components.values())) + assert isinstance(subflow_2, openml.OpenMLFlow) + assert subflow_2.flow_id == 4026 + assert len(subflow_2.parameters) == 13 + assert subflow_2.parameters["I"] == "10" + assert len(subflow_2.components) == 1 + + subflow_3 = next(iter(subflow_2.components.values())) + assert isinstance(subflow_3, openml.OpenMLFlow) + assert subflow_3.flow_id == 1724 + assert len(subflow_3.parameters) == 11 + assert subflow_3.parameters["L"] == "-1" + assert len(subflow_3.components) == 0 + + +@mock.patch.object(requests.Session, "get") +def test_get_structure(mock_get, test_files_directory): + """Offline replacement of the former production-server test_get_structure.""" + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_4024.xml" + mock_get.return_value = _mock_get_response(content_file) + + flow = openml.flows.get_flow(4024) + flow_structure_name = flow.get_structure("name") + flow_structure_id = flow.get_structure("flow_id") + # components: root (filteredclassifier), multisearch, logitboost, reptree + assert len(flow_structure_name) == 4 + assert len(flow_structure_id) == 4 + + for sub_flow_name, structure in flow_structure_name.items(): + if len(structure) > 0: # skip root element + subflow = flow.get_subflow(structure) + assert subflow.name == sub_flow_name + + for sub_flow_id, structure in flow_structure_id.items(): + if len(structure) > 0: # skip root element + subflow = flow.get_subflow(structure) + assert subflow.flow_id == sub_flow_id + + +@mock.patch.object(requests.Session, "post") +@mock.patch.object(requests.Session, "get") +def test_tagging(mock_get, mock_post, test_files_directory, test_api_key): + """Offline replacement of the former test-server test_tagging.""" + openml.config.start_using_configuration_for_example() + fixtures = test_files_directory / 
"mock_responses" / "flows" + + # list_flows(size=1) -> one flow returned + flow_list_resp = _mock_get_response(fixtures / "flow_list_1.xml") + # get_flow(100) -> flow detail + flow_detail_resp = _mock_get_response(fixtures / "flow_100.xml") + # list_flows(tag=tag) with no result -> HTTP 412 whose body carries OpenML error code 372 (no results) + no_result_resp = create_request_response( + status_code=412, + content_filepath=fixtures / "flow_list_no_result.xml", + ) + # list_flows(tag=tag) with one result -> flow found + tagged_resp = _mock_get_response(fixtures / "flow_list_tagged.xml") + + # push_tag / remove_tag responses + tag_resp = _mock_get_response(fixtures / "flow_tag.xml") + untag_resp = _mock_get_response(fixtures / "flow_untag.xml") + + # Sequence: list_flows(size=1), get_flow(100), + # list_flows(tag=...) -> no result, + # list_flows(tag=...) -> one result, + # list_flows(tag=...) -> no result + mock_get.side_effect = [flow_list_resp, flow_detail_resp, no_result_resp, tagged_resp, no_result_resp] + mock_post.side_effect = [tag_resp, untag_resp] + + flows = openml.flows.list_flows(size=1) + flow_id = flows["id"].iloc[0] + flow = openml.flows.get_flow(flow_id) + + tag = "test_tag_TestFlow_1234567890" + flows = openml.flows.list_flows(tag=tag) + assert len(flows) == 0 + + flow.push_tag(tag) + flows = openml.flows.list_flows(tag=tag) + assert len(flows) == 1 + assert flow_id in flows["id"].values + + flow.remove_tag(tag) + flows = openml.flows.list_flows(tag=tag) + assert len(flows) == 0 + + +@mock.patch.object(requests.Session, "get") +def test_from_xml_to_xml(mock_get, test_files_directory): + """Offline replacement of the former test-server test_from_xml_to_xml. + + Instead of fetching multiple flows from the server, we use a single + fixture and verify the XML round-trip (parse -> serialize) is lossless. 
+ """ + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_3.xml" + mock_get.return_value = _mock_get_response(content_file) + + flow_xml = _perform_api_call("flow/3", request_method="get") + flow_dict = xmltodict.parse(flow_xml) + + flow = openml.OpenMLFlow._from_dict(flow_dict) + new_xml = flow._to_xml() + + flow_xml = ( + flow_xml.replace(" ", "") + .replace("\t", "") + .strip() + .replace("\n\n", "\n") + .replace("&quot;", '"') + ) + flow_xml = re.sub(r"^$", "", flow_xml) + new_xml = ( + new_xml.replace(" ", "") + .replace("\t", "") + .strip() + .replace("\n\n", "\n") + .replace("&quot;", '"') + ) + new_xml = re.sub(r"^$", "", new_xml) + + assert new_xml == flow_xml + + +@pytest.mark.sklearn() +def test_publish_flow(test_files_directory, test_api_key): + """Offline replacement of the former test-server test_publish_flow.""" + openml.config.start_using_configuration_for_example() + extension = SklearnExtension() + + flow = openml.OpenMLFlow( + name="sklearn.dummy.DummyClassifier", + class_name="sklearn.dummy.DummyClassifier", + description="test description", + model=sklearn.dummy.DummyClassifier(), + components=collections.OrderedDict(), + parameters=collections.OrderedDict(), + parameters_meta_info=collections.OrderedDict(), + external_version=extension._format_external_version( + "sklearn", + sklearn.__version__, + ), + tags=[], + language="English", + dependencies=None, + ) + + with mock.patch("openml.flows.functions.flow_exists") as fe_mock, \ + mock.patch("openml.flows.functions.get_flow") as gf_mock, \ + mock.patch("openml._api_calls._perform_api_call") as api_mock: + + fe_mock.return_value = False + api_mock.return_value = "\n 42\n" + + # After publish, get_flow is called to verify; return a copy of the flow + published_copy = copy.deepcopy(flow) + published_copy.flow_id = 42 + published_copy.upload_date = "2025-01-01T00:00:00" + published_copy.version = "1" + published_copy.uploader = 
"1" + gf_mock.return_value = published_copy + + flow.publish() + assert isinstance(flow.flow_id, int) + assert flow.flow_id == 42 + + +@pytest.mark.sklearn() +def test_publish_flow_with_similar_components(test_files_directory, test_api_key): + """Offline replacement of the former test-server test_publish_flow_with_similar_components.""" + openml.config.start_using_configuration_for_example() + extension = SklearnExtension() + + clf = sklearn.ensemble.VotingClassifier( + [("lr", sklearn.linear_model.LogisticRegression(solver="lbfgs"))], + ) + flow = extension.model_to_flow(clf) + + with mock.patch("openml.flows.functions.flow_exists") as fe_mock, \ + mock.patch("openml.flows.functions.get_flow") as gf_mock, \ + mock.patch("openml._api_calls._perform_api_call") as api_mock: + + api_mock.return_value = "\n 10\n" + + # First publish: flow does not exist yet + fe_mock.return_value = False + published_copy = copy.deepcopy(flow) + published_copy.flow_id = 10 + published_copy.upload_date = "2025-01-01T00:00:00" + published_copy.version = "1" + published_copy.uploader = "1" + for comp in published_copy.components.values(): + comp.flow_id = 11 + comp.upload_date = "2025-01-01T00:00:00" + comp.version = "1" + comp.uploader = "1" + gf_mock.return_value = published_copy + + flow.publish() + # For a flow where both components are published together, the upload + # date should be equal + assert flow.upload_date == flow.components["lr"].upload_date + + # Second publish with a different tree-based component + clf2 = sklearn.ensemble.VotingClassifier( + [("dt", sklearn.tree.DecisionTreeClassifier(max_depth=2))], + ) + flow2 = extension.model_to_flow(clf2) + fe_mock.return_value = False + api_mock.return_value = "\n 20\n" + published_copy2 = copy.deepcopy(flow2) + published_copy2.flow_id = 20 + published_copy2.upload_date = "2025-01-01T00:01:00" + published_copy2.version = "1" + published_copy2.uploader = "1" + for comp in published_copy2.components.values(): + comp.flow_id = 21 + 
comp.upload_date = "2025-01-01T00:00:00" + comp.version = "1" + comp.uploader = "1" + gf_mock.return_value = published_copy2 - @pytest.mark.production_server() - def test_download_non_scikit_learn_flows(self): - self.use_production_server() - - flow = openml.flows.get_flow(6742) - assert isinstance(flow, openml.OpenMLFlow) - assert flow.flow_id == 6742 - assert len(flow.parameters) == 19 - assert len(flow.components) == 1 - assert flow.model is None - - subflow_1 = next(iter(flow.components.values())) - assert isinstance(subflow_1, openml.OpenMLFlow) - assert subflow_1.flow_id == 6743 - assert len(subflow_1.parameters) == 8 - assert subflow_1.parameters["U"] == "0" - assert len(subflow_1.components) == 1 - assert subflow_1.model is None - - subflow_2 = next(iter(subflow_1.components.values())) - assert isinstance(subflow_2, openml.OpenMLFlow) - assert subflow_2.flow_id == 5888 - assert len(subflow_2.parameters) == 4 - assert subflow_2.parameters["batch-size"] is None - assert len(subflow_2.components) == 0 - assert subflow_2.model is None + flow2.publish() + # If one component was published before the other, the components in + # the flow should have different upload dates + assert flow2.upload_date != flow2.components["dt"].upload_date + + +@pytest.mark.sklearn() +def test_semi_legal_flow(): + """Offline replacement of the former test-server test_semi_legal_flow. + + Verifies that a nested BaggingClassifier(BaggingClassifier(DecisionTreeClassifier)) + can be converted to a flow without error. The publish step is mocked. 
+ """ + extension = SklearnExtension() + estimator_name = ( + "base_estimator" if Version(sklearn.__version__) < Version("1.4") else "estimator" + ) + semi_legal = sklearn.ensemble.BaggingClassifier( + **{ + estimator_name: sklearn.ensemble.BaggingClassifier( + **{ + estimator_name: sklearn.tree.DecisionTreeClassifier(), + } + ) + } + ) + flow = extension.model_to_flow(semi_legal) + + with mock.patch("openml.flows.functions.flow_exists") as fe_mock, \ + mock.patch("openml.flows.functions.get_flow") as gf_mock, \ + mock.patch("openml._api_calls._perform_api_call") as api_mock: + + fe_mock.return_value = False + api_mock.return_value = "\n 99\n" + published_copy = copy.deepcopy(flow) + published_copy.flow_id = 99 + published_copy.upload_date = "2025-01-01T00:00:00" + published_copy.version = "1" + published_copy.uploader = "1" + # Set IDs on all sub-components + _set_flow_ids(published_copy, start_id=100) + gf_mock.return_value = published_copy + + flow.publish() + assert flow.flow_id == 99 + + +@mock.patch.object(requests.Session, "post") +def test_nonexisting_flow_exists(mock_post, test_files_directory, test_api_key): + """Offline replacement of the former test-server test_nonexisting_flow_exists.""" + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_exists_no.xml" + mock_post.return_value = _mock_get_response(content_file) + + flow_id = openml.flows.flow_exists("TESTnonexistent_flow_name", "TESTnonexistent_version") + assert not flow_id + + +@mock.patch.object(requests.Session, "post") +def test_existing_flow_exists(mock_post, test_files_directory, test_api_key): + """Offline replacement of the former test-server test_existing_flow_exists.""" + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_exists_yes.xml" + mock_post.return_value = _mock_get_response(content_file) + + flow_id = 
openml.flows.flow_exists("some.existing.flow", "1.0") + assert flow_id == 42 + + +@mock.patch.object(requests.Session, "get") +def test_download_non_scikit_learn_flows(mock_get, test_files_directory): + """Offline replacement of the former production-server test_download_non_scikit_learn_flows.""" + openml.config.start_using_configuration_for_example() + content_file = test_files_directory / "mock_responses" / "flows" / "flow_6742.xml" + mock_get.return_value = _mock_get_response(content_file) + + flow = openml.flows.get_flow(6742) + assert isinstance(flow, openml.OpenMLFlow) + assert flow.flow_id == 6742 + assert len(flow.parameters) == 19 + assert len(flow.components) == 1 + assert flow.model is None + + subflow_1 = next(iter(flow.components.values())) + assert isinstance(subflow_1, openml.OpenMLFlow) + assert subflow_1.flow_id == 6743 + assert len(subflow_1.parameters) == 8 + assert subflow_1.parameters["U"] == "0" + assert len(subflow_1.components) == 1 + assert subflow_1.model is None + + subflow_2 = next(iter(subflow_1.components.values())) + assert isinstance(subflow_2, openml.OpenMLFlow) + assert subflow_2.flow_id == 5888 + assert len(subflow_2.parameters) == 4 + assert subflow_2.parameters["batch-size"] is None + assert len(subflow_2.components) == 0 + assert subflow_2.model is None + + +# --------------------------------------------------------------------------- +# Helpers for mocked tests +# --------------------------------------------------------------------------- + +def _set_flow_ids(flow, start_id=100): + """Recursively set flow_id, upload_date, version, uploader on sub-components.""" + counter = start_id + for comp in flow.components.values(): + comp.flow_id = counter + comp.upload_date = "2025-01-01T00:00:00" + comp.version = "1" + comp.uploader = "1" + counter += 1 + counter = _set_flow_ids(comp, counter) + return counter