diff --git a/src/databricks/sqlalchemy/_types.py b/src/databricks/sqlalchemy/_types.py index 6138096..c180404 100644 --- a/src/databricks/sqlalchemy/_types.py +++ b/src/databricks/sqlalchemy/_types.py @@ -88,6 +88,31 @@ def compile_numeric_databricks(type_, compiler, **kw): return compiler.visit_DECIMAL(type_, **kw) +@compiles(sqlalchemy.types.Float, "databricks") +def compile_float_databricks(type_, compiler, **kw): + """Promote ``Float(precision > 24)`` to ``DOUBLE`` (64-bit) on Databricks. + + Databricks ``FLOAT`` is 32-bit (~7 significant digits) and ``DOUBLE`` is + 64-bit (~15-17 significant digits). SQLAlchemy's default ``visit_float`` + drops the precision argument entirely for Databricks (no ``FLOAT(p)`` form + exists), so ``Float(precision=53)`` silently compiles to a 32-bit ``FLOAT`` + column. ``pandas.DataFrame.to_sql`` maps ``float64`` to ``Float(precision=53)``, + which means every ``to_sql`` round-trip of a ``float64`` column was being + permanently truncated at the ``CREATE TABLE`` step — there is no way to + recover the lost bits later, even after the INSERT path was fixed in + databricks-sql-python v4.2.6. + + The 24-bit threshold matches the SQL standard convention: ``FLOAT(p)`` with + ``p <= 24`` is single precision (IEEE 754 binary32's 24-bit significand), + ``p > 24`` is double precision. ``Float()`` with no precision keeps the + current ``FLOAT`` behavior — only callers who explicitly asked for >24-bit + precision get the promotion. 
+ """ + if getattr(type_, "precision", None) is not None and type_.precision > 24: + return "DOUBLE" + return "FLOAT" + + @compiles(sqlalchemy.types.DateTime, "databricks") def compile_datetime_databricks(type_, compiler, **kw): """ diff --git a/tests/test_local/test_types.py b/tests/test_local/test_types.py index 1ddbbf1..7036c8d 100644 --- a/tests/test_local/test_types.py +++ b/tests/test_local/test_types.py @@ -171,6 +171,66 @@ def test_array_string_renders_as_array_of_string(self): ) +class TestFloatPrecisionPromotion(CompilationTestBase): + """Regression coverage for the pandas ``to_sql`` ``float64`` precision loss. + + Databricks ``FLOAT`` is 32-bit; ``DOUBLE`` is 64-bit. SQLAlchemy's default + ``visit_float`` drops the precision argument when rendering for Databricks, + so ``Float(precision=53)`` (what ``pandas.DataFrame.to_sql`` emits for + ``float64`` columns) was silently truncating to a 32-bit ``FLOAT`` column. + + The fix is to promote ``Float`` to ``DOUBLE`` when ``precision > 24``, + matching the SQL standard cutover from single to double precision. 
+ """ + + def test_float_with_no_precision_remains_float(self): + self._assert_compiled_value_explicit(sqlalchemy.types.Float(), "FLOAT") + + def test_float_at_single_precision_boundary_remains_float(self): + """``precision=24`` is the upper bound of IEEE 754 single precision.""" + self._assert_compiled_value_explicit( + sqlalchemy.types.Float(precision=24), "FLOAT" + ) + + def test_float_above_single_precision_boundary_promotes_to_double(self): + self._assert_compiled_value_explicit( + sqlalchemy.types.Float(precision=25), "DOUBLE" + ) + + def test_float_precision_53_promotes_to_double(self): + """``pandas.DataFrame.to_sql`` maps ``float64`` to ``Float(precision=53)``.""" + self._assert_compiled_value_explicit( + sqlalchemy.types.Float(precision=53), "DOUBLE" + ) + + def test_uppercase_float_with_high_precision_stays_float(self): + """``sqlalchemy.types.FLOAT`` is the SQL-standard UPPERCASE type (32-bit on + Databricks) that always renders by name — a caller who uses it is asking + for ``FLOAT``, so the precision argument should not promote it to DOUBLE. 
+ """ + self._assert_compiled_value_explicit( + sqlalchemy.types.FLOAT(precision=53), "FLOAT" + ) + + def test_double_is_unaffected_by_float_compiler(self): + """The ``@compiles(Float)`` dispatch is keyed on ``__visit_name__`` — + ``Double`` has its own (``'double'``) so it must not be affected.""" + self._assert_compiled_value_explicit(sqlalchemy.types.Double(), "DOUBLE") + self._assert_compiled_value_explicit( + sqlalchemy.types.Double(precision=53), "DOUBLE" + ) + + def test_create_table_with_float64_emits_double_column(self): + """End-to-end: what pandas ``to_sql`` of a ``float64`` column produces.""" + from sqlalchemy.schema import CreateTable + + meta = MetaData() + t = Table("df", meta, Column("value", sqlalchemy.types.Float(precision=53))) + ddl = str(CreateTable(t).compile(dialect=self.dialect)) + assert "value DOUBLE" in ddl + assert "value FLOAT" not in ddl + + class TestDatabricksUUID: """Regression coverage for github.com/databricks/databricks-sqlalchemy/issues/50.