Skip to content

Data Assertions

are_unique_together(columns)

This test confirms that the combination of columns is unique. For example, the combination of month and product is unique, however neither column is unique in isolation.

Examples:

are_unique_together([HeartRateAgg.year, HeartRateAgg.month])
Source code in amora/tests/assertions.py
def are_unique_together(columns: Iterable[ColumnElement]) -> Select:
    """
    This test confirms that the combination of columns is unique.
    For example, the combination of month and product is unique,
    however neither column is unique in isolation.

    Example:

    ```python
    are_unique_together([HeartRateAgg.year, HeartRateAgg.month])
    ```

    """
    return select(*columns).group_by(*columns).having(func.count(type_=Integer) > 1)

equality(model_a, model_b, compare_columns=None)

This schema test asserts the equality of two models. Optionally specify a subset of columns to compare.

Source code in amora/tests/assertions.py
def equality(
    model_a: AmoraModel,
    model_b: AmoraModel,
    compare_columns: Optional[Iterable[ColumnElement]] = None,
) -> bool:
    """
    This schema test asserts the equality of two models. Optionally specify a subset of columns to compare.

    """

    raise NotImplementedError

    def comparable_columns(model: AmoraModel) -> Iterable[ColumnElement]:
        if not compare_columns:
            return model
        return [getattr(model, column_name) for column_name in compare_columns]

    a = select(comparable_columns(model_a)).cte("a")
    b = select(comparable_columns(model_b)).cte("b")

    # fixme: google.api_core.exceptions.BadRequest: 400 EXCEPT must be followed by ALL, DISTINCT, or "(" at [34:4]
    a_minus_b = select(a).except_(select(b))
    b_minus_a = select(b).except_(select(a))

    diff_union = union_all(a_minus_b, b_minus_a)

    return _test(statement=diff_union)

expression_is_true(expression, condition=None)

Asserts that a expression is TRUE for all records. This is useful when checking integrity across columns, for example, that a total is equal to the sum of its parts, or that at least one column is true.

Optionally assert expression only for rows where condition is met.

Parameters:

Name Type Description Default
condition object

A query filter

None

Examples:

expression_is_true(StepsAgg._sum > StepsAgg._avg, condition=StepsAgg.year == 2021)
Source code in amora/tests/assertions.py
def expression_is_true(expression, condition=None) -> Optional[bool]:
    """
    Asserts that a expression is TRUE for all records.
    This is useful when checking integrity across columns, for example,
    that a total is equal to the sum of its parts, or that at least one column is true.

    Optionally assert `expression` only for rows where `condition` is met.

    Arguments:
        condition (object): A query filter

    Example:

    ```python
    expression_is_true(StepsAgg._sum > StepsAgg._avg, condition=StepsAgg.year == 2021)
    ```

    """
    statement = select("*").where(~expression)

    if condition is not None:
        statement = statement.where(condition)

    return _test(statement)

has_accepted_values(column, values)

Assert that the values from the column should be one of the provided values

Example SQL:

SELECT {{ column_name }}
FROM {{ model }}
WHERE {{ column_name }} NOT IN {{ values }}

Examples:

has_accepted_values(HeartRate.source, values=["iPhone", "Mi Band"])
Source code in amora/tests/assertions.py
def has_accepted_values(column: ColumnElement, values: Iterable) -> Select:
    """
    Assert that the values from the `column` should be one of the provided `values`

    Example SQL:

    ```sql
    SELECT {{ column_name }}
    FROM {{ model }}
    WHERE {{ column_name }} NOT IN {{ values }}
    ```

    Example:

    ```python
    has_accepted_values(HeartRate.source, values=["iPhone", "Mi Band"])
    ```
    """
    return select(column).where(~column.in_(values))

has_at_least_one_not_null_value(column)

Asserts if column has at least one value.

Example SQL:

SELECT
    count({{ column_name }}) as filler_column
FROM
    {{ model }}
HAVING
    count({{ column_name }}) = 0

Examples:

has_at_least_one_not_null_value(Health.value)
Source code in amora/tests/assertions.py
def has_at_least_one_not_null_value(column: ColumnElement) -> Select:
    """
    Asserts if column has at least one value.

    Example SQL:

    ```sql
    SELECT
        count({{ column_name }}) as filler_column
    FROM
        {{ model }}
    HAVING
        count({{ column_name }}) = 0
    ```

    Example:

    ```python
    has_at_least_one_not_null_value(Health.value)
    ```
    """
    return select(func.count(column, type_=Integer)).having(func.count(column) == 0)

has_the_same_array_length(columns)

Asserts that all array columns has the same length

Example SQL:

SELECT
    arr_col1,
    arr_col2,
    arr_col3
FROM
    {{ model }}
WHERE
    ARRAY_LENGTH(arr_col1) != ARRAY_LENGTH(arr_col2)
    OR ARRAY_LENGTH(arr_col1) != ARRAY_LENGTH(arr_col3)
    OR ARRAY_LENGTH(arr_col2) != ARRAY_LENGTH(arr_col3)

Examples:

assert that([Model.arr_col1, Model.arr_col2, Model.arr_col3], has_the_same_array_length)
Source code in amora/tests/assertions.py
def has_the_same_array_length(columns: Iterable[ColumnElement[ARRAY]]) -> Select:
    """
    Asserts that all array columns has the same length

    Example SQL:

    ```sql
    SELECT
        arr_col1,
        arr_col2,
        arr_col3
    FROM
        {{ model }}
    WHERE
        ARRAY_LENGTH(arr_col1) != ARRAY_LENGTH(arr_col2)
        OR ARRAY_LENGTH(arr_col1) != ARRAY_LENGTH(arr_col3)
        OR ARRAY_LENGTH(arr_col2) != ARRAY_LENGTH(arr_col3)
    ```

    Example:

    ```python
    assert that([Model.arr_col1, Model.arr_col2, Model.arr_col3], has_the_same_array_length)
    ```
    """
    conditions = (
        func.array_length(a) != func.array_length(b)
        for (a, b) in itertools.combinations(columns, 2)
    )
    return select(*columns).where(or_(*conditions))

is_a_non_empty_string(column)

Asserts that the column isn't an empty string

Example SQL:

SELECT
     {{ column_name }}
FROM
     {{ model }}
WHERE
     TRIM({{ column_name }}) = ""

Examples:

is_a_non_empty_string(Health.source)
Source code in amora/tests/assertions.py
def is_a_non_empty_string(column: ColumnElement) -> Select:
    """
    Asserts that the column isn't an empty string

    Example SQL:

    ```sql
    SELECT
         {{ column_name }}
    FROM
         {{ model }}
    WHERE
         TRIM({{ column_name }}) = ""
    ```

    Example:

    ```python
    is_a_non_empty_string(Health.source)
    ```
    """
    return select(column).where(func.trim(column) == literal(""))

is_non_negative(column)

Asserts that every column value should be >= 0

Example SQL:

SELECT {{ column_name }}
FROM {{ model }}
WHERE {{ column_name }} < 0

Examples:

is_non_negative(HeartRate.value)
Source code in amora/tests/assertions.py
def is_non_negative(column: ColumnElement) -> Select:
    """
    Asserts that every column value should be >= 0

    Example SQL:

    ```sql
    SELECT {{ column_name }}
    FROM {{ model }}
    WHERE {{ column_name }} < 0
    ```

    Example:

    ```python
    is_non_negative(HeartRate.value)
    ```
    """
    return select(column).where(column < 0)

is_not_null(column)

Asserts that the column does not contain null values

Results in the following query:

SELECT {{ column_name }}
FROM {{ model }}
WHERE {{ column_name }} IS NULL

Examples:

is_not_null(HeartRate.id)
Source code in amora/tests/assertions.py
def is_not_null(column: ColumnElement) -> Select:
    """
    Asserts that the `column` does not contain `null` values

    Results in the following query:

    ```sql
    SELECT {{ column_name }}
    FROM {{ model }}
    WHERE {{ column_name }} IS NULL
    ```

    Example:

    ```python
    is_not_null(HeartRate.id)
    ```

    """
    return select(column).where(column == None)

is_numeric(column)

Asserts that each not null value is a number

Example SQL:

    SELECT
        {{ column }}
    FROM
        {{ model }}
    WHERE
        REGEXP_CONTAINS({{ column }}, "[^0-9]")

Examples:

is_numeric(func.cast(Health.value, String).label("value_as_str"))
Source code in amora/tests/assertions.py
def is_numeric(column: ColumnElement) -> Select:
    """
    Asserts that each not null value is a number

    Example SQL:

    ```sql
        SELECT
            {{ column }}
        FROM
            {{ model }}
        WHERE
            REGEXP_CONTAINS({{ column }}, "[^0-9]")
    ```

    Example:

    ```python
    is_numeric(func.cast(Health.value, String).label("value_as_str"))
    ```
    """
    return select(column).where(func.REGEXP_CONTAINS(column, "[^0-9]"))

is_unique(column)

Assert that the column values are unique

Example SQL:

SELECT {{ column_name }}
FROM (
    SELECT {{ column_name }}
    FROM {{ model }}
    WHERE {{ column_name }} IS NOT NULL
    GROUP BY {{ column_name }}
    HAVING COUNT(*) > 1
) validation_errors

Examples:

is_unique(HeartRate.id)
Source code in amora/tests/assertions.py
def is_unique(column: ColumnElement) -> Select:
    """
    Assert that the `column` values are unique

    Example SQL:

    ```sql
    SELECT {{ column_name }}
    FROM (
        SELECT {{ column_name }}
        FROM {{ model }}
        WHERE {{ column_name }} IS NOT NULL
        GROUP BY {{ column_name }}
        HAVING COUNT(*) > 1
    ) validation_errors
    ```

    Example:

    ```python
    is_unique(HeartRate.id)
    ```
    """
    return select(column).group_by(column).having(func.count(column) > 1)

relationship(from_, to, from_condition=None, to_condition=None)

Each value of the from_ column exists as a value in the to column. Also known as referential integrity.

This test validates the referential integrity between two relations with a predicate (from_condition and to_condition) to filter out some rows from the test. This is useful to exclude records such as test entities, rows created in the last X minutes/hours to account for temporary gaps due to data ingestion limitations, etc.

Example SQL:

WITH left_table AS (
  SELECT
    {{from_column_name}} AS id
  FROM {{from_table}}
  WHERE
    {{from_column_name}} IS NOT NULL
    AND {{from_condition}}
),
right_table AS (
  SELECT
    {{to_column_name}} AS id
  FROM {{to_table}}
  WHERE
    {{to_column_name}} IS NOT NULL
    AND {{to_condition}}
),
exceptions as (
  SELECT
    left_table.id AS {{from_column_name}}}
  FROM
    left_table
  LEFT JOIN
    right_table
    ON left_table.id = right_table.id
  WHERE
    right_table.id IS NULL
)

SELECT * FROM exceptions

Examples:

relationship(HeartRate.id, to=Health.id)
Source code in amora/tests/assertions.py
def relationship(
    from_: ColumnElement,
    to: ColumnElement,
    from_condition=None,
    to_condition=None,
) -> Optional[bool]:
    """
    Each value of the `from_` column exists as a value in the `to` column.
    Also known as referential integrity.

    This test validates the referential integrity between two relations
    with a predicate (`from_condition` and `to_condition`) to filter out
    some rows from the test. This is useful to exclude records such as
    test entities, rows created in the last X minutes/hours to account
    for temporary gaps due to data ingestion limitations, etc.

    Example SQL:

    ```sql
    WITH left_table AS (
      SELECT
        {{from_column_name}} AS id
      FROM {{from_table}}
      WHERE
        {{from_column_name}} IS NOT NULL
        AND {{from_condition}}
    ),
    right_table AS (
      SELECT
        {{to_column_name}} AS id
      FROM {{to_table}}
      WHERE
        {{to_column_name}} IS NOT NULL
        AND {{to_condition}}
    ),
    exceptions as (
      SELECT
        left_table.id AS {{from_column_name}}}
      FROM
        left_table
      LEFT JOIN
        right_table
        ON left_table.id = right_table.id
      WHERE
        right_table.id IS NULL
    )

    SELECT * FROM exceptions
    ```

    Example:

    ```python
    relationship(HeartRate.id, to=Health.id)
    ```

    """
    left_table = (
        select(from_.label("id"))
        .where(from_ != None)
        .where(from_condition or and_(True))
        .cte("left_table")
    )
    right_table = (
        select(to.label("id"))
        .where(to != None)
        .where(to_condition or and_(True))
        .cte("right_table")
    )

    exceptions = (
        select(left_table.c["id"].label(from_.key))
        .select_from(
            left_table.join(
                right_table,
                onclause=left_table.c["id"] == right_table.c["id"],
                isouter=True,
            )
        )
        .where(right_table.c["id"] == None)
    )

    return _test(statement=exceptions)

that(column, test, raise_on_fail=True, **test_kwargs)

Executes the test, returning True if the test is successful and raising a pytest fail otherwise

Examples:

assert that(HeartRate.value, is_not_null)
:param column: An AmoraModel column to test :param test: The test assertion function :param raise_on_fail: By default, the test will raise a pytest Fail exception, with a debug message. Default True. :param test_kwargs: Keyword arguments passed to the test function

Source code in amora/tests/assertions.py
def that(
    column: ColumnElement,
    test: Test,
    raise_on_fail: bool = True,
    **test_kwargs,
) -> Optional[bool]:
    """
    Executes the test, returning `True` if the test is successful and raising a pytest fail otherwise

    Example:

    ```python
    assert that(HeartRate.value, is_not_null)
    ```
    :param column: An AmoraModel column to test
    :param test: The test assertion function
    :param raise_on_fail: By default, the test will raise a pytest Fail exception, with a debug message.  Default `True`.
    :param test_kwargs: Keyword arguments passed to the `test` function

    """
    return _test(statement=test(column, **test_kwargs), raise_on_fail=raise_on_fail)

Last update: 2023-11-23