Skip to content

Amora Model

An Amora Model is a subclass of amora.models.AmoraModel: a way of expressing a data schema, the data materialization, and an optional transformation statement. AmoraModel is built on top of SQLAlchemy.

Data schema

from datetime import datetime

from pydantic import NameEmail
from sqlalchemy import TIMESTAMP, Float, Integer, String

from amora.models import AmoraModel, Field, MaterializationTypes, ModelConfig


class Health(AmoraModel):
    """
    Health data exported by the Apple Health App.

    The model is configured with the `table` materialization. The `doc`
    strings attached to the fields are written in Portuguese; their English
    meaning is given below.

    Attributes:
        id: Unique identifier of the measurement (primary key).
        type: Type of the collected metric.
        sourceName: Origin of the data.
        sourceVersion: Version of the data origin.
        unit: Unit of measurement.
        value: Observed value.
        device: Device the data originated from.
        creationDate: Date the data was inserted.
        startDate: Date the measurement started.
        endDate: Date the measurement ended.
    """

    __model_config__ = ModelConfig(
        materialized=MaterializationTypes.table,
        description="Health data exported by the Apple Health App",
        owner=NameEmail(
            name="Diogo Magalhães Machado", email="diogo.martins@stone.com.br"
        ),
    )

    id: int = Field(Integer, primary_key=True, doc="Identificador único da medida")
    type: str = Field(String, doc="Tipo da métrica coletada")
    sourceName: str = Field(String, doc="Origem dos dados")
    sourceVersion: str = Field(String, doc="Versão da origem de dados")
    unit: str = Field(String, doc="Unidade de medida")
    value: float = Field(Float, doc="Valor observado")
    device: str = Field(String, doc="Dispositivo de origem dos dados")
    creationDate: datetime = Field(TIMESTAMP, doc="Data de inserção dos dados")
    startDate: datetime = Field(TIMESTAMP, doc="Data do início da medida")
    endDate: datetime = Field(TIMESTAMP, doc="Data do fim da medida")

Model Configuration

Model configuration metadata

Attributes:

Name Type Description
cluster_by List[str]

BigQuery tables can be clustered to colocate related data. Expects a list of columns, as strings.

description Optional[str]

A string description of the model, used for documentation

labels Labels

Labels that can be used for data catalog and resource selection

materialized amora.models.MaterializationTypes

The materialization configuration: view, table, ephemeral. Default: view

partition_by Optional[PartitionConfig]

BigQuery supports the use of a partition by clause to easily partition a table by a column or expression. This option can help decrease latency and cost when querying large tables.

hours_to_expire Optional[int]

BigQuery supports setting an expiration time for a table (https://cloud.google.com/bigquery/docs/managing-tables?hl=pt-br#updating_a_tables_expiration_time). Setting this option can help free up storage space.

Source code in amora/models.py
@dataclasses.dataclass
class ModelConfig:
    """
    Configuration metadata attached to a model.

    Attributes:
        description (str): A string description of the model, used for documentation. Defaults to a placeholder that flags the model as undocumented.
        materialized (amora.models.MaterializationTypes): The materialization configuration: `view`, `table`, `ephemeral`. Default: `view`
        partition_by (Optional[PartitionConfig]): BigQuery supports the use of a [partition by](https://cloud.google.com/bigquery/docs/partitioned-tables) clause to easily partition a table by a column or expression. This option can help decrease latency and cost when querying large tables. Default: `None`
        cluster_by (Optional[List[str]]): BigQuery tables can be [clustered](https://cloud.google.com/bigquery/docs/clustered-tables) to colocate related data. Expects a list of columns, as strings. Default: `None`
        labels (Labels): Labels that can be used for data catalog and resource selection. Defaults to an empty set.
        owner (Optional[Owner]): Owner of the model, if any. Default: `None`
        hours_to_expire (Optional[int]): BigQuery supports setting an [expiration time](https://cloud.google.com/bigquery/docs/managing-tables?hl=pt-br#updating_a_tables_expiration_time) for a table. Setting this option can help free up storage space. Default: `None`
    """

    description: str = "Undocumented! Generated by Amora Data Build Tool 💚"
    materialized: MaterializationTypes = MaterializationTypes.view
    partition_by: Optional[PartitionConfig] = None
    cluster_by: Optional[List[str]] = None
    labels: Labels = dataclasses.field(default_factory=set)
    owner: Optional[Owner] = None
    hours_to_expire: Optional[int] = None

    @property
    def labels_dict(self) -> Dict[str, str]:
        """Return the labels as a mapping from each label's `key` to its `value`."""
        mapping: Dict[str, str] = {}
        for label in self.labels:
            mapping[label.key] = label.value
        return mapping

Transformation

Data transformation is defined at the model source() -> Compilable classmethod.

Called when amora compile is executed. Amora will build this model in your data warehouse by wrapping it in a create view as or create table as statement.

Return None for defining models for tables/views that already exist on the data warehouse and shouldn't be managed by Amora.

Return a Compilable, which is a SQLAlchemy select statement, in order to compile the model with the given statement.

Source code in amora/models.py
@classmethod
def source(cls) -> Optional[Compilable]:
    """
    Provide the statement used to build this model.

    Called when `amora compile` is executed. Amora will build this model
    in your data warehouse by wrapping the returned statement in a
    `create view as` or `create table as` statement.

    Return `None` for defining models for tables/views that already exist on the data warehouse
    and shouldn't be managed by Amora. This implementation returns `None`.

    Returns:
        A `Compilable`, which is a sqlalchemy select statement, in order to compile
        the model with the given statement, or `None`.
    """
    return None

Dependencies

A list of Amora Models that the current model depends on

Source models

Tables/views that already exist on the Data Warehouse and shouldn't be managed by Amora.

from datetime import datetime

from pydantic import NameEmail
from sqlalchemy import TIMESTAMP, Float, Integer, String

from amora.models import AmoraModel, Field, MaterializationTypes, ModelConfig


class Health(AmoraModel):
    """
    Health data exported by the Apple Health App.

    The class only declares the schema and the model configuration; it does
    not define a `source()` of its own. The `doc` strings attached to the
    fields are written in Portuguese; their English meaning is given below.

    Attributes:
        id: Unique identifier of the measurement (primary key).
        type: Type of the collected metric.
        sourceName: Origin of the data.
        sourceVersion: Version of the data origin.
        unit: Unit of measurement.
        value: Observed value.
        device: Device the data originated from.
        creationDate: Date the data was inserted.
        startDate: Date the measurement started.
        endDate: Date the measurement ended.
    """

    __model_config__ = ModelConfig(
        materialized=MaterializationTypes.table,
        description="Health data exported by the Apple Health App",
        owner=NameEmail(
            name="Diogo Magalhães Machado", email="diogo.martins@stone.com.br"
        ),
    )

    id: int = Field(Integer, primary_key=True, doc="Identificador único da medida")
    type: str = Field(String, doc="Tipo da métrica coletada")
    sourceName: str = Field(String, doc="Origem dos dados")
    sourceVersion: str = Field(String, doc="Versão da origem de dados")
    unit: str = Field(String, doc="Unidade de medida")
    value: float = Field(Float, doc="Valor observado")
    device: str = Field(String, doc="Dispositivo de origem dos dados")
    creationDate: datetime = Field(TIMESTAMP, doc="Data de inserção dos dados")
    startDate: datetime = Field(TIMESTAMP, doc="Data do início da medida")
    endDate: datetime = Field(TIMESTAMP, doc="Data do fim da medida")

Source models are models managed outside the scope of Amora, with no source implementation and no dependencies. Model configurations such as materialization type and description are optional, and used for documentation purposes only.

Materialized models

from sqlalchemy import Float, Integer, func, select

from amora.compilation import Compilable
from amora.models import AmoraModel, Field, MaterializationTypes, ModelConfig
from examples.amora_project.models.steps import Steps


class StepsAgg(AmoraModel):
    """
    Aggregates of `Steps.value` grouped by the year and month of `Steps.creationDate`.

    Declares `Steps` as its dependency and is configured with the `table`
    materialization. `year` and `month` together form the primary key.
    """

    __depends_on__ = [Steps]
    __tablename__override__ = "steps_agg"
    __model_config__ = ModelConfig(materialized=MaterializationTypes.table)

    avg: float = Field(Float)
    sum: float = Field(Float)
    count: int = Field(Integer)
    year: int = Field(Integer, primary_key=True)
    month: int = Field(Integer, primary_key=True)

    @classmethod
    def source(cls) -> Compilable:
        """Build the select statement that computes the aggregated columns."""
        measured = Steps.value
        created_at = Steps.creationDate

        # Selected columns, labelled after the fields declared on this model.
        aggregates = [
            func.avg(measured).label("avg"),
            func.sum(measured).label("sum"),
            func.count(measured).label("count"),
            func.extract("year", created_at).label("year"),
            func.extract("month", created_at).label("month"),
        ]
        # Separate, unlabelled expressions are used for the grouping keys.
        grouping = (
            func.extract("year", created_at),
            func.extract("month", created_at),
        )
        return select(aggregates).group_by(*grouping)
To-do


Last update: 2023-11-23