Class

org.checkita.dqf.config.jobconf.Sources

ParquetFileSourceConfig

final case class ParquetFileSourceConfig(id: ID, description: Option[NonEmptyString], path: URI, schema: Option[ID], persist: Option[StorageLevel], windowBy: StreamWindowing = ProcessingTime, options: Seq[SparkParam] = Seq.empty, keyFields: Seq[NonEmptyString] = Seq.empty, metadata: Seq[SparkParam] = Seq.empty) extends FileSourceConfig with ParquetFileConfig with Product with Serializable

Parquet file source configuration

id

Source ID

description

Source description

path

Path to file

schema

Schema ID

persist

Spark storage level used to persist the dataframe during job execution.

windowBy

Source of timestamp used to build windows. Applicable only for streaming jobs! Default: processingTime - uses current timestamp at the moment when Spark processes row. Other options are:

  • eventTime - uses column with name 'timestamp' (column must be of TimestampType).
  • customTime(columnName) - uses arbitrary user-defined column (column must be of TimestampType)

options

List of additional spark options required to read the source (if any)

keyFields

Sequence of key fields (columns that identify data row)

metadata

List of metadata parameters specific to this source

Linear Supertypes
Product, Equals, ParquetFileConfig, FileConfig, FileSourceConfig, SourceConfig, JobConfigEntity, Serializable, Serializable, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. ParquetFileSourceConfig
  2. Product
  3. Equals
  4. ParquetFileConfig
  5. FileConfig
  6. FileSourceConfig
  7. SourceConfig
  8. JobConfigEntity
  9. Serializable
  10. Serializable
  11. AnyRef
  12. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new ParquetFileSourceConfig(id: ID, description: Option[NonEmptyString], path: URI, schema: Option[ID], persist: Option[StorageLevel], windowBy: StreamWindowing = ProcessingTime, options: Seq[SparkParam] = Seq.empty, keyFields: Seq[NonEmptyString] = Seq.empty, metadata: Seq[SparkParam] = Seq.empty)

    id

    Source ID

    description

    Source description

    path

    Path to file

    schema

    Schema ID

    persist

    Spark storage level used to persist the dataframe during job execution.

    windowBy

    Source of timestamp used to build windows. Applicable only for streaming jobs! Default: processingTime - uses current timestamp at the moment when Spark processes row. Other options are:

    • eventTime - uses column with name 'timestamp' (column must be of TimestampType).
    • customTime(columnName) - uses arbitrary user-defined column (column must be of TimestampType)

    options

    List of additional spark options required to read the source (if any)

    keyFields

    Sequence of key fields (columns that identify data row)

    metadata

    List of metadata parameters specific to this source

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  6. val description: Option[NonEmptyString]
    Definition Classes
    ParquetFileSourceConfig → JobConfigEntity
  7. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  8. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  9. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  10. val id: ID
    Definition Classes
    ParquetFileSourceConfig → JobConfigEntity
  11. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  12. val keyFields: Seq[NonEmptyString]
    Definition Classes
    ParquetFileSourceConfig → SourceConfig
  13. val metadata: Seq[SparkParam]
    Definition Classes
    ParquetFileSourceConfig → JobConfigEntity
  14. val metadataString: Option[String]
    Definition Classes
    JobConfigEntity
  15. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  16. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  17. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  18. val options: Seq[SparkParam]
  19. val path: URI
    Definition Classes
    ParquetFileSourceConfig → FileConfig
  20. val persist: Option[StorageLevel]
    Definition Classes
    ParquetFileSourceConfig → SourceConfig
  21. val schema: Option[ID]
    Definition Classes
    ParquetFileSourceConfig → FileConfig
  22. val streamable: Boolean
    Definition Classes
    ParquetFileSourceConfig → SourceConfig
  23. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  24. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  25. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  26. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  27. val windowBy: StreamWindowing

Inherited from Product

Inherited from Equals

Inherited from ParquetFileConfig

Inherited from FileConfig

Inherited from FileSourceConfig

Inherited from SourceConfig

Inherited from JobConfigEntity

Inherited from Serializable

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

Ungrouped