diff --git a/index.src.html b/index.src.html index c371ad9e..e9c26734 100644 --- a/index.src.html +++ b/index.src.html @@ -115,6 +115,17 @@ :: Refers generically to an instance of {{AudioDecoder}}, {{AudioEncoder}}, {{VideoDecoder}}, or {{VideoEncoder}}. +: presentation timestamp +:: The presentation timestamp (PTS) of a media sample, which determines its + relative ordering and timing for rendering and processing. It is expressed + in microseconds. The specific epoch (e.g., zero-based from the start of a + capture, or matching a system clock) is not defined by this specification, + but timestamps SHOULD be consistent within a single + stream. + +: presentation duration +:: The duration of a media sample, expressed in microseconds. + : Key Chunk :: An encoded chunk that does not depend on any other frames for decoding. Also commonly referred to as a "key frame". @@ -2579,9 +2590,9 @@ : \[[type]] :: Describes whether the chunk is a [=key chunk=]. : \[[timestamp]] -:: The presentation timestamp, given in microseconds. +:: The [=presentation timestamp=] of this {{EncodedAudioChunk}}. : \[[duration]] -:: The presentation duration, given in microseconds. +:: The [=presentation duration=] of this {{EncodedAudioChunk}}. : [[byte length]] :: The byte length of {{EncodedAudioChunk/[[internal data]]}}. @@ -2618,10 +2629,10 @@ :: Returns the value of {{EncodedAudioChunk/[[type]]}}. : timestamp -:: Returns the value of {{EncodedAudioChunk/[[timestamp]]}}. +:: Returns the [=presentation timestamp=] of this {{EncodedAudioChunk}}. : duration -:: Returns the value of {{EncodedAudioChunk/[[duration]]}}. +:: Returns the [=presentation duration=] of this {{EncodedAudioChunk}}. : byteLength :: Returns the value of {{EncodedAudioChunk/[[byte length]]}}. @@ -2685,9 +2696,9 @@ : \[[type]] :: The {{EncodedVideoChunkType}} of this {{EncodedVideoChunk}}; : \[[timestamp]] -:: The presentation timestamp, given in microseconds. +:: The [=presentation timestamp=] of this {{EncodedVideoChunk}}. 
: \[[duration]] -:: The presentation duration, given in microseconds. +:: The [=presentation duration=] of this {{EncodedVideoChunk}}. : [[byte length]] :: The byte length of {{EncodedVideoChunk/[[internal data]]}}. @@ -2725,10 +2736,10 @@ :: Returns the value of {{EncodedVideoChunk/[[type]]}}. : timestamp -:: Returns the value of {{EncodedVideoChunk/[[timestamp]]}}. +:: Returns the [=presentation timestamp=] of this {{EncodedVideoChunk}}. : duration -:: Returns the value of {{EncodedVideoChunk/[[duration]]}}. +:: Returns the [=presentation duration=] of this {{EncodedVideoChunk}}. : byteLength :: Returns the value of {{EncodedVideoChunk/[[byte length]]}}. @@ -2879,7 +2890,7 @@ :: The number of audio channels for this {{AudioData}}. : \[[timestamp]] -:: The presentation timestamp, in microseconds, for this {{AudioData}}. +:: The [=presentation timestamp=] of this {{AudioData}}. ### Constructors ###{#audiodata-constructors} @@ -2947,13 +2958,13 @@ {{AudioData/[[number of channels]]}}. : timestamp -:: The presentation timestamp, in microseconds, for this {{AudioData}}. +:: Returns the [=presentation timestamp=] of this {{AudioData}}. - The {{AudioData/numberOfChannels}} getter steps are to return + The {{AudioData/timestamp}} getter steps are to return {{AudioData/[[timestamp]]}}. : duration -:: The duration, in microseconds, for this {{AudioData}}. +:: Returns the [=presentation duration=] of this {{AudioData}}. The {{AudioData/duration}} getter steps are to: 1. Let |microsecondsPerSecond| be `1,000,000`. @@ -3490,12 +3501,10 @@ adjustments. : \[[duration]] -:: The presentation duration, given in microseconds. The duration is copied - from the {{EncodedVideoChunk}} corresponding to this {{VideoFrame}}. +:: The [=presentation duration=] of this {{VideoFrame}}. : \[[timestamp]] -:: The presentation timestamp, given in microseconds. The timestamp is copied - from the {{EncodedVideoChunk}} corresponding to this {{VideoFrame}}. 
+:: The [=presentation timestamp=] of this {{VideoFrame}}. : [[color space]] :: The {{VideoColorSpace}} associated with this frame. @@ -3782,17 +3791,13 @@ {{VideoFrame/[[display height]]}}. : timestamp -:: The presentation timestamp, given in microseconds. For decode, - timestamp is copied from the {{EncodedVideoChunk}} corresponding - to this {{VideoFrame}}. For encode, timestamp is copied to the - {{EncodedVideoChunk}}s corresponding to this {{VideoFrame}}. +:: Returns the [=presentation timestamp=] of this {{VideoFrame}}. The {{VideoFrame/timestamp}} getter steps are to return {{VideoFrame/[[timestamp]]}}. : duration -:: The presentation duration, given in microseconds. The duration is copied - from the {{EncodedVideoChunk}} corresponding to this VideoFrame. +:: Returns the [=presentation duration=] of this {{VideoFrame}}. The {{VideoFrame/duration}} getter steps are to return {{VideoFrame/[[duration]]}}. @@ -6261,6 +6266,7 @@ conditions, such as allowing a site to mutate a codec input or output while the underlying codec is still operating on that data. This concern is mitigated by ensuring that input and output interfaces are immutable. + Privacy Considerations{#privacy-considerations} =============================================== @@ -6308,6 +6314,7 @@ budget", which depletes as authors use WebCodecs and other identifying APIs. Upon exhaustion of the privacy budget, codec capabilities could be reduced to a common baseline or prompt for user approval. + Best Practices for Authors Using WebCodecs{#best-practices-developers} ====================================================================== diff --git a/output.html b/output.html new file mode 100644 index 00000000..3294531e --- /dev/null +++ b/output.html @@ -0,0 +1,12878 @@ + + + + + WebCodecs + + + + + + + + + + + + + + + + + + + +
+

+

+

WebCodecs

+

Editor’s Draft, +

+
+ More details about this document +
+
+
This version: +
https://w3c.github.io/webcodecs/ +
Latest published version: +
https://www.w3.org/TR/webcodecs/ +
Feedback: +
GitHub +
Inline In Spec +
Editors: +
Paul Adenot (Mozilla) +
Eugene Zemtsov (Google LLC) +
Former Editors: +
Bernard Aboba (Microsoft Corporation) +
Chris Cunningham (Google Inc.) +
Participate: +
Git Repository. +
File an issue. +
Version History: +
https://github.com/w3c/webcodecs/commits +
+
+
+
+ +
+
+
+

Abstract

+

This specification defines interfaces to codecs for encoding and +decoding of audio, video, and images.

+

This specification does not specify or require any particular codec or +method of encoding or decoding. The purpose of this specification is to +provide JavaScript interfaces to implementations of existing codec +technology developed elsewhere. Implementers are free to +support any combination of codecs or none at all.

+
+

Status of this document

+
+

+ This section describes the status of this document at the time of its publication. A list of current W3C publications and the latest revision of this technical report can be found in the W3C standards and drafts index. +

+

+ Feedback and comments on this specification are welcome. + GitHub Issues are preferred for discussion on this specification. Alternatively, you can send comments to the Media Working Group’s mailing-list, public-media-wg@w3.org (archives). + This draft highlights some of the pending issues that are still to be discussed in the working group. + No decision has been taken on the outcome of these issues including whether they are valid. +

+

+ This document was published by the Media Working Group as an Editor’s Draft. + This document is intended to become a W3C Recommendation. +

+

+ Publication as an Editor’s Draft does not imply endorsement by + W3C and its Members. +

+

+ This document was produced by a group operating under the W3C Patent Policy. + W3C maintains a public list of any patent disclosures made in connection with the deliverables of the group; that page also includes instructions for disclosing a patent. An individual who has actual knowledge of a patent that the individual believes contains Essential Claim(s) must disclose the information in accordance with section 6 of the W3C Patent Policy. +

+

+ This document is governed by the 18 August 2025 W3C Process Document. +

+

+
+
+ +
+

1. Definitions

+
+
Codec +
+

Refers generically to an instance of AudioDecoder, AudioEncoder, +VideoDecoder, or VideoEncoder.

+
presentation timestamp +
+

The presentation timestamp (PTS) of a media sample, which determines its +relative ordering and timing for rendering and processing. It is expressed +in microseconds. The specific epoch (e.g., zero-based from the start of a +capture, or matching a system clock) is not defined by this specification, +but timestamps SHOULD be consistent within a single +stream.

+
presentation duration +
+

The duration of a media sample, expressed in microseconds.

+
Key Chunk +
+

An encoded chunk that does not depend on any other frames for decoding. Also +commonly referred to as a "key frame".

+
Internal Pending Output +
+

Codec outputs such as VideoFrames that currently reside in the internal +pipeline of the underlying codec implementation. The underlying codec +implementation MAY emit new outputs only when new +inputs are provided. The underlying codec implementation MUST emit all outputs in response to a flush.

+
Codec System Resources +
+

Resources including CPU memory, GPU memory, and exclusive handles to specific +decoding/encoding hardware that MAY be allocated by +the User Agent as part of codec configuration or generation of AudioData +and VideoFrame objects. Such resources MAY be +quickly exhausted and SHOULD be released immediately +when no longer in use.

+
Temporal Layer +
+

A grouping of EncodedVideoChunks whose timestamp cadence produces a +particular framerate. See scalabilityMode.

+
Progressive Image +
+

An image that supports decoding to multiple levels of detail, with lower +levels becoming available while the encoded data is not yet fully buffered.

+
Progressive Image Frame Generation +
+

A generational identifier for a given Progressive Image decoded output. +Each successive generation adds additional detail to the decoded output. +The mechanism for computing a frame’s generation is implementer defined.

+
Primary Image Track +
+

An image track that is marked by the given image file as being the default +track. The mechanism for indicating a primary track is format defined.

+
RGB Format +
+

A VideoPixelFormat containing red, green, and blue color channels in +any order or layout (interleaved or planar), and irrespective of whether an +alpha channel is present.

+
sRGB Color Space +
+

A VideoColorSpace object, initialized as follows:

+
    +
  1. +

    [[primaries]] is set to bt709,

    +
  2. +

    [[transfer]] is set to iec61966-2-1,

    +
  3. +

    [[matrix]] is set to rgb,

    +
  4. +

    [[full range]] is set to true

    +
+
Display P3 Color Space +
+

A VideoColorSpace object, initialized as follows:

+
    +
  1. +

    [[primaries]] is set to smpte432,

    +
  2. +

    [[transfer]] is set to iec61966-2-1,

    +
  3. +

    [[matrix]] is set to rgb,

    +
  4. +

    [[full range]] is set to true

    +
+
REC709 Color Space +
+

A VideoColorSpace object, initialized as follows:

+
    +
  1. +

    [[primaries]] is set to bt709,

    +
  2. +

    [[transfer]] is set to bt709,

    +
  3. +

    [[matrix]] is set to bt709,

    +
  4. +

    [[full range]] is set to false

    +
+
Codec Saturation +
+

The state of an underlying codec implementation where the number of active
+decoding or encoding requests has reached an implementation-specific
+maximum such that it is temporarily unable to accept more work. The maximum
+may be any value greater than 1, including infinity (no maximum). While
+saturated, additional calls to decode() or encode() will be buffered
+in the control message queue, and will increment the respective
+decodeQueueSize and encodeQueueSize attributes. The codec implementation
+will become unsaturated after making sufficient progress on the current
+workload.

+
+

2. Codec Processing Model

+

2.1. Background

+
+ + This section is non-normative. + + +

The codec interfaces defined by the specification are designed such that new + codec tasks can be scheduled while previous tasks are still pending. For + example, web authors can call decode() without waiting for a previous + decode() to complete. This is achieved by offloading underlying codec tasks + to a separate parallel queue for parallel execution.

+

This section describes threading behaviors as they are visible from the + perspective of web authors. Implementers can choose to use more threads, as + long as the externally visible behaviors of blocking and sequencing are + maintained as follows.

+
+

2.2. Control Messages

+

A control message defines a sequence of steps corresponding to a +method invocation on a codec instance (e.g. encode()).

+

A control message queue is a queue of +control messages. Each codec instance has a control message queue +stored in an internal slot named +[[control message queue]].

+

Queuing a control +message means enqueuing the message to a codec’s [[control message queue]]. Invoking codec methods will generally queue a control message +to schedule work.

+

Running a control +message means performing a sequence of steps specified by the method +that enqueued the message.

+

The steps of a given control message can block processing later messages in the +control message queue. Each codec instance has a boolean internal slot named +[[message queue blocked]] that is set to true when this occurs. A +blocking message will conclude by setting [[message queue blocked]] to +false and rerunning the Process the control message queue steps.

+

All control messages will return either "processed" or "not processed". +Returning "processed" indicates the message steps are being (or have been) +executed and the message may be removed from the control message queue. +"not processed" indicates the message must not be processed at this time +and should remain in the control message queue to be retried later.

+

To Process the control message queue, run these steps:

+
    +
  1. +

    While [[message queue blocked]] is false and +[[control message queue]] is not empty:

    +
      +
    1. +

      Let front message be the first message in +[[control message queue]].

      +
    2. +

      Let outcome be the result of running the control message steps +described by front message.

      +
    3. +

      If outcome equals "not processed", break.

      +
    4. +

      Otherwise, dequeue front message from the +[[control message queue]].

      +
    +
+

2.3. Codec Work Parallel Queue

+

Each codec instance has an internal slot named +[[codec work queue]] that is a parallel queue.

+

Each codec instance has an internal slot named +[[codec implementation]] that refers to the underlying platform +encoder or decoder. Except for the initial assignment, any steps that reference +[[codec implementation]] will be enqueued to the [[codec work queue]].

+

Each codec instance has a unique codec task source. Tasks +queued from the [[codec work queue]] to the event loop +will use the codec task source.

+

3. AudioDecoder Interface

+
[Exposed=(Window,DedicatedWorker), SecureContext]
+interface AudioDecoder : EventTarget {
+  constructor(AudioDecoderInit init);
+
+  readonly attribute CodecState state;
+  readonly attribute unsigned long decodeQueueSize;
+  attribute EventHandler ondequeue;
+
+  undefined configure(AudioDecoderConfig config);
+  undefined decode(EncodedAudioChunk chunk);
+  Promise<undefined> flush();
+  undefined reset();
+  undefined close();
+
+  static Promise<AudioDecoderSupport> isConfigSupported(AudioDecoderConfig config);
+};
+
+dictionary AudioDecoderInit {
+  required AudioDataOutputCallback output;
+  required WebCodecsErrorCallback error;
+};
+
+callback AudioDataOutputCallback = undefined(AudioData output);
+
+

3.1. Internal Slots

+
+
[[control message queue]] +
+

A queue of control messages to be performed upon this codec +instance. See [[control message queue]].

+
[[message queue blocked]] +
+

A boolean indicating when processing the +[[control message queue]] is blocked by a pending +control message. See [[message queue blocked]].

+
[[codec implementation]] +
+

Underlying decoder implementation provided by the User Agent. See +[[codec implementation]].

+
[[codec work queue]] +
+

A parallel queue used for running parallel steps that reference the +[[codec implementation]]. See [[codec work queue]].

+
[[codec saturated]] +
+

A boolean indicating when the [[codec implementation]] is +unable to accept additional decoding work.

+
[[output callback]] +
+

Callback given at construction for decoded outputs.

+
[[error callback]] +
+

Callback given at construction for decode errors.

+
[[key chunk required]] +
+

A boolean indicating that the next chunk passed to decode() +MUST describe a key chunk as indicated by +[[type]].

+
[[state]] +
+

The current CodecState of this AudioDecoder.

+
[[decodeQueueSize]] +
+

The number of pending decode requests. This number will decrease as the +underlying codec is ready to accept new input.

+
[[pending flush promises]] +
+

A list of unresolved promises returned by calls to flush().

+
[[dequeue event scheduled]] +
+

A boolean indicating whether a dequeue event is already +scheduled to fire. Used to avoid event spam.

+
+

3.2. Constructors

+ + + AudioDecoder(init) + + +
    +
  1. +

    Let d be a new AudioDecoder object.

    +
  2. +

    Assign a new queue to [[control message queue]].

    +
  3. +

    Assign false to [[message queue blocked]].

    +
  4. +

    Assign null to [[codec implementation]].

    +
  5. +

    Assign the result of starting a new parallel queue to +[[codec work queue]].

    +
  6. +

    Assign false to [[codec saturated]].

    +
  7. +

    Assign init.output to [[output callback]].

    +
  8. +

    Assign init.error to [[error callback]].

    +
  9. +

    Assign true to [[key chunk required]].

    +
  10. +

    Assign "unconfigured" to [[state]]

    +
  11. +

    Assign 0 to [[decodeQueueSize]].

    +
  12. +

    Assign a new list to [[pending flush promises]].

    +
  13. +

    Assign false to [[dequeue event scheduled]].

    +
  14. +

    Return d.

    +
+

3.3. Attributes

+
+
state, of type CodecState, readonly +
+

Returns the value of [[state]].

+
decodeQueueSize, of type unsigned long, readonly +
+

Returns the value of [[decodeQueueSize]].

+
ondequeue, of type EventHandler +
+

An event handler IDL attribute whose event handler event type is +dequeue.

+
+

3.4. Event Summary

+
+
dequeue +
+

Fired at the AudioDecoder when the decodeQueueSize has +decreased.

+
+

3.5. Methods

+
+
configure(config) +
+ + Enqueues a control message to configure the audio decoder for decoding + chunks as described by config. + + +

NOTE: This method will trigger a NotSupportedError if the User Agent + does not support config. Authors are encouraged to first check support + by calling isConfigSupported() with config. User + Agents don’t have to support any particular codec type or configuration.

+

When invoked, run these steps:

+
    +
  1. +

    If config is not a valid AudioDecoderConfig, throw a +TypeError.

    +
  2. +

    If [[state]] is “closed”, throw an InvalidStateError.

    +
  3. +

    Set [[state]] to "configured".

    +
  4. +

    Set [[key chunk required]] to true.

    +
  5. +

    Queue a control message to configure the decoder with config.

    +
  6. +

    Process the control message queue.

    +
+

Running a control message to configure the decoder means running + these steps:

+
    +
  1. +

    Assign true to [[message queue blocked]].

    +
  2. +

    Enqueue the following steps to [[codec work queue]]:

    +
      +
    1. +

      Let supported be the result of running the Check Configuration +Support algorithm with config.

      +
    2. +

      If supported is false, queue a task to run the Close +AudioDecoder algorithm with NotSupportedError and abort +these steps.

      +
    3. +

      If needed, assign [[codec implementation]] with an +implementation supporting config.

      +
    4. +

      Configure [[codec implementation]] with config.

      +
    5. +

      Queue a task to run the following steps:

      +
        +
      1. +

        Assign false to [[message queue blocked]].

        +
      2. +

        Queue a task to Process the control message queue.

        +
      +
    +
  3. +

    Return "processed".

    +
+
decode(chunk) +
+ + Enqueues a control message to decode the given chunk. + + +

When invoked, run these steps:

+
    +
  1. +

    If [[state]] is not "configured", throw an +InvalidStateError.

    +
  2. +

    If [[key chunk required]] is true:

    +
      +
    1. +

      If chunk.[[type]] is not +key, throw a DataError.

      +
    2. +

      Implementers SHOULD inspect the chunk’s +[[internal data]] to verify that +it is truly a key chunk. If a mismatch is detected, throw a +DataError.

      +
    3. +

      Otherwise, assign false to +[[key chunk required]].

      +
    +
  3. +

    Increment [[decodeQueueSize]].

    +
  4. +

    Queue a control message to decode the chunk.

    +
  5. +

    Process the control message queue.

    +
+

Running a control message to decode the chunk means performing these + steps:

+
    +
  1. +

    If [[codec saturated]] equals true, return "not processed".

    +
  2. +

    If decoding chunk will cause the +[[codec implementation]] to become saturated, +assign true to [[codec saturated]].

    +
  3. +

    Decrement [[decodeQueueSize]] and run the +Schedule Dequeue Event algorithm.

    +
  4. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Attempt to use [[codec implementation]] to decode +the chunk.

      +
    2. +

      If decoding results in an error, queue a task to run the +Close AudioDecoder algorithm with EncodingError and return.

      +
    3. +

      If [[codec saturated]] equals true and +[[codec implementation]] is no longer +saturated, queue a task to perform the following steps:

      +
        +
      1. +

        Assign false to [[codec saturated]].

        +
      2. +

        Process the control message queue.

        +
      +
    4. +

      Let decoded outputs be a list of decoded audio data outputs +emitted by [[codec implementation]].

      +
    5. +

      If decoded outputs is not empty, queue a task to run the +Output AudioData algorithm with decoded outputs.

      +
    +
  5. +

    Return "processed".

    +
+
flush() +
+ + Completes all control messages in the control message queue + and emits all outputs. + + +

When invoked, run these steps:

+
    +
  1. +

    If [[state]] is not "configured", return +a promise rejected with InvalidStateError DOMException.

    +
  2. +

    Set [[key chunk required]] to true.

    +
  3. +

    Let promise be a new Promise.

    +
  4. +

    Append promise to [[pending flush promises]].

    +
  5. +

    Queue a control message to flush the codec with promise.

    +
  6. +

    Process the control message queue.

    +
  7. +

    Return promise.

    +
+

Running a control message to flush the codec means performing these
+ steps with promise:

+
    +
  1. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Signal [[codec implementation]] to emit all +internal pending outputs.

      +
    2. +

      Let decoded outputs be a list of decoded audio data outputs +emitted by [[codec implementation]].

      +
    3. +

      Queue a task to perform these steps:

      +
        +
      1. +

        If decoded outputs is not empty, run the Output AudioData +algorithm with decoded outputs.

        +
      2. +

        Remove promise from +[[pending flush promises]].

        +
      3. +

        Resolve promise.

        +
      +
    +
  2. +

    Return "processed".

    +
+
reset() +
+ + Immediately resets all state including configuration, + control messages in the control message queue, and all pending + callbacks. + + +

When invoked, run the Reset AudioDecoder algorithm with an + AbortError DOMException.

+
close() +
+ + Immediately aborts all pending work and releases system resources. + Close is final. + + +

When invoked, run the Close AudioDecoder algorithm with an + AbortError DOMException.

+
isConfigSupported(config) +
+ + Returns a promise indicating whether the provided config is supported by + the User Agent. + + +

NOTE: The returned AudioDecoderSupport config + will contain only the dictionary members that User Agent recognized. + Unrecognized dictionary members will be ignored. Authors can detect + unrecognized dictionary members by comparing + config to their provided config.

+

When invoked, run these steps:

+
    +
  1. +

    If config is not a valid AudioDecoderConfig, return +a promise rejected with TypeError.

    +
  2. +

    Let p be a new Promise.

    +
  3. +

    Let checkSupportQueue be the result of starting a new parallel +queue.

    +
  4. +

    Enqueue the following steps to checkSupportQueue:

    +
      +
    1. +

      Let supported be the result of running +the Check Configuration Support algorithm with config.

      +
    2. +

      Queue a task to run the following steps:

      +
        +
      1. +

        Let decoderSupport be a newly constructed +AudioDecoderSupport, initialized as follows:

        +
          +
        1. +

          Set config to the result of running the +Clone Configuration algorithm with config.

          +
        2. +

          Set supported to supported.

          +
        +
      2. +

        Resolve p with decoderSupport.

        +
      +
    +
  5. +

    Return p.

    +
+
+

3.6. Algorithms

+
+
Schedule Dequeue Event + +
+
    +
  1. +

    If [[dequeue event scheduled]] equals true, return.

    +
  2. +

    Assign true to [[dequeue event scheduled]].

    +
  3. +

    Queue a task to run the following steps:

    +
      +
    1. +

      Fire a simple event named dequeue at this.

      +
    2. +

      Assign false to [[dequeue event scheduled]].

      +
    +
+
Output AudioData (with outputs) +
+ + Run these steps: + +
    +
  1. +

    For each output in outputs:

    +
      +
    1. +

      Let data be an AudioData, initialized as follows:

      +
        +
      1. +

        Assign false to [[Detached]].

        +
      2. +

        Let resource be the media resource described by output.

        +
      3. +

        Let resourceReference be a reference to resource.

        +
      4. +

        Assign resourceReference to +[[resource reference]].

        +
      5. +

        Let timestamp be the [[timestamp]] of the +EncodedAudioChunk associated with output.

        +
      6. +

        Assign timestamp to [[timestamp]].

        +
      7. +

        If output uses a recognized AudioSampleFormat, assign that +format to [[format]]. Otherwise, assign null to +[[format]].

        +
      8. +

        Assign values to [[sample rate]], +[[number of frames]], and +[[number of channels]] as determined by output.

        +
      +
    2. +

      Invoke [[output callback]] with data.

      +
    +
+
Reset AudioDecoder (with exception) +
+ + Run these steps: + +
    +
  1. +

    If [[state]] is "closed", throw an InvalidStateError.

    +
  2. +

    Set [[state]] to "unconfigured".

    +
  3. +

    Signal [[codec implementation]] to cease producing +output for the previous configuration.

    +
  4. +

    Remove all control messages from the +[[control message queue]].

    +
  5. +

    If [[decodeQueueSize]] is greater than zero:

    +
      +
    1. +

      Set [[decodeQueueSize]] to zero.

      +
    2. +

      Run the Schedule Dequeue Event algorithm.

      +
    +
  6. +

    For each promise in [[pending flush promises]]:

    +
      +
    1. +

      Reject promise with exception.

      +
    2. +

      Remove promise from [[pending flush promises]].

      +
    +
+
Close AudioDecoder (with exception) +
+ + Run these steps: + +
    +
  1. +

    Run the Reset AudioDecoder algorithm with exception.

    +
  2. +

    Set [[state]] to "closed".

    +
  3. +

    Clear [[codec implementation]] and release associated +system resources.

    +
  4. +

    If exception is not an AbortError DOMException, +invoke the [[error callback]] with exception.

    +
+
+

4. VideoDecoder Interface

+
[Exposed=(Window,DedicatedWorker), SecureContext]
+interface VideoDecoder : EventTarget {
+  constructor(VideoDecoderInit init);
+
+  readonly attribute CodecState state;
+  readonly attribute unsigned long decodeQueueSize;
+  attribute EventHandler ondequeue;
+
+  undefined configure(VideoDecoderConfig config);
+  undefined decode(EncodedVideoChunk chunk);
+  Promise<undefined> flush();
+  undefined reset();
+  undefined close();
+
+  static Promise<VideoDecoderSupport> isConfigSupported(VideoDecoderConfig config);
+};
+
+dictionary VideoDecoderInit {
+  required VideoFrameOutputCallback output;
+  required WebCodecsErrorCallback error;
+};
+
+callback VideoFrameOutputCallback = undefined(VideoFrame output);
+
+

4.1. Internal Slots

+
+
[[control message queue]] +
+

A queue of control messages to be performed upon this codec +instance. See [[control message queue]].

+
[[message queue blocked]] +
+

A boolean indicating when processing the +[[control message queue]] is blocked by a pending +control message. See [[message queue blocked]].

+
[[codec implementation]] +
+

Underlying decoder implementation provided by the User Agent. See +[[codec implementation]].

+
[[codec work queue]] +
+

A parallel queue used for running parallel steps that reference the +[[codec implementation]]. See [[codec work queue]].

+
[[codec saturated]] +
+

A boolean indicating when the [[codec implementation]] is +unable to accept additional decoding work.

+
[[output callback]] +
+

Callback given at construction for decoded outputs.

+
[[error callback]] +
+

Callback given at construction for decode errors.

+
[[active decoder config]] +
+

The VideoDecoderConfig that is actively applied.

+
[[key chunk required]] +
+

A boolean indicating that the next chunk passed to decode() +MUST describe a key chunk as indicated by +type.

+
[[state]] +
+

The current CodecState of this VideoDecoder.

+
[[decodeQueueSize]] +
+

The number of pending decode requests. This number will decrease as the +underlying codec is ready to accept new input.

+
[[pending flush promises]] +
+

A list of unresolved promises returned by calls to flush().

+
[[dequeue event scheduled]] +
+

A boolean indicating whether a dequeue event is already +scheduled to fire. Used to avoid event spam.

+
+

4.2. Constructors

+ + + VideoDecoder(init) + + +
    +
  1. +

    Let d be a new VideoDecoder object.

    +
  2. +

    Assign a new queue to [[control message queue]].

    +
  3. +

    Assign false to [[message queue blocked]].

    +
  4. +

    Assign null to [[codec implementation]].

    +
  5. +

    Assign the result of starting a new parallel queue to +[[codec work queue]].

    +
  6. +

    Assign false to [[codec saturated]].

    +
  7. +

    Assign init.output to [[output callback]].

    +
  8. +

    Assign init.error to [[error callback]].

    +
  9. +

    Assign null to [[active decoder config]].

    +
  10. +

    Assign true to [[key chunk required]].

    +
  11. +

    Assign "unconfigured" to [[state]]

    +
  12. +

    Assign 0 to [[decodeQueueSize]].

    +
  13. +

    Assign a new list to [[pending flush promises]].

    +
  14. +

    Assign false to [[dequeue event scheduled]].

    +
  15. +

    Return d.

    +
+

4.3. Attributes

+
+
state, of type CodecState, readonly +
+

Returns the value of [[state]].

+
decodeQueueSize, of type unsigned long, readonly +
+

Returns the value of [[decodeQueueSize]].

+
ondequeue, of type EventHandler +
+

An event handler IDL attribute whose event handler event type is +dequeue.

+
+

4.4. Event Summary

+
+
dequeue +
+

Fired at the VideoDecoder when the decodeQueueSize has +decreased.

+
+

4.5. Methods

+
+
configure(config) +
+ + Enqueues a control message to configure the video decoder for decoding + chunks as described by config. + + +

NOTE: This method will trigger a NotSupportedError if the User Agent + does not support config. Authors are encouraged to first check support + by calling isConfigSupported() with config. User Agents + don’t have to support any particular codec type or configuration.

+

When invoked, run these steps:

+
    +
  1. +

    If config is not a valid VideoDecoderConfig, throw a +TypeError.

    +
  2. +

    If [[state]] is "closed", throw an InvalidStateError.

    +
  3. +

    Set [[state]] to "configured".

    +
  4. +

    Set [[key chunk required]] to true.

    +
  5. +

    Queue a control message to configure the decoder with config.

    +
  6. +

    Process the control message queue.

    +
+

Running a control message to configure the decoder means running + these steps:

+
    +
  1. +

    Assign true to [[message queue blocked]].

    +
  2. +

    Enqueue the following steps to [[codec work queue]]:

    +
      +
    1. +

      Let supported be the result of running the Check Configuration +Support algorithm with config.

      +
    2. +

      If supported is false, queue a task to run the Close +VideoDecoder algorithm with NotSupportedError and abort +these steps.

      +
    3. +

      If needed, assign [[codec implementation]] with an +implementation supporting config.

      +
    4. +

      Configure [[codec implementation]] with config.

      +
    5. +

      Queue a task to run the following steps:

      +
        +
      1. +

        Assign false to [[message queue blocked]].

        +
      2. +

        Queue a task to Process the control message queue.

        +
      +
    +
  3. +

    Return "processed".

    +
+
decode(chunk) +
+ + Enqueues a control message to decode the given chunk. + + +

NOTE: Authors are encouraged to call close() on output + VideoFrames immediately when frames are no longer needed. The + underlying media resources are owned by the VideoDecoder and + failing to release them (or waiting for garbage collection) can cause + decoding to stall.

+

NOTE: VideoDecoder requires that frames are output in the order they are expected to be presented, commonly known as presentation order. When using some [[codec implementation]]s the User Agent will have to reorder outputs into presentation order.

+

When invoked, run these steps:

+
    +
  1. +

    If [[state]] is not "configured", throw an +InvalidStateError.

    +
  2. +

    If [[key chunk required]] is true:

    +
      +
    1. +

      If chunk.type is not +key, throw a DataError.

      +
    2. +

      Implementers SHOULD inspect the chunk’s +[[internal data]] to verify that +it is truly a key chunk. If a mismatch is detected, throw a +DataError.

      +
    3. +

      Otherwise, assign false to +[[key chunk required]].

      +
    +
  3. +

    Increment [[decodeQueueSize]].

    +
  4. +

    Queue a control message to decode the chunk.

    +
  5. +

    Process the control message queue.

    +
+

Running a control message to decode the chunk means performing these steps:

+
    +
  1. +

    If [[codec saturated]] equals true, return "not processed".

    +
  2. +

    If decoding chunk will cause the +[[codec implementation]] to become saturated, +assign true to [[codec saturated]].

    +
  3. +

    Decrement [[decodeQueueSize]] and run the +Schedule Dequeue Event algorithm.

    +
  4. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Attempt to use [[codec implementation]] to decode +the chunk.

      +
    2. +

      If decoding results in an error, queue a task to run the +Close VideoDecoder algorithm with EncodingError and return.

      +
    3. +

      If [[codec saturated]] equals true and +[[codec implementation]] is no longer +saturated, queue a task to perform the following steps:

      +
        +
      1. +

        Assign false to [[codec saturated]].

        +
      2. +

        Process the control message queue.

        +
      +
    4. +

      Let decoded outputs be a list of decoded video data outputs +emitted by [[codec implementation]] in +presentation order.

      +
    5. +

      If decoded outputs is not empty, queue a task to run the Output VideoFrames algorithm with decoded outputs.

      +
    +
  5. +

    Return "processed".

    +
+
flush() +
+ + Completes all control messages in the control message queue + and emits all outputs. + + +

When invoked, run these steps:

+
    +
  1. +

    If [[state]] is not "configured", return +a promise rejected with InvalidStateError DOMException.

    +
  2. +

    Set [[key chunk required]] to true.

    +
  3. +

    Let promise be a new Promise.

    +
  4. +

    Append promise to [[pending flush promises]].

    +
  5. +

    Queue a control message to flush the codec with promise.

    +
  6. +

    Process the control message queue.

    +
  7. +

    Return promise.

    +
+

Running a control message to flush the codec means performing these + steps with promise.

+
    +
  1. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Signal [[codec implementation]] to emit all +internal pending outputs.

      +
    2. +

      Let decoded outputs be a list of decoded video data outputs +emitted by [[codec implementation]].

      +
    3. +

      Queue a task to perform these steps:

      +
        +
      1. +

        If decoded outputs is not empty, run the Output VideoFrames algorithm with decoded outputs.

        +
      2. +

        Remove promise from +[[pending flush promises]].

        +
      3. +

        Resolve promise.

        +
      +
    +
  2. +

    Return "processed".

    +
+
reset() +
+ + Immediately resets all state including configuration, + control messages in the control message queue, and all pending + callbacks. + + +

When invoked, run the Reset VideoDecoder algorithm with an + AbortError DOMException.

+
close() +
+ + Immediately aborts all pending work and releases system resources. + Close is final. + + +

When invoked, run the Close VideoDecoder algorithm with an + AbortError DOMException.

+
isConfigSupported(config) +
+ + Returns a promise indicating whether the provided config is supported by + the User Agent. + + +

NOTE: The returned VideoDecoderSupport config + will contain only the dictionary members that User Agent recognized. + Unrecognized dictionary members will be ignored. Authors can detect + unrecognized dictionary members by comparing + config to their provided config.

+

When invoked, run these steps:

+
    +
  1. +

    If config is not a valid VideoDecoderConfig, return +a promise rejected with TypeError.

    +
  2. +

    Let p be a new Promise.

    +
  3. +

    Let checkSupportQueue be the result of starting a new parallel +queue.

    +
  4. +

    Enqueue the following steps to checkSupportQueue:

    +
      +
    1. +

      Let supported be the result of running +the Check Configuration Support algorithm with config.

      +
    2. +

      Queue a task to run the following steps:

      +
        +
      1. +

        Let decoderSupport be a newly constructed +VideoDecoderSupport, initialized as follows:

        +
          +
        1. +

          Set config to the result of running the +Clone Configuration algorithm with config.

          +
        2. +

          Set supported to supported.

          +
        +
      2. +

        Resolve p with decoderSupport.

        +
      +
    +
  5. +

    Return p.

    +
+
+

4.6. Algorithms

+
+
Schedule Dequeue Event + +
+
    +
  1. +

    If [[dequeue event scheduled]] equals true, return.

    +
  2. +

    Assign true to [[dequeue event scheduled]].

    +
  3. +

    Queue a task to run the following steps:

    +
      +
    1. +

      Fire a simple event named dequeue at this.

      +
    2. +

      Assign false to [[dequeue event scheduled]].

      +
    +
+
Output VideoFrames (with outputs) +
+ + Run these steps: + +
    +
  1. +

    For each output in outputs:

    +
      +
    1. +

      Let timestamp and duration be the +timestamp and duration +from the EncodedVideoChunk associated with output.

      +
    2. +

      Let displayAspectWidth and displayAspectHeight be undefined.

      +
    3. +

      If displayAspectWidth and +displayAspectHeight exist in the +[[active decoder config]], assign their values to +displayAspectWidth and displayAspectHeight respectively.

      +
    4. +

      Let colorSpace be the VideoColorSpace for output as detected +by the codec implementation. If no VideoColorSpace is detected, +let colorSpace be undefined.

      +

      NOTE: The codec implementation can detect a VideoColorSpace by + analyzing the bitstream. Detection is made on a best-effort + basis. The exact method of detection is implementer defined and + codec-specific. Authors can override the detected + VideoColorSpace by providing a + colorSpace in the VideoDecoderConfig.

      +
    5. +

      If colorSpace exists in the +[[active decoder config]], assign its value to +colorSpace.

      +
    6. +

      Assign the values of rotation and flip from the [[active decoder config]] to rotation and flip respectively.

      +
    7. +

      Let frame be the result of running the Create a VideoFrame +algorithm with output, timestamp, duration, +displayAspectWidth, displayAspectHeight, colorSpace, +rotation, and flip.

      +
    8. +

      Invoke [[output callback]] with frame.

      +
    +
+
Reset VideoDecoder (with exception) +
+ + Run these steps: + +
    +
  1. +

    If [[state]] is "closed", throw an InvalidStateError.

    +
  2. +

    Set [[state]] to "unconfigured".

    +
  3. +

    Signal [[codec implementation]] to cease producing +output for the previous configuration.

    +
  4. +

    Remove all control messages from the +[[control message queue]].

    +
  5. +

    If [[decodeQueueSize]] is greater than zero:

    +
      +
    1. +

      Set [[decodeQueueSize]] to zero.

      +
    2. +

      Run the Schedule Dequeue Event algorithm.

      +
    +
  6. +

    For each promise in [[pending flush promises]]:

    +
      +
    1. +

      Reject promise with exception.

      +
    2. +

      Remove promise from [[pending flush promises]].

      +
    +
+
Close VideoDecoder (with exception) +
+ + Run these steps: + +
    +
  1. +

    Run the Reset VideoDecoder algorithm with exception.

    +
  2. +

    Set [[state]] to "closed".

    +
  3. +

    Clear [[codec implementation]] and release associated +system resources.

    +
  4. +

    If exception is not an AbortError DOMException, +invoke the [[error callback]] with exception.

    +
+
+

5. AudioEncoder Interface

+
[Exposed=(Window,DedicatedWorker), SecureContext]
interface AudioEncoder : EventTarget {
  constructor(AudioEncoderInit init);

  readonly attribute CodecState state;
  readonly attribute unsigned long encodeQueueSize;
  attribute EventHandler ondequeue;

  undefined configure(AudioEncoderConfig config);
  undefined encode(AudioData data);
  Promise<undefined> flush();
  undefined reset();
  undefined close();

  static Promise<AudioEncoderSupport> isConfigSupported(AudioEncoderConfig config);
};

dictionary AudioEncoderInit {
  required EncodedAudioChunkOutputCallback output;
  required WebCodecsErrorCallback error;
};

callback EncodedAudioChunkOutputCallback =
    undefined (EncodedAudioChunk output,
               optional EncodedAudioChunkMetadata metadata = {});

5.1. Internal Slots

+
+
[[control message queue]] +
+

A queue of control messages to be performed upon this codec +instance. See [[control message queue]].

+
[[message queue blocked]] +
+

A boolean indicating when processing the +[[control message queue]] is blocked by a pending +control message. See [[message queue blocked]].

+
[[codec implementation]] +
+

Underlying encoder implementation provided by the User Agent. See +[[codec implementation]].

+
[[codec work queue]] +
+

A parallel queue used for running parallel steps that reference the +[[codec implementation]]. See [[codec work queue]].

+
[[codec saturated]] +
+

A boolean indicating when the [[codec implementation]] is +unable to accept additional encoding work.

+
[[output callback]] +
+

Callback given at construction for encoded outputs.

+
[[error callback]] +
+

Callback given at construction for encode errors.

+
[[active encoder config]] +
+

The AudioEncoderConfig that is actively applied.

+
[[active output config]] +
+

The AudioDecoderConfig that describes how to decode the most recently +emitted EncodedAudioChunk.

+
[[state]] +
+

The current CodecState of this AudioEncoder.

+
[[encodeQueueSize]] +
+

The number of pending encode requests. This number will decrease as the +underlying codec is ready to accept new input.

+
[[pending flush promises]] +
+

A list of unresolved promises returned by calls to flush().

+
[[dequeue event scheduled]] +
+

A boolean indicating whether a dequeue event is already +scheduled to fire. Used to avoid event spam.

+
+

5.2. Constructors

+ + + AudioEncoder(init) + + +
    +
  1. +

    Let e be a new AudioEncoder object.

    +
  2. +

    Assign a new queue to [[control message queue]].

    +
  3. +

    Assign false to [[message queue blocked]].

    +
  4. +

    Assign null to [[codec implementation]].

    +
  5. +

    Assign the result of starting a new parallel queue to +[[codec work queue]].

    +
  6. +

    Assign false to [[codec saturated]].

    +
  7. +

    Assign init.output to [[output callback]].

    +
  8. +

    Assign init.error to [[error callback]].

    +
  9. +

    Assign null to [[active encoder config]].

    +
  10. +

    Assign null to [[active output config]].

    +
  11. +

    Assign "unconfigured" to [[state]].

    +
  12. +

    Assign 0 to [[encodeQueueSize]].

    +
  13. +

    Assign a new list to [[pending flush promises]].

    +
  14. +

    Assign false to [[dequeue event scheduled]].

    +
  15. +

    Return e.

    +
+

5.3. Attributes

+
+
state, of type CodecState, readonly +
+

Returns the value of [[state]].

+
encodeQueueSize, of type unsigned long, readonly +
+

Returns the value of [[encodeQueueSize]].

+
ondequeue, of type EventHandler +
+

An event handler IDL attribute whose event handler event type is +dequeue.

+
+

5.4. Event Summary

+
+
dequeue +
+

Fired at the AudioEncoder when the encodeQueueSize has +decreased.

+
+

5.5. Methods

+
+
configure(config) +
+ + Enqueues a control message to configure the audio encoder for + encoding audio data as described by config. + + +

NOTE: This method will trigger a NotSupportedError if the User Agent + does not support config. Authors are encouraged to first check support + by calling isConfigSupported() with config. User + Agents don’t have to support any particular codec type or configuration.

+

When invoked, run these steps:

+
    +
  1. +

    If config is not a valid AudioEncoderConfig, throw a +TypeError.

    +
  2. +

    If [[state]] is "closed", throw an +InvalidStateError.

    +
  3. +

    Set [[state]] to "configured".

    +
  4. +

    Queue a control message to configure the encoder using config.

    +
  5. +

    Process the control message queue.

    +
+

Running a control message to configure the encoder means performing + these steps:

+
    +
  1. +

    Assign true to [[message queue blocked]].

    +
  2. +

    Enqueue the following steps to [[codec work queue]]:

    +
      +
    1. +

      Let supported be the result of running the Check Configuration +Support algorithm with config.

      +
    2. +

      If supported is false, queue a task to run the Close +AudioEncoder algorithm with NotSupportedError and abort +these steps.

      +
    3. +

      If needed, assign [[codec implementation]] with an +implementation supporting config.

      +
    4. +

      Configure [[codec implementation]] with config.

      +
    5. +

      Queue a task to run the following steps:

      +
        +
      1. +

        Assign false to [[message queue blocked]].

        +
      2. +

        Queue a task to Process the control message queue.

        +
      +
    +
  3. +

    Return "processed".

    +
+
encode(data) +
+ + Enqueues a control message to encode the given data. + + +

When invoked, run these steps:

+
    +
  1. +

    If the value of data’s [[Detached]] internal slot +is true, throw a TypeError.

    +
  2. +

    If [[state]] is not "configured", throw an +InvalidStateError.

    +
  3. +

    Let dataClone hold the result of running the Clone AudioData +algorithm with data.

    +
  4. +

    Increment [[encodeQueueSize]].

    +
  5. +

    Queue a control message to encode dataClone.

    +
  6. +

    Process the control message queue.

    +
+

Running a control message to encode the data means performing these + steps:

+
    +
  1. +

    If [[codec saturated]] equals true, return "not processed".

    +
  2. +

    If encoding data will cause the +[[codec implementation]] to become saturated, +assign true to [[codec saturated]].

    +
  3. +

    Decrement [[encodeQueueSize]] and run the +Schedule Dequeue Event algorithm.

    +
  4. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Attempt to use [[codec implementation]] to encode +the media resource described by dataClone.

      +
    2. +

      If encoding results in an error, queue a task to run the +Close AudioEncoder algorithm with EncodingError and return.

      +
    3. +

      If [[codec saturated]] equals true and +[[codec implementation]] is no longer +saturated, queue a task to perform the following steps:

      +
        +
      1. +

        Assign false to [[codec saturated]].

        +
      2. +

        Process the control message queue.

        +
      +
    4. +

      Let encoded outputs be a list of encoded audio data outputs +emitted by [[codec implementation]].

      +
    5. +

      If encoded outputs is not empty, queue a task to run the +Output EncodedAudioChunks algorithm with encoded outputs.

      +
    +
  5. +

    Return "processed".

    +
+
flush() +
+ + Completes all control messages in the control message queue + and emits all outputs. + + +

When invoked, run these steps:

+
    +
  1. +

    If [[state]] is not "configured", return +a promise rejected with InvalidStateError DOMException.

    +
  2. +

    Let promise be a new Promise.

    +
  3. +

    Append promise to [[pending flush promises]].

    +
  4. +

    Queue a control message to flush the codec with promise.

    +
  5. +

    Process the control message queue.

    +
  6. +

    Return promise.

    +
+

Running a control message to flush the codec means performing these + steps with promise.

+
    +
  1. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Signal [[codec implementation]] to emit all +internal pending outputs.

      +
    2. +

      Let encoded outputs be a list of encoded audio data outputs +emitted by [[codec implementation]].

      +
    3. +

      Queue a task to perform these steps:

      +
        +
      1. +

        If encoded outputs is not empty, run the +Output EncodedAudioChunks algorithm with encoded outputs.

        +
      2. +

        Remove promise from +[[pending flush promises]].

        +
      3. +

        Resolve promise.

        +
      +
    +
  2. +

    Return "processed".

    +
+
reset() +
+ + Immediately resets all state including configuration, + control messages in the control message queue, and all pending + callbacks. + + +

When invoked, run the Reset AudioEncoder algorithm with an + AbortError DOMException.

+
close() +
+ + Immediately aborts all pending work and releases system resources. + Close is final. + + +

When invoked, run the Close AudioEncoder algorithm with an + AbortError DOMException.

+
isConfigSupported(config) +
+ + Returns a promise indicating whether the provided config is supported by + the User Agent. + + +

NOTE: The returned AudioEncoderSupport config + will contain only the dictionary members that User Agent recognized. + Unrecognized dictionary members will be ignored. Authors can detect + unrecognized dictionary members by comparing + config to their provided config.

+

When invoked, run these steps:

+
    +
  1. +

    If config is not a valid AudioEncoderConfig, return +a promise rejected with TypeError.

    +
  2. +

    Let p be a new Promise.

    +
  3. +

    Let checkSupportQueue be the result of starting a new parallel +queue.

    +
  4. +

    Enqueue the following steps to checkSupportQueue:

    +
      +
    1. +

      Let supported be the result of running +the Check Configuration Support algorithm with config.

      +
    2. +

      Queue a task to run the following steps:

      +
        +
      1. +

        Let encoderSupport be a newly constructed +AudioEncoderSupport, initialized as follows:

        +
          +
        1. +

          Set config to the result of running the +Clone Configuration algorithm with config.

          +
        2. +

          Set supported to supported.

          +
        +
      2. +

        Resolve p with encoderSupport.

        +
      +
    +
  5. +

    Return p.

    +
+
+

5.6. Algorithms

+
+
Schedule Dequeue Event + +
+
    +
  1. +

    If [[dequeue event scheduled]] equals true, return.

    +
  2. +

    Assign true to [[dequeue event scheduled]].

    +
  3. +

    Queue a task to run the following steps:

    +
      +
    1. +

      Fire a simple event named dequeue at this.

      +
    2. +

      Assign false to [[dequeue event scheduled]].

      +
    +
+
Output EncodedAudioChunks (with outputs) +
+ + Run these steps: + +
    +
  1. +

    For each output in outputs:

    +
      +
    1. +

      Let chunkInit be an EncodedAudioChunkInit with the following +keys:

      +
        +
      1. +

        Let data contain the encoded audio data +from output.

        +
      2. +

        Let type be the +EncodedAudioChunkType of output.

        +
      3. +

        Let timestamp be the +timestamp from the AudioData associated with +output.

        +
      4. +

        Let duration be the +duration from the AudioData associated with +output.

        +
      +
    2. +

      Let chunk be a new EncodedAudioChunk constructed with +chunkInit.

      +
    3. +

      Let chunkMetadata be a new EncodedAudioChunkMetadata.

      +
    4. +

      Let encoderConfig be the +[[active encoder config]].

      +
    5. +

      Let outputConfig be a new AudioDecoderConfig that describes +output. Initialize outputConfig as follows:

      +
        +
      1. +

        Assign encoderConfig.codec to +outputConfig.codec.

        +
      2. +

        Assign encoderConfig.sampleRate to +outputConfig.sampleRate.

        +
      3. +

        Assign encoderConfig.numberOfChannels to outputConfig.numberOfChannels.

        +
      4. +

        Assign outputConfig.description with a +sequence of codec specific bytes as determined by the +[[codec implementation]]. The User Agent MUST ensure that the provided description +could be used to correctly decode output.

        +

        NOTE: The codec specific requirements for populating the + description are described in the + [WEBCODECS-CODEC-REGISTRY].

        +
      +
    6. +

      If outputConfig and [[active output config]] are +not equal dictionaries:

      +
        +
      1. +

        Assign outputConfig to +chunkMetadata.decoderConfig.

        +
      2. +

        Assign outputConfig to +[[active output config]].

        +
      +
    7. +

      Invoke [[output callback]] with chunk and +chunkMetadata.

      +
    +
+
Reset AudioEncoder (with exception) +
+ + Run these steps: + +
    +
  1. +

    If [[state]] is "closed", throw an InvalidStateError.

    +
  2. +

    Set [[state]] to "unconfigured".

    +
  3. +

    Set [[active encoder config]] to null.

    +
  4. +

    Set [[active output config]] to null.

    +
  5. +

    Signal [[codec implementation]] to cease producing +output for the previous configuration.

    +
  6. +

    Remove all control messages from the +[[control message queue]].

    +
  7. +

    If [[encodeQueueSize]] is greater than zero:

    +
      +
    1. +

      Set [[encodeQueueSize]] to zero.

      +
    2. +

      Run the Schedule Dequeue Event algorithm.

      +
    +
  8. +

    For each promise in [[pending flush promises]]:

    +
      +
    1. +

      Reject promise with exception.

      +
    2. +

      Remove promise from [[pending flush promises]].

      +
    +
+
Close AudioEncoder (with exception) +
+ + Run these steps: + +
    +
  1. +

    Run the Reset AudioEncoder algorithm with exception.

    +
  2. +

    Set [[state]] to "closed".

    +
  3. +

    Clear [[codec implementation]] and release associated +system resources.

    +
  4. +

    If exception is not an AbortError DOMException, +invoke the [[error callback]] with exception.

    +
+
+

5.7. EncodedAudioChunkMetadata

+ +The following metadata dictionary is emitted by the +EncodedAudioChunkOutputCallback alongside an associated +EncodedAudioChunk. + + +
dictionary EncodedAudioChunkMetadata {
  AudioDecoderConfig decoderConfig;
};
+
+
decoderConfig, of type AudioDecoderConfig +
+

An AudioDecoderConfig that authors MAY use to decode the associated EncodedAudioChunk.

+
+

6. VideoEncoder Interface

+
[Exposed=(Window,DedicatedWorker), SecureContext]
interface VideoEncoder : EventTarget {
  constructor(VideoEncoderInit init);

  readonly attribute CodecState state;
  readonly attribute unsigned long encodeQueueSize;
  attribute EventHandler ondequeue;

  undefined configure(VideoEncoderConfig config);
  undefined encode(VideoFrame frame, optional VideoEncoderEncodeOptions options = {});
  Promise<undefined> flush();
  undefined reset();
  undefined close();

  static Promise<VideoEncoderSupport> isConfigSupported(VideoEncoderConfig config);
};

dictionary VideoEncoderInit {
  required EncodedVideoChunkOutputCallback output;
  required WebCodecsErrorCallback error;
};

callback EncodedVideoChunkOutputCallback =
    undefined (EncodedVideoChunk chunk,
               optional EncodedVideoChunkMetadata metadata = {});

6.1. Internal Slots

+
+
[[control message queue]] +
+

A queue of control messages to be performed upon this codec +instance. See [[control message queue]].

+
[[message queue blocked]] +
+

A boolean indicating when processing the +[[control message queue]] is blocked by a pending +control message. See [[message queue blocked]].

+
[[codec implementation]] +
+

Underlying encoder implementation provided by the User Agent. See +[[codec implementation]].

+
[[codec work queue]] +
+

A parallel queue used for running parallel steps that reference the +[[codec implementation]]. See [[codec work queue]].

+
[[codec saturated]] +
+

A boolean indicating when the [[codec implementation]] is +unable to accept additional encoding work.

+
[[output callback]] +
+

Callback given at construction for encoded outputs.

+
[[error callback]] +
+

Callback given at construction for encode errors.

+
[[active encoder config]] +
+

The VideoEncoderConfig that is actively applied.

+
[[active output config]] +
+

The VideoDecoderConfig that describes how to decode the most recently +emitted EncodedVideoChunk.

+
[[state]] +
+

The current CodecState of this VideoEncoder.

+
[[encodeQueueSize]] +
+

The number of pending encode requests. This number will decrease as the +underlying codec is ready to accept new input.

+
[[pending flush promises]] +
+

A list of unresolved promises returned by calls to flush().

+
[[dequeue event scheduled]] +
+

A boolean indicating whether a dequeue event is already +scheduled to fire. Used to avoid event spam.

+
[[active orientation]] +
+

An integer and boolean pair indicating the [[flip]] and +[[rotation]] of the first VideoFrame given to +encode() after configure().

+
+

6.2. Constructors

+ + + VideoEncoder(init) + + +
    +
  1. +

    Let e be a new VideoEncoder object.

    +
  2. +

    Assign a new queue to [[control message queue]].

    +
  3. +

    Assign false to [[message queue blocked]].

    +
  4. +

    Assign null to [[codec implementation]].

    +
  5. +

    Assign the result of starting a new parallel queue to +[[codec work queue]].

    +
  6. +

    Assign false to [[codec saturated]].

    +
  7. +

    Assign init.output to [[output callback]].

    +
  8. +

    Assign init.error to [[error callback]].

    +
  9. +

    Assign null to [[active encoder config]].

    +
  10. +

    Assign null to [[active output config]].

    +
  11. +

    Assign "unconfigured" to [[state]].

    +
  12. +

    Assign 0 to [[encodeQueueSize]].

    +
  13. +

    Assign a new list to [[pending flush promises]].

    +
  14. +

    Assign false to [[dequeue event scheduled]].

    +
  15. +

    Return e.

    +
+

6.3. Attributes

+
+
state, of type CodecState, readonly +
+

Returns the value of [[state]].

+
encodeQueueSize, of type unsigned long, readonly +
+

Returns the value of [[encodeQueueSize]].

+
ondequeue, of type EventHandler +
+

An event handler IDL attribute whose event handler event type is +dequeue.

+
+

6.4. Event Summary

+
+
dequeue +
+

Fired at the VideoEncoder when the encodeQueueSize has +decreased.

+
+

6.5. Methods

+
+
configure(config) +
+ + Enqueues a control message to configure the video encoder for + encoding video frames as described by config. + + +

NOTE: This method will trigger a NotSupportedError if the User Agent + does not support config. Authors are encouraged to first check support + by calling isConfigSupported() with config. User + Agents don’t have to support any particular codec type or + configuration.

+

When invoked, run these steps:

+
    +
  1. +

    If config is not a valid VideoEncoderConfig, throw a +TypeError.

    +
  2. +

    If [[state]] is "closed", throw an +InvalidStateError.

    +
  3. +

    Set [[state]] to "configured".

    +
  4. +

    Set [[active orientation]] to null.

    +
  5. +

    Queue a control message to configure the encoder using config.

    +
  6. +

    Process the control message queue.

    +
+

Running a control message to configure the encoder means performing + these steps:

+
    +
  1. +

    Assign true to [[message queue blocked]].

    +
  2. +

    Enqueue the following steps to [[codec work queue]]:

    +
      +
    1. +

      Let supported be the result of running the Check Configuration +Support algorithm with config.

      +
    2. +

      If supported is false, queue a task to run the Close +VideoEncoder algorithm with NotSupportedError and abort +these steps.

      +
    3. +

      If needed, assign [[codec implementation]] with an +implementation supporting config.

      +
    4. +

      Configure [[codec implementation]] with config.

      +
    5. +

      Queue a task to run the following steps:

      +
        +
      1. +

        Assign false to [[message queue blocked]].

        +
      2. +

        Queue a task to Process the control message queue.

        +
      +
    +
  3. +

    Return "processed".

    +
+
encode(frame, options) +
+ + Enqueues a control message to encode the given frame. + + +

When invoked, run these steps:

+
    +
  1. +

    If the value of frame’s [[Detached]] internal slot +is true, throw a TypeError.

    +
  2. +

    If [[state]] is not "configured", throw an +InvalidStateError.

    +
  3. +

    If [[active orientation]] is not null and does not match +frame’s [[rotation]] and [[flip]] throw a +DataError.

    +
  4. +

    If [[active orientation]] is null, set it to frame’s +[[rotation]] and [[flip]].

    +
  5. +

    Let frameClone hold the result of running the Clone VideoFrame +algorithm with frame.

    +
  6. +

    Increment [[encodeQueueSize]].

    +
  7. +

    Queue a control message to encode frameClone.

    +
  8. +

    Process the control message queue.

    +
+

Running a control message to encode the frame means performing these + steps:

+
    +
  1. +

    If [[codec saturated]] equals true, return "not processed".

    +
  2. +

    If encoding frame will cause the +[[codec implementation]] to become saturated, +assign true to [[codec saturated]].

    +
  3. +

    Decrement [[encodeQueueSize]] and run the +Schedule Dequeue Event algorithm.

    +
  4. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Attempt to use [[codec implementation]] to encode +the frameClone according to options.

      +
    2. +

      If encoding results in an error, queue a task to run the +Close VideoEncoder algorithm with EncodingError and return.

      +
    3. +

      If [[codec saturated]] equals true and +[[codec implementation]] is no longer +saturated, queue a task to perform the following steps:

      +
        +
      1. +

        Assign false to [[codec saturated]].

        +
      2. +

        Process the control message queue.

        +
      +
    4. +

      Let encoded outputs be a list of encoded video data outputs +emitted by [[codec implementation]].

      +
    5. +

      If encoded outputs is not empty, queue a task to run the +Output EncodedVideoChunks algorithm with encoded outputs.

      +
    +
  5. +

    Return "processed".

    +
+
flush() +
+ + Completes all control messages in the control message queue + and emits all outputs. + + +

When invoked, run these steps:

+
    +
  1. +

    If [[state]] is not "configured", return +a promise rejected with InvalidStateError DOMException.

    +
  2. +

    Let promise be a new Promise.

    +
  3. +

    Append promise to [[pending flush promises]].

    +
  4. +

    Queue a control message to flush the codec with promise.

    +
  5. +

    Process the control message queue.

    +
  6. +

    Return promise.

    +
+

Running a control message to flush the codec means performing these + steps with promise:

+
    +
  1. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Signal [[codec implementation]] to emit all +internal pending outputs.

      +
    2. +

      Let encoded outputs be a list of encoded video data outputs +emitted by [[codec implementation]].

      +
    3. +

      Queue a task to perform these steps:

      +
        +
      1. +

        If encoded outputs is not empty, run the +Output EncodedVideoChunks algorithm with encoded outputs.

        +
      2. +

        Remove promise from +[[pending flush promises]].

        +
      3. +

        Resolve promise.

        +
      +
    +
  2. +

    Return "processed".

    +
+
reset() +
+ + Immediately resets all state including configuration, + control messages in the control message queue, and all pending + callbacks. + + +

When invoked, run the Reset VideoEncoder algorithm with an + AbortError DOMException.

+
close() +
+ + Immediately aborts all pending work and releases system resources. + Close is final. + + +

When invoked, run the Close VideoEncoder algorithm with an + AbortError DOMException.

+
isConfigSupported(config) +
+ + Returns a promise indicating whether the provided config is supported by + the User Agent. + + +

NOTE: The returned VideoEncoderSupport config + will contain only the dictionary members that User Agent recognized. + Unrecognized dictionary members will be ignored. Authors can detect + unrecognized dictionary members by comparing + config to their provided config.

+

When invoked, run these steps:

+
    +
  1. +

    If config is not a valid VideoEncoderConfig, return +a promise rejected with TypeError.

    +
  2. +

    Let p be a new Promise.

    +
  3. +

    Let checkSupportQueue be the result of starting a new parallel +queue.

    +
  4. +

    Enqueue the following steps to checkSupportQueue:

    +
      +
    1. +

      Let supported be the result of running +the Check Configuration Support algorithm with config.

      +
    2. +

      Queue a task to run the following steps:

      +
        +
      1. +

        Let encoderSupport be a newly constructed +VideoEncoderSupport, initialized as follows:

        +
          +
        1. +

          Set config to the result of running the +Clone Configuration algorithm with config.

          +
        2. +

          Set supported to supported.

          +
        +
      +
    3. +

      Resolve p with encoderSupport.

      +
    +
  5. +

    Return p.

    +
+
+

6.6. Algorithms

+
+
Schedule Dequeue Event + +
+
    +
  1. +

    If [[dequeue event scheduled]] equals true, return.

    +
  2. +

    Assign true to [[dequeue event scheduled]].

    +
  3. +

    Queue a task to run the following steps:

    +
      +
    1. +

      Fire a simple event named dequeue at this.

      +
    2. +

      Assign false to [[dequeue event scheduled]].

      +
    +
+
Output EncodedVideoChunks (with outputs) +
+ + Run these steps: + +
    +
  1. +

    For each output in outputs:

    +
      +
    1. +

      Let chunkInit be an EncodedVideoChunkInit with the following +keys:

      +
        +
      1. +

        Let data contain the encoded video data +from output.

        +
      2. +

        Let type be the +EncodedVideoChunkType of output.

        +
      3. +

        Let timestamp be the +[[timestamp]] from the VideoFrame +associated with output.

        +
      4. +

        Let duration be the +[[duration]] from the VideoFrame associated +with output.

        +
      +
    2. +

      Let chunk be a new EncodedVideoChunk constructed with +chunkInit.

      +
    3. +

      Let chunkMetadata be a new EncodedVideoChunkMetadata.

      +
    4. +

      Let encoderConfig be the +[[active encoder config]].

      +
    5. +

      Let outputConfig be a VideoDecoderConfig that describes +output. Initialize outputConfig as follows:

      +
        +
      1. +

        Assign encoderConfig.codec to outputConfig.codec.

        +
      2. +

        Assign encoderConfig.width to +outputConfig.codedWidth.

        +
      3. +

        Assign encoderConfig.height to +outputConfig.codedHeight.

        +
      4. +

        Assign encoderConfig.displayWidth to +outputConfig.displayAspectWidth.

        +
      5. +

        Assign encoderConfig.displayHeight to +outputConfig.displayAspectHeight.

        +
      6. +

        Assign [[rotation]] from the VideoFrame +associated with output to outputConfig.rotation.

        +
      7. +

        Assign [[flip]] from the VideoFrame +associated with output to outputConfig.flip.

        +
      8. +

        Assign the remaining keys of outputConfig as determined by +[[codec implementation]]. The User Agent +MUST ensure that the configuration is +completely described such that outputConfig could be used to +correctly decode output.

        +

        NOTE: The codec specific requirements for populating the + description are described in the + [WEBCODECS-CODEC-REGISTRY].

        +
      +
    6. +

      If outputConfig and [[active output config]] are +not equal dictionaries:

      +
        +
      1. +

        Assign outputConfig to +chunkMetadata.decoderConfig.

        +
      2. +

        Assign outputConfig to +[[active output config]].

        +
      +
    7. +

      If encoderConfig.scalabilityMode +describes multiple temporal layers:

      +
        +
      1. +

        Let svc be a new SvcOutputMetadata instance.

        +
      2. +

        Let temporal_layer_id be the zero-based index describing the +temporal layer for output.

        +
      3. +

        Assign temporal_layer_id to +svc.temporalLayerId.

        +
      4. +

        Assign svc to +chunkMetadata.svc.

        +
      +
    8. +

      If encoderConfig.alpha is set to "keep":

      +
        +
      1. +

        Let alphaSideData be the encoded alpha data in output.

        +
      2. +

        Assign alphaSideData to +chunkMetadata.alphaSideData.

        +
      +
    9. +

      Invoke [[output callback]] with chunk and +chunkMetadata.

      +
    +
+
Reset VideoEncoder (with exception) +
+ + Run these steps: + +
    +
  1. +

    If [[state]] is "closed", throw an InvalidStateError.

    +
  2. +

    Set [[state]] to "unconfigured".

    +
  3. +

    Set [[active encoder config]] to null.

    +
  4. +

    Set [[active output config]] to null.

    +
  5. +

    Signal [[codec implementation]] to cease producing +output for the previous configuration.

    +
  6. +

    Remove all control messages from the +[[control message queue]].

    +
  7. +

    If [[encodeQueueSize]] is greater than zero:

    +
      +
    1. +

      Set [[encodeQueueSize]] to zero.

      +
    2. +

      Run the Schedule Dequeue Event algorithm.

      +
    +
  8. +

    For each promise in [[pending flush promises]]:

    +
      +
    1. +

      Reject promise with exception.

      +
    2. +

      Remove promise from [[pending flush promises]].

      +
    +
+
Close VideoEncoder (with exception) +
+ + Run these steps: + +
    +
  1. +

    Run the Reset VideoEncoder algorithm with exception.

    +
  2. +

    Set [[state]] to "closed".

    +
  3. +

    Clear [[codec implementation]] and release associated +system resources.

    +
  4. +

    If exception is not an AbortError DOMException, +invoke the [[error callback]] with exception.

    +
+
+

6.7. EncodedVideoChunkMetadata

+ +The following metadata dictionary is emitted by the +EncodedVideoChunkOutputCallback alongside an associated +EncodedVideoChunk. + + +
dictionary EncodedVideoChunkMetadata {
+  VideoDecoderConfig decoderConfig;
+  SvcOutputMetadata svc;
+  BufferSource alphaSideData;
+};
+
+dictionary SvcOutputMetadata {
+  unsigned long temporalLayerId;
+};
+
+
+
decoderConfig, of type VideoDecoderConfig +
+

A VideoDecoderConfig that authors MAY use to +decode the associated EncodedVideoChunk.

+
svc, of type SvcOutputMetadata +
+

A collection of metadata describing this EncodedVideoChunk with respect +to the configured scalabilityMode.

+
alphaSideData, of type BufferSource +
+

A BufferSource that contains the EncodedVideoChunk’s extra alpha +channel data.

+
temporalLayerId, of type unsigned long +
+

A number that identifies the temporal layer for the associated +EncodedVideoChunk.

+
+

7. Configurations

+

7.1. Check Configuration Support (with config)

+ +Run these steps: + +
    +
  1. +

    If the codec string in config.codec is not a +valid codec string or is otherwise unrecognized by the User Agent, +return false.

    +
  2. +

    If config is an AudioDecoderConfig or VideoDecoderConfig and the +User Agent can’t provide a codec that can decode the exact profile +(where present), level (where present), and constraint bits (where present) +indicated by the codec string in config.codec, return false.

    +
  3. +

    If config is an AudioEncoderConfig or VideoEncoderConfig:

    +
      +
    1. +

      If the codec string in config.codec contains a profile and the +User Agent can’t provide a codec that can encode the exact +profile indicated by config.codec, return false.

      +
    2. +

      If the codec string in config.codec contains a level and the +User Agent can’t provide a codec that can encode to a level less +than or equal to the level indicated by config.codec, return false.

      +
    3. +

      If the codec string in config.codec contains constraint bits and +the User Agent can’t provide a codec that can produce an encoded +bitstream at least as constrained as indicated by config.codec, return +false.

      +
    +
  4. +

    If the User Agent can provide a codec to support all entries of the +config, including applicable default values for keys that are not +included, return true.

    +

    NOTE: The types AudioDecoderConfig, VideoDecoderConfig, + AudioEncoderConfig, and VideoEncoderConfig each define their + respective configuration entries and defaults.

    +

    NOTE: Support for a given configuration can change dynamically if the + hardware is altered (e.g. external GPU unplugged) or if essential + hardware resources are exhausted. User Agents describe support on + a best-effort basis given the resources that are available at the time + of the query.

    +
  5. +

    Otherwise, return false.

    +
+

7.2. Clone Configuration (with config)

+

NOTE: This algorithm will copy only the dictionary members that the User Agent + recognizes as part of the dictionary type.

+

Run these steps:

+
    +
  1. +

    Let dictType be the type of dictionary config.

    +
  2. +

    Let clone be a new empty instance of dictType.

    +
  3. +

    For each dictionary member m defined on dictType:

    +
      +
    1. +

      If m does not exist in config, then continue.

      +
    2. +

      If config[m] is a nested dictionary, set clone[m] to the result of +recursively running the Clone Configuration algorithm with +config[m].

      +
    3. +

      Otherwise, assign a copy of config[m] to clone[m].

      +
    +
+

Note: This implements a "deep-copy". These configuration objects are +frequently used as the input of asynchronous operations. Copying means that +modifying the original object while the operation is in flight won’t change the +operation’s outcome.

+

7.3. Signalling Configuration Support

+

7.3.1. AudioDecoderSupport

+
dictionary AudioDecoderSupport {
+  boolean supported;
+  AudioDecoderConfig config;
+};
+
+
+
supported, of type boolean +
+ A boolean indicating whether the corresponding + config is supported by the User Agent. + +
config, of type AudioDecoderConfig +
+ An AudioDecoderConfig used by the User Agent in determining the value of + supported. + +
+

7.3.2. VideoDecoderSupport

+
dictionary VideoDecoderSupport {
+  boolean supported;
+  VideoDecoderConfig config;
+};
+
+
+
supported, of type boolean +
+ A boolean indicating whether the corresponding + config is supported by the User Agent. + +
config, of type VideoDecoderConfig +
+ A VideoDecoderConfig used by the User Agent in determining the value of + supported. + +
+

7.3.3. AudioEncoderSupport

+
dictionary AudioEncoderSupport {
+  boolean supported;
+  AudioEncoderConfig config;
+};
+
+
+
supported, of type boolean +
+ A boolean indicating whether the corresponding + config is supported by the User Agent. + +
config, of type AudioEncoderConfig +
+ An AudioEncoderConfig used by the User Agent in determining the value of + supported. + +
+

7.3.4. VideoEncoderSupport

+
dictionary VideoEncoderSupport {
+  boolean supported;
+  VideoEncoderConfig config;
+};
+
+
+
supported, of type boolean +
+ A boolean indicating whether the corresponding + config is supported by the User Agent. + +
config, of type VideoEncoderConfig +
+ A VideoEncoderConfig used by the User Agent in determining the value of + supported. + +
+

7.4. Codec String

+ +A codec string describes a given codec format to be used for encoding or +decoding. + + +

A valid codec string MUST meet the following +conditions.

+
    +
  1. +

    Is valid per the relevant codec specification (see examples below).

    +
  2. +

    It describes a single codec.

    +
  3. +

    It is unambiguous about codec profile, level, and constraint bits for codecs +that define these concepts.

    +
+

NOTE: In other media specifications, codec strings historically accompanied a + MIME type as the "codecs=" parameter + (isTypeSupported(), canPlayType()) + [RFC6381]. In this specification, encoded media is not containerized; + hence, only the value of the codecs parameter is accepted.

+

NOTE: Encoders for codecs that define level and constraint bits have flexibility + around these parameters, but won’t produce bitstreams that have a higher + level or are less constrained than requested.

+

The format and semantics for codec strings are defined by codec registrations +listed in the [WEBCODECS-CODEC-REGISTRY]. A compliant implementation MAY support any combination of codec registrations or +none at all.

+

7.5. AudioDecoderConfig

+
dictionary AudioDecoderConfig {
+  required DOMString codec;
+  [EnforceRange] required unsigned long sampleRate;
+  [EnforceRange] required unsigned long numberOfChannels;
+  AllowSharedBufferSource description;
+};
+
+

To check if an AudioDecoderConfig is a valid AudioDecoderConfig, + run these steps:

+
    +
  1. +

    If codec is empty after +stripping leading and trailing ASCII whitespace, +return false.

    +
  2. +

    If description is [detached], return + false.

    +
  3. +

    Return true.

    +
+
+
codec, of type DOMString +
Contains a codec string in config.codec describing the codec. +
sampleRate, of type unsigned long +
The number of frame samples per second. +
numberOfChannels, of type unsigned long +
The number of audio channels. +
description, of type AllowSharedBufferSource +
+ + A sequence of codec specific bytes, commonly known as extradata. + + +

NOTE: The registrations in the [WEBCODECS-CODEC-REGISTRY] describe whether/how to + populate this sequence, corresponding to the provided + codec.

+
+

7.6. VideoDecoderConfig

+
dictionary VideoDecoderConfig {
+  required DOMString codec;
+  AllowSharedBufferSource description;
+  [EnforceRange] unsigned long codedWidth;
+  [EnforceRange] unsigned long codedHeight;
+  [EnforceRange] unsigned long displayAspectWidth;
+  [EnforceRange] unsigned long displayAspectHeight;
+  VideoColorSpaceInit colorSpace;
+  HardwareAcceleration hardwareAcceleration = "no-preference";
+  boolean optimizeForLatency;
+  double rotation = 0;
+  boolean flip = false;
+};
+
+

To check if a VideoDecoderConfig is a valid VideoDecoderConfig, +run these steps:

+
    +
  1. +

    If codec is empty after +stripping leading and trailing ASCII whitespace, +return false.

    +
  2. +

    If one of codedWidth or +codedHeight is provided but the other isn’t, +return false.

    +
  3. +

    If codedWidth = 0 or +codedHeight = 0, return false.

    +
  4. +

    If one of displayAspectWidth or +displayAspectHeight is provided but the other isn’t, +return false.

    +
  5. +

    If displayAspectWidth = 0 or +displayAspectHeight = 0, return false.

    +
  6. +

    If description is [detached], +return false.

    +
  7. +

    Return true.

    +
+
+
codec, of type DOMString +
Contains a codec string describing the codec. +
description, of type AllowSharedBufferSource +
+ + A sequence of codec specific bytes, commonly known as extradata. + + +

 NOTE: The registrations in the [WEBCODECS-CODEC-REGISTRY] describe + whether/how to populate this sequence, corresponding to the provided + codec.

+
codedWidth, of type unsigned long +
+ Width of the VideoFrame in pixels, potentially including non-visible + padding, and prior to considering potential ratio adjustments. + +
codedHeight, of type unsigned long +
+ + Height of the VideoFrame in pixels, potentially including non-visible + padding, and prior to considering potential ratio adjustments. + + +

NOTE: codedWidth and codedHeight + are used when selecting a [[codec implementation]].

+
displayAspectWidth, of type unsigned long +
+ Horizontal dimension of the VideoFrame’s aspect ratio when displayed. + +
displayAspectHeight, of type unsigned long +
+ + Vertical dimension of the VideoFrame’s aspect ratio when displayed. + + +

NOTE: displayWidth and displayHeight can both be + different from displayAspectWidth and + displayAspectHeight, but have identical ratios, after scaling is applied when creating the + video frame.

+
colorSpace, of type VideoColorSpaceInit +
+ Configures the VideoFrame.colorSpace for VideoFrames + associated with this VideoDecoderConfig. If + colorSpace exists, the provided values will + override any in-band values from the bitstream. +
hardwareAcceleration, of type HardwareAcceleration, defaulting to "no-preference" +
+ Hint that configures hardware acceleration for this codec. See + HardwareAcceleration. + +
optimizeForLatency, of type boolean +
+ + Hint that the selected decoder SHOULD be configured + to minimize the number of EncodedVideoChunks that have to be decoded + before a VideoFrame is output. + + +

NOTE: In addition to User Agent and hardware limitations, some codec + bitstreams require a minimum number of inputs before any output can be + produced.

+
rotation, of type double, defaulting to 0 +
+ Sets the rotation attribute on decoded frames. + +
flip, of type boolean, defaulting to false +
+ Sets the flip attribute on decoded frames. + +
+

7.7. AudioEncoderConfig

+
dictionary AudioEncoderConfig {
+  required DOMString codec;
+  [EnforceRange] required unsigned long sampleRate;
+  [EnforceRange] required unsigned long numberOfChannels;
+  [EnforceRange] unsigned long long bitrate;
+  BitrateMode bitrateMode = "variable";
+};
+
+

NOTE: Codec-specific extensions to AudioEncoderConfig are described in + their registrations in the [WEBCODECS-CODEC-REGISTRY].

+

To check if an AudioEncoderConfig is a valid AudioEncoderConfig, +run these steps:

+
    +
  1. +

    If codec is empty after +stripping leading and trailing ASCII whitespace, +return false.

    +
  2. +

    If the AudioEncoderConfig has a codec-specific extension and the corresponding +registration in the [WEBCODECS-CODEC-REGISTRY] defines steps to check whether +the extension is a valid extension, return the result of running those steps.

    +
  3. +

    If sampleRate or numberOfChannels are + equal to zero, return false.

    +
  4. +

    Return true.

    +
+
+
codec, of type DOMString +
Contains a codec string describing the codec. +
sampleRate, of type unsigned long +
The number of frame samples per second. +
numberOfChannels, of type unsigned long +
The number of audio channels. +
bitrate, of type unsigned long long +
+ The average bitrate of the encoded audio given in units of bits per second. + +
bitrateMode, of type BitrateMode, defaulting to "variable" +
+ + Configures the encoder to use a constant or + variable bitrate as defined by [MEDIASTREAM-RECORDING]. + + +

 NOTE: Not all audio codecs support specific BitrateModes. Authors are + encouraged to check by calling isConfigSupported() with + config.

+
+

7.8. VideoEncoderConfig

+
dictionary VideoEncoderConfig {
+  required DOMString codec;
+  [EnforceRange] required unsigned long width;
+  [EnforceRange] required unsigned long height;
+  [EnforceRange] unsigned long displayWidth;
+  [EnforceRange] unsigned long displayHeight;
+  [EnforceRange] unsigned long long bitrate;
+  double framerate;
+  HardwareAcceleration hardwareAcceleration = "no-preference";
+  AlphaOption alpha = "discard";
+  DOMString scalabilityMode;
+  VideoEncoderBitrateMode bitrateMode = "variable";
+  LatencyMode latencyMode = "quality";
+  DOMString contentHint;
+};
+
+

NOTE: Codec-specific extensions to VideoEncoderConfig are described in their + registrations in the [WEBCODECS-CODEC-REGISTRY].

+

To check if a VideoEncoderConfig is a valid VideoEncoderConfig, + run these steps:

+
    +
  1. +

    If codec is empty after +stripping leading and trailing ASCII whitespace, +return false.

    +
  2. +

    If width = 0 or height += 0, return false.

    +
  3. +

    If displayWidth = 0 or +displayHeight = 0, return false.

    +
  4. +

    Return true.

    +
+
+
codec, of type DOMString +
Contains a codec string in config.codec describing the codec. +
width, of type unsigned long +
+ + The encoded width of output EncodedVideoChunks in pixels, prior to any + display aspect ratio adjustments. + + +

The encoder MUST scale any VideoFrame whose + [[visible width]] differs from this value.

+
height, of type unsigned long +
+ + The encoded height of output EncodedVideoChunks in pixels, prior to any + display aspect ratio adjustments. + + +

The encoder MUST scale any VideoFrame whose + [[visible height]] differs from this value.

+
+
+
displayWidth, of type unsigned long +
+ The intended display width of output EncodedVideoChunks in pixels. + Defaults to width if not present. + +
displayHeight, of type unsigned long +
+ The intended display height of output EncodedVideoChunks in pixels. + Defaults to height if not present. +
+
+ + NOTE: Providing a displayWidth or + displayHeight that differs from + width and height signals + that chunks are to be scaled after decoding to arrive at the final + display aspect ratio. + + +

For many codecs this is merely pass-through information, but some codecs + can sometimes include display sizing in the bitstream.

+
+
+
bitrate, of type unsigned long long +
+ + The average bitrate of the encoded video given in units of bits per second. + + +

NOTE: Authors are encouraged to additionally provide a + framerate to inform rate control.

+
framerate, of type double +
+ The expected frame rate in frames per second, if known. This value, along + with the frame timestamp, SHOULD be + used by the video encoder to calculate the optimal byte length for each + encoded frame. Additionally, the value SHOULD be + considered a target deadline for outputting encoding chunks when + latencyMode is set to realtime. + +
hardwareAcceleration, of type HardwareAcceleration, defaulting to "no-preference" +
+ Hint that configures hardware acceleration for this codec. See + HardwareAcceleration. + +
alpha, of type AlphaOption, defaulting to "discard" +
+ Whether the alpha component of the VideoFrame inputs SHOULD be kept or discarded prior to encoding. If + alpha is equal to discard, alpha data + is always discarded, regardless of a VideoFrame’s + [[format]]. + +
scalabilityMode, of type DOMString +
+ An encoding scalability mode identifier as defined by [WebRTC-SVC]. + +
bitrateMode, of type VideoEncoderBitrateMode, defaulting to "variable" +
+ + Configures encoding to use one of the rate control modes specified by + VideoEncoderBitrateMode. + + +

NOTE: The precise degree of bitrate fluctuation in either mode is + implementation defined.

+
latencyMode, of type LatencyMode, defaulting to "quality" +
+ Configures latency related behaviors for this codec. See LatencyMode. + +
contentHint, of type DOMString +
+ + An encoding video content hint as defined by [mst-content-hint]. + + +

The User Agent MAY use this hint to set + expectations about incoming VideoFrames and to improve encoding quality. + If using this hint:

+
    +
  • +

    The User Agent MUST respect other explicitly +set encoding options when configuring the encoder, whether they are +codec-specific encoding options or not.

    +
  • +

    The User Agent SHOULD make a best-effort +attempt to use additional configuration options to improve encoding +quality, according to the goals defined by the corresponding +video content hint.

    +
+

NOTE: Some encoder options are implementation specific, and mappings between + contentHint and those options cannot be + prescribed.

+

The User Agent MUST NOT refuse the configuration + if it doesn’t support this content hint. + See isConfigSupported().

+
+

7.9. Hardware Acceleration

+
enum HardwareAcceleration {
+  "no-preference",
+  "prefer-hardware",
+  "prefer-software",
+};
+
+

When supported, hardware acceleration offloads encoding or decoding to +specialized hardware. prefer-hardware and +prefer-software are hints. While User Agents +SHOULD respect these values when possible, User +Agents may ignore these values in some or all circumstances for any reason.

+

To prevent fingerprinting, if a User Agent implements [media-capabilities], +the User Agent MUST ensure rejection or acceptance of +a given HardwareAcceleration preference reveals no additional information +on top of what is inherent to the User Agent and revealed by +[media-capabilities]. If a User Agent does not implement +[media-capabilities] for reasons of fingerprinting, they +SHOULD ignore the HardwareAcceleration preference.

+
+ + NOTE: Good examples of when a User Agent can ignore + prefer-hardware or + prefer-software are for reasons of user privacy or + circumstances where the User Agent determines an alternative setting would + better serve the end user. + + +

Most authors will be best served by using the default of + no-preference. This gives the User Agent flexibility + to optimize based on its knowledge of the system and configuration. A common + strategy will be to prioritize hardware acceleration at higher resolutions + with a fallback to software codecs if hardware acceleration fails.

+

Authors are encouraged to carefully weigh the tradeoffs when setting a + hardware acceleration preference. The precise tradeoffs will be + device-specific, but authors can generally expect the following:

+
    +
  • +

    Setting a value of prefer-hardware or + prefer-software can significantly restrict what + configurations are supported. It can occur that the user’s device does not + offer acceleration for any codec, or only for the most common profiles of + older codecs. It can also occur that a given User Agent lacks a software + based codec implementation.

    +
  • +

    Hardware acceleration does not simply imply faster encoding / decoding. + Hardware acceleration often has higher startup latency but more consistent + throughput performance. Acceleration will generally reduce CPU load.

    +
  • +

    For decoding, hardware acceleration is often less robust to inputs that are + mislabeled or violate the relevant codec specification.

    +
  • +

    Hardware acceleration will often be more power efficient than purely + software based codecs.

    +
  • +

    For lower resolution content, the overhead added by hardware acceleration + can yield decreased performance and power efficiency compared to purely + software based codecs.

    +
+

Given these tradeoffs, a good example of using "prefer-hardware" would be if + an author intends to provide their own software based fallback via + WebAssembly.

+

Alternatively, a good example of using "prefer-software" would be if an author + is especially sensitive to the higher startup latency or decreased robustness + generally associated with hardware acceleration.

+
+
+
no-preference +
+ Indicates that the User Agent MAY use hardware + acceleration if it is available and compatible with other aspects of the + codec configuration. + +
prefer-software +
+ + Indicates that the User Agent SHOULD prefer a + software codec implementation. User Agents may ignore this value for any + reason. + + +

NOTE: This can cause the configuration to be unsupported on platforms where + an unaccelerated codec is unavailable or is incompatible with other aspects + of the codec configuration.

+
prefer-hardware +
+ + Indicates that the User Agent SHOULD prefer + hardware acceleration. User Agents may ignore this value for any reason. + + +

NOTE: This can cause the configuration to be unsupported on platforms where + an accelerated codec is unavailable or is incompatible with other aspects of + the codec configuration.

+
+

7.10. Alpha Option

+
enum AlphaOption {
+  "keep",
+  "discard",
+};
+
+

Describes how the user agent SHOULD behave when dealing +with alpha channels, for a variety of different operations.

+
+
keep +
+ Indicates that the user agent SHOULD preserve alpha + channel data for VideoFrames, if it is present. + +
discard +
+ Indicates that the user agent SHOULD ignore or + remove VideoFrame’s alpha channel data. + +
+

7.11. Latency Mode

+
enum LatencyMode {
+  "quality",
+  "realtime"
+};
+
+
+
quality +
+

Indicates that the User Agent SHOULD optimize for +encoding quality. In this mode:

+
    +
  • +

    User Agents MAY increase encoding latency to +improve quality.

    +
  • +

    User Agents MUST NOT drop frames to achieve the target bitrate and/or framerate.

    +
  • +

    framerate SHOULD NOT be used as a target deadline for emitting encoded chunks.

    +
+
realtime +
+

Indicates that the User Agent SHOULD optimize for +low latency. In this +mode:

+
    +
  • +

    User Agents MAY sacrifice quality to improve +latency.

    +
  • +

    User Agents MAY drop frames to achieve the target +bitrate and/or framerate.

    +
  • +

    framerate SHOULD be used +as a target deadline for emitting encoded chunks.

    +
+
+

7.12. Configuration Equivalence

+ +Two dictionaries are equal dictionaries if they contain the same +keys and values. For nested dictionaries, apply this definition recursively. + + + +

7.13. VideoEncoderEncodeOptions

+
dictionary VideoEncoderEncodeOptions {
+  boolean keyFrame = false;
+};
+
+

NOTE: Codec-specific extensions to VideoEncoderEncodeOptions are described in + their registrations in the [WEBCODECS-CODEC-REGISTRY].

+
+
keyFrame, of type boolean, defaulting to false +
+ A value of true indicates that the given frame + MUST be encoded as a key frame. A value of false + indicates that the User Agent has flexibility to decide whether the frame + will be encoded as a key frame. + +
+

7.14. VideoEncoderBitrateMode

+
enum VideoEncoderBitrateMode {
+  "constant",
+  "variable",
+  "quantizer"
+};
+
+
+
constant +
Encode at a constant bitrate. See bitrate. +
variable +
+ Encode using a variable bitrate, allowing more space to be used for + complex signals and less space for less complex signals. + See bitrate. + +
quantizer +
+ Encode using a quantizer, that is specified for each video + frame in codec specific extensions of VideoEncoderEncodeOptions. + +
+

7.15. CodecState

+
enum CodecState {
+  "unconfigured",
+  "configured",
+  "closed"
+};
+
+
+
unconfigured +
The codec is not configured for encoding or decoding. +
configured +
+ A valid configuration has been provided. The codec is ready for encoding or + decoding. + +
closed +
+ The codec is no longer usable and underlying system resources have + been released. + +
+

7.16. WebCodecsErrorCallback

+
callback WebCodecsErrorCallback = undefined(DOMException error);
+
+

8. Encoded Media Interfaces (Chunks)

+ +These interfaces represent chunks of encoded media. + + +

8.1. EncodedAudioChunk Interface

+
[Exposed=(Window,DedicatedWorker), Serializable]
+interface EncodedAudioChunk {
+  constructor(EncodedAudioChunkInit init);
+  readonly attribute EncodedAudioChunkType type;
+  readonly attribute long long timestamp;          // microseconds
+  readonly attribute unsigned long long? duration; // microseconds
+  readonly attribute unsigned long byteLength;
+
+  undefined copyTo(AllowSharedBufferSource destination);
+};
+
+dictionary EncodedAudioChunkInit {
+  required EncodedAudioChunkType type;
+  [EnforceRange] required long long timestamp;    // microseconds
+  [EnforceRange] unsigned long long duration;     // microseconds
+  required AllowSharedBufferSource data;
+  sequence<ArrayBuffer> transfer = [];
+};
+
+enum EncodedAudioChunkType {
+    "key",
+    "delta",
+};
+
+

8.1.1. Internal Slots

+
+
[[internal data]] +
+

An array of bytes representing the encoded chunk data.

+
[[type]] +
+

Describes whether the chunk is a key chunk.

+
[[timestamp]] +
+

The presentation timestamp of this EncodedAudioChunk.

+
[[duration]] +
+

The presentation duration of this EncodedAudioChunk.

+
[[byte length]] +
+

The byte length of [[internal data]].

+
+

8.1.2. Constructors

+ + + EncodedAudioChunk(init) + + +
    +
  1. +

    If init.transfer contains more than one reference + to the same ArrayBuffer, then throw a DataCloneError DOMException.

    +
  2. +

    For each transferable in init.transfer:

    +
      +
    1. +

      If transferable's [[Detached]] internal slot is true, then throw a DataCloneError DOMException.

      +
    +
  3. +

    Let chunk be a new EncodedAudioChunk object, initialized as follows

    +
      +
    1. +

      Assign init.type to [[type]].

      +
    2. +

      Assign init.timestamp to [[timestamp]].

      +
    3. +

      If init.duration exists, assign it to +[[duration]], or assign null otherwise.

      +
    4. +

      Assign init.data.byteLength to [[byte length]];

      +
    5. +

      If init.transfer contains an ArrayBuffer +referenced by init.data the User Agent +MAY choose to:

      +
        +
      1. +

        Let resource be a new media resource referencing sample data + in init.data.

        +
      +
    6. +

      Otherwise:

      +
        +
      1. +

        Assign a copy of init.data + to [[internal data]].

        +
      +
    +
  4. +

    For each transferable in init.transfer:

    +
      +
    1. +

      Perform DetachArrayBuffer +on transferable

      +
    +
  5. +

    Return chunk.

    +
+

8.1.3. Attributes

+
+
type, of type EncodedAudioChunkType, readonly +
+

Returns the value of [[type]].

+
timestamp, of type long long, readonly +
+

Returns the presentation timestamp of this EncodedAudioChunk.

+
duration, of type unsigned long long, readonly, nullable +
+

Returns the presentation duration of this EncodedAudioChunk.

+
byteLength, of type unsigned long, readonly +
+

Returns the value of [[byte length]].

+
+

8.1.4. Methods

+
+
copyTo(destination) +
+

When invoked, run these steps:

+
    +
  1. +

    If the [[byte length]] of this EncodedAudioChunk is greater than the [[byte length]] of destination, throw a TypeError.

    +
  2. +

    Copy the [[internal data]] into destination.

    +
+
+

8.1.5. Serialization

+
+
The EncodedAudioChunk serialization steps (with value, serialized, +and forStorage) are: +
+
    +
  1. +

    If forStorage is true, throw a DataCloneError.

    +
  2. +

    For each EncodedAudioChunk internal slot in value, assign the value +of each internal slot to a field in serialized with the same name as +the internal slot.

    +
+
The EncodedAudioChunk deserialization steps (with serialized and +value) are: +
+
    +
  1. +

    For all named fields in serialized, assign the value of each named field +to the EncodedAudioChunk internal slot in value with the same name +as the named field.

    +
+
+

NOTE: Since EncodedAudioChunks are immutable, User + Agents can choose to implement serialization using a reference counting + model similar to § 9.2.6 Transfer and Serialization.

+

8.2. EncodedVideoChunk Interface

+
[Exposed=(Window,DedicatedWorker), Serializable]
+interface EncodedVideoChunk {
+  constructor(EncodedVideoChunkInit init);
+  readonly attribute EncodedVideoChunkType type;
+  readonly attribute long long timestamp;             // microseconds
+  readonly attribute unsigned long long? duration;    // microseconds
+  readonly attribute unsigned long byteLength;
+
+  undefined copyTo(AllowSharedBufferSource destination);
+};
+
+dictionary EncodedVideoChunkInit {
+  required EncodedVideoChunkType type;
+  [EnforceRange] required long long timestamp;        // microseconds
+  [EnforceRange] unsigned long long duration;         // microseconds
+  required AllowSharedBufferSource data;
+  sequence<ArrayBuffer> transfer = [];
+};
+
+enum EncodedVideoChunkType {
+    "key",
+    "delta",
+};
+
+

8.2.1. Internal Slots

+
+
[[internal data]] +
+

An array of bytes representing the encoded chunk data.

+
[[type]] +
+

The EncodedVideoChunkType of this EncodedVideoChunk.

+
[[timestamp]] +
+

The presentation timestamp of this EncodedVideoChunk.

+
[[duration]] +
+

The presentation duration of this EncodedVideoChunk.

+
[[byte length]] +
+

The byte length of [[internal data]].

+
+

8.2.2. Constructors

+ + + EncodedVideoChunk(init) + + +
    +
  1. +

    If init.transfer contains more than one reference + to the same ArrayBuffer, then throw a DataCloneError DOMException.

    +
  2. +

    For each transferable in init.transfer:

    +
      +
    1. +

      If transferable's [[Detached]] internal slot is true, then throw a DataCloneError DOMException.

      +
    +
  3. +

    Let chunk be a new EncodedVideoChunk object, initialized as follows

    +
      +
    1. +

      Assign init.type to [[type]].

      +
    2. +

      Assign init.timestamp to [[timestamp]].

      +
    3. +

      If duration is present in init, assign init.duration to +[[duration]]. Otherwise, assign null to +[[duration]].

      +
    4. +

      Assign init.data.byteLength to [[byte length]];

      +
    5. +

      If init.transfer contains an ArrayBuffer +referenced by init.data the User Agent +MAY choose to:

      +
        +
      1. +

        Let resource be a new media resource referencing sample data + in init.data.

        +
      +
    6. +

      Otherwise:

      +
        +
      1. +

        Assign a copy of init.data + to [[internal data]].

        +
      +
    +
  4. +

    For each transferable in init.transfer:

    +
      +
    1. +

      Perform DetachArrayBuffer +on transferable

      +
    +
  5. +

    Return chunk.

    +
+

8.2.3. Attributes

+
+
type, of type EncodedVideoChunkType, readonly +
+

Returns the value of [[type]].

+
timestamp, of type long long, readonly +
+

Returns the presentation timestamp of this EncodedVideoChunk.

+
duration, of type unsigned long long, readonly, nullable +
+

Returns the presentation duration of this EncodedVideoChunk.

+
byteLength, of type unsigned long, readonly +
+

Returns the value of [[byte length]].

+
+

8.2.4. Methods

+
+
copyTo(destination) +
+

When invoked, run these steps:

+
    +
  1. +

    If [[byte length]] is greater than +the [[byte length]] of destination, throw a +TypeError.

    +
  2. +

    Copy the [[internal data]] into destination.

    +
+
+

8.2.5. Serialization

+
+
The EncodedVideoChunk serialization steps (with value, serialized, +and forStorage) are: +
+
    +
  1. +

    If forStorage is true, throw a DataCloneError.

    +
  2. +

    For each EncodedVideoChunk internal slot in value, assign the value +of each internal slot to a field in serialized with the same name as +the internal slot.

    +
+
The EncodedVideoChunk deserialization steps (with serialized and +value) are: +
+
    +
  1. +

    For all named fields in serialized, assign the value of each named field +to the EncodedVideoChunk internal slot in value with the same name +as the named field.

    +
+
+

NOTE: Since EncodedVideoChunks are immutable, User + Agents can choose to implement serialization using a reference counting + model similar to § 9.4.7 Transfer and Serialization.

+

9. Raw Media Interfaces

+ +These interfaces represent unencoded (raw) media. + + +

9.1. Memory Model

+

9.1.1. Background

+

This section is non-normative.

+

Decoded media data MAY occupy a large amount of system +memory. To minimize the need for expensive copies, this specification defines a +scheme for reference counting (clone() and close()).

+

NOTE: Authors are encouraged to call close() immediately when frames are + no longer needed.

+

9.1.2. Reference Counting

+

A media resource is storage for the actual pixel data or the audio +sample data described by a VideoFrame or AudioData.

+

The AudioData [[resource reference]] and VideoFrame +[[resource reference]] internal slots hold a reference to a +media resource.

+

VideoFrame.clone() and +AudioData.clone() return new objects whose +[[resource reference]] points to the same media resource as the original +object.

+

VideoFrame.close() and AudioData.close() will clear their [[resource reference]] slot, releasing the reference to their media resource.

+

A media resource MUST remain alive at least as long +as it continues to be referenced by a [[resource reference]].

+

NOTE: When a media resource is no longer referenced by a + [[resource reference]], the resource can be destroyed. User Agents are + encouraged to destroy such resources quickly to reduce memory pressure and + facilitate resource reuse.

+

9.1.3. Transfer and Serialization

+

This section is non-normative.

+

AudioData and VideoFrame are both +transferable and +serializable objects. Their transfer and +serialization steps are defined in § 9.2.6 Transfer and Serialization and +§ 9.4.7 Transfer and Serialization respectively.

+

Transferring an AudioData or VideoFrame moves its [[resource reference]] to the destination object and closes (as in close()) +the source object. Authors MAY use this facility +to move an AudioData or VideoFrame between realms without copying the +underlying media resource.

+

Serializing an AudioData or VideoFrame effectively clones (as in +clone()) the source object, resulting in two objects that +reference the same media resource. Authors MAY use +this facility to clone an AudioData or VideoFrame to another realm +without copying the underlying media resource.

+

9.2. AudioData Interface

+
[Exposed=(Window,DedicatedWorker), Serializable, Transferable]
+interface AudioData {
+  constructor(AudioDataInit init);
+
+  readonly attribute AudioSampleFormat? format;
+  readonly attribute float sampleRate;
+  readonly attribute unsigned long numberOfFrames;
+  readonly attribute unsigned long numberOfChannels;
+  readonly attribute unsigned long long duration;  // microseconds
+  readonly attribute long long timestamp;          // microseconds
+
+  unsigned long allocationSize(AudioDataCopyToOptions options);
+  undefined copyTo(AllowSharedBufferSource destination, AudioDataCopyToOptions options);
+  AudioData clone();
+  undefined close();
+};
+
+dictionary AudioDataInit {
+  required AudioSampleFormat format;
+  required float sampleRate;
+  [EnforceRange] required unsigned long numberOfFrames;
+  [EnforceRange] required unsigned long numberOfChannels;
+  [EnforceRange] required long long timestamp;  // microseconds
+  required BufferSource data;
+  sequence<ArrayBuffer> transfer = [];
+};
+
+

9.2.1. Internal Slots

+
+
[[resource reference]] +
+

A reference to a media resource that stores the audio sample data for +this AudioData.

+
[[format]] +
+

The AudioSampleFormat used by this AudioData. Will be null whenever +the underlying format does not map to an AudioSampleFormat or when +[[Detached]] is true.

+
[[sample rate]] +
+

The sample-rate, in Hz, for this AudioData.

+
[[number of frames]] +
+

The number of frames for this AudioData.

+
[[number of channels]] +
+

The number of audio channels for this AudioData.

+
[[timestamp]] +
+

The presentation timestamp of this AudioData.

+
+

9.2.2. Constructors

+ + + AudioData(init) + + +
    +
  1. +

    If init is not a valid AudioDataInit, throw a TypeError.

    +
  2. +

    If init.transfer contains more than one reference + to the same ArrayBuffer, then throw a DataCloneError DOMException.

    +
  3. +

    For each transferable in init.transfer:

    +
      +
    1. +

      If transferable's [[Detached]] internal slot is true, then throw a DataCloneError DOMException.

      +
    +
  4. +

    Let frame be a new AudioData object, initialized as follows:

    +
      +
    1. +

      Assign false to [[Detached]].

      +
    2. +

      Assign init.format to +[[format]].

      +
    3. +

      Assign init.sampleRate to +[[sample rate]].

      +
    4. +

      Assign init.numberOfFrames to +[[number of frames]].

      +
    5. +

      Assign init.numberOfChannels to +[[number of channels]].

      +
    6. +

      Assign init.timestamp to +[[timestamp]].

      +
    7. +

      If init.transfer contains an ArrayBuffer +referenced by init.data the User Agent +MAY choose to:

      +
        +
      1. +

        Let resource be a new media resource referencing sample data + in data.

        +
      +
    8. +

      Otherwise:

      +
        +
      1. +

        Let resource be a media resource containing a copy of +init.data.

        +
      +
    9. +

      Let resourceReference be a reference to resource.

      +
    10. +

      Assign resourceReference to [[resource reference]].

      +
    +
  5. +

    For each transferable in init.transfer:

    +
      +
    1. +

      Perform DetachArrayBuffer +on transferable

      +
    +
  6. +

    Return frame.

    +
+

9.2.3. Attributes

+
+
format, of type AudioSampleFormat, readonly, nullable +
+

The AudioSampleFormat used by this AudioData. Will be null whenever the underlying format does not map to an AudioSampleFormat or when [[Detached]] is true.

+

The format getter steps are to return +[[format]].

+
sampleRate, of type float, readonly +
+

The sample-rate, in Hz, for this AudioData.

+

The sampleRate getter steps are to return +[[sample rate]].

+
numberOfFrames, of type unsigned long, readonly +
+

The number of frames for this AudioData.

+

The numberOfFrames getter steps are to return +[[number of frames]].

+
numberOfChannels, of type unsigned long, readonly +
+

The number of audio channels for this AudioData.

+

The numberOfChannels getter steps are to return +[[number of channels]].

+
timestamp, of type long long, readonly +
+

Returns the presentation timestamp of this AudioData.

+

The timestamp getter steps are to return +[[timestamp]].

+
duration, of type unsigned long long, readonly +
+

Returns the presentation duration of this AudioData.

+

The duration getter steps are to:

+
    +
  1. +

    Let microsecondsPerSecond be 1,000,000.

    +
  2. +

    Let durationInSeconds be the result of dividing +[[number of frames]] by [[sample rate]].

    +
  3. +

    Return the product of durationInSeconds and microsecondsPerSecond.

    +
+
+

9.2.4. Methods

+
+
allocationSize(options) +
+

Returns the number of bytes required to hold the samples as described by +options.

+

When invoked, run these steps:

+
    +
  1. +

    If [[Detached]] is true, throw an +InvalidStateError DOMException.

    +
  2. +

    Let copyElementCount be the result of running the +Compute Copy Element Count algorithm with options.

    +
  3. +

    Let destFormat be the value of [[format]].

    +
  4. +

    If options.format exists, assign +options.format to destFormat.

    +
  5. +

    Let bytesPerSample be the number of bytes per sample, as defined by +the destFormat.

    +
  6. +

    Return the product of multiplying bytesPerSample by +copyElementCount.

    +
+
copyTo(destination, options) +
+

Copies the samples from the specified plane of the AudioData to the +destination buffer.

+

When invoked, run these steps:

+
    +
  1. +

    If [[Detached]] is true, throw an +InvalidStateError DOMException.

    +
  2. +

    Let copyElementCount be the result of running the +Compute Copy Element Count algorithm with options.

    +
  3. +

    Let destFormat be the value of [[format]].

    +
  4. +

    If options.format exists, assign +options.format to destFormat.

    +
  5. +

    Let bytesPerSample be the number of bytes per sample, as defined by +the destFormat.

    +
  6. +

    If the product of multiplying bytesPerSample by copyElementCount is +greater than destination.byteLength, throw a RangeError.

    +
  7. +

    Let resource be the media resource referenced by +[[resource reference]].

    +
  8. +

    Let planeFrames be the region of resource corresponding to +options.planeIndex.

    +
  9. +

    Copy elements of planeFrames into destination, starting with the +frame positioned at options.frameOffset +and stopping after copyElementCount samples have been copied. If +destFormat does not equal [[format]], convert elements +to the destFormat AudioSampleFormat while making the copy.

    +
+
clone() +
+

Creates a new AudioData with a reference to the same media resource.

+

When invoked, run these steps:

+
    +
  1. +

    If [[Detached]] is true, throw an +InvalidStateError DOMException.

    +
  2. +

    Return the result of running the Clone AudioData algorithm with +this.

    +
+
close() +
+

Clears all state and releases the reference to the media resource. +Close is final.

+

When invoked, run the Close AudioData algorithm with this.

+
+

9.2.5. Algorithms

+
+
Compute Copy Element Count (with options) +
+

Run these steps:

+
    +
  1. +

    Let destFormat be the value of [[format]].

    +
  2. +

    If options.format exists, assign +options.format to destFormat.

    +
  3. +

    If destFormat describes an interleaved AudioSampleFormat and +options.planeIndex is greater than 0, +throw a RangeError.

    +
  4. +

    Otherwise, if destFormat describes a planar AudioSampleFormat +and if options.planeIndex is greater or +equal to [[number of channels]], throw a +RangeError.

    +
  5. +

    If [[format]] does not equal destFormat and the User +Agent does not support the requested AudioSampleFormat conversion, +throw a NotSupportedError DOMException. +Conversion to f32-planar MUST always be +supported.

    +
  6. +

    Let frameCount be the number of frames in the plane identified by +options.planeIndex.

    +
  7. +

    If options.frameOffset is greater than or +equal to frameCount, throw a RangeError.

    +
  8. +

    Let copyFrameCount be the difference of subtracting +options.frameOffset from frameCount.

    +
  9. +

    If options.frameCount exists:

    +
      +
    1. +

      If options.frameCount is greater than +copyFrameCount, throw a RangeError.

      +
    2. +

      Otherwise, assign options.frameCount +to copyFrameCount.

      +
    +
  10. +

    Let elementCount be copyFrameCount.

    +
  11. +

    If destFormat describes an interleaved +AudioSampleFormat, multiply elementCount by +[[number of channels]]

    +
  12. +

    return elementCount.

    +
+
Clone AudioData (with data) +
+

Run these steps:

+
    +
  1. +

    Let clone be a new AudioData initialized as follows:

    +
      +
    1. +

      Let resource be the media resource referenced by data’s +[[resource reference]].

      +
    2. +

      Let reference be a new reference to resource.

      +
    3. +

      Assign reference to [[resource reference]].

      +
    4. +

      Assign the values of data’s [[Detached]], +[[format]], [[sample rate]], +[[number of frames]], +[[number of channels]], and +[[timestamp]] slots to the corresponding slots in +clone.

      +
    +
  2. +

    Return clone.

    +
+
Close AudioData (with data) +
+

Run these steps:

+
    +
  1. +

    Assign true to data’s [[Detached]] internal slot.

    +
  2. +

    Assign null to data’s [[resource reference]].

    +
  3. +

    Assign 0 to data’s [[sample rate]].

    +
  4. +

    Assign 0 to data’s [[number of frames]].

    +
  5. +

    Assign 0 to data’s [[number of channels]].

    +
  6. +

    Assign null to data’s [[format]].

    +
+
To check if a AudioDataInit is a +valid AudioDataInit, run these steps: +
+
    +
  1. +

    If sampleRate is less than or equal to 0, return false.

    +
  2. +

    If numberOfFrames = 0, return false.

    +
  3. +

    If numberOfChannels = 0, return false.

    +
  4. +

    Verify data has enough data by running the following +steps:

    +
      +
    1. +

      Let totalSamples be the product of multiplying +numberOfFrames by +numberOfChannels.

      +
    2. +

      Let bytesPerSample be the number of bytes per sample, as defined by +the format.

      +
    3. +

      Let totalSize be the product of multiplying bytesPerSample with +totalSamples.

      +
    4. +

      Let dataSize be the size in bytes of data.

      +
    5. +

      If dataSize is less than totalSize, return false.

      +
    +
  5. +

    Return true.

    +
+
+
+Note: It’s expected that AudioDataInit’s data’s memory + layout matches the expectations of the planar or interleaved + format. There is no real way to verify whether the samples + conform to their AudioSampleFormat. +
+

9.2.6. Transfer and Serialization

+
+
The AudioData transfer steps (with value and dataHolder) are: +
+
    +
  1. +

    If value’s [[Detached]] is true, throw a +DataCloneError DOMException.

    +
  2. +

    For all AudioData internal slots in value, assign the value of +each internal slot to a field in dataHolder with the same name as the +internal slot.

    +
  3. +

    Run the Close AudioData algorithm with value.

    +
+
The AudioData transfer-receiving steps (with dataHolder and value) +are: +
+
    +
  1. +

    For all named fields in dataHolder, assign the value of each named +field to the AudioData internal slot in value with the same name +as the named field.

    +
+
The AudioData serialization steps (with value, serialized, and +forStorage) are: +
+
    +
  1. +

    If value’s [[Detached]] is true, throw a +DataCloneError DOMException.

    +
  2. +

    If forStorage is true, throw a DataCloneError.

    +
  3. +

    Let resource be the media resource referenced by + value’s [[resource reference]].

    +
  4. +

    Let newReference be a new reference to resource.

    +
  5. +

    Assign newReference to |serialized.resource reference|.

    +
  6. +

    For all remaining AudioData internal slots (excluding +[[resource reference]]) in value, assign the value of +each internal slot to a field in serialized with the same name as the +internal slot.

    +
+
The AudioData deserialization steps (with serialized and value) +are: +
+
    +
  1. +

    For all named fields in serialized, assign the value of each named +field to the AudioData internal slot in value with the same name +as the named field.

    +
+
+

9.2.7. AudioDataCopyToOptions

+
dictionary AudioDataCopyToOptions {
+  [EnforceRange] required unsigned long planeIndex;
+  [EnforceRange] unsigned long frameOffset = 0;
+  [EnforceRange] unsigned long frameCount;
+  AudioSampleFormat format;
+};
+
+
+
planeIndex, of type unsigned long +
+

The index identifying the plane to copy from.

+
frameOffset, of type unsigned long, defaulting to 0 +
+

An offset into the source plane data indicating which frame to begin +copying from. Defaults to 0.

+
frameCount, of type unsigned long +
+

The number of frames to copy. If not provided, the copy will include all +frames in the plane beginning with frameOffset.

+
format, of type AudioSampleFormat +
+

The output AudioSampleFormat for the destination data. If not provided, +the resulting copy will use this AudioData’s [[format]]. +Invoking copyTo() will throw a NotSupportedError if +conversion to the requested format is not supported. Conversion from any +AudioSampleFormat to f32-planar MUST always +be supported.

+

NOTE: Authors seeking to integrate with [WEBAUDIO] can request
  f32-planar and use the resulting copy to create an AudioBuffer
  or render via AudioWorklet.

+
+

9.3. Audio Sample Format

+

An audio sample format describes the numeric type used to represent a +single sample (e.g. 32-bit floating point) and the arrangement of samples from +different channels as either interleaved or planar. The audio +sample type refers solely to the numeric type and interval used to store +the data, this is u8, s16, s32, or f32 for respectively +unsigned 8-bits, signed 16-bits, signed 32-bits, and 32-bits +floating point number. The audio buffer +arrangement refers solely to the way the samples are laid out in memory +(planar or interleaved).

+

A sample refers to a single value that is the magnitude of a +signal at a particular point in time in a particular channel.

+

A frame or (sample-frame) refers to a set of values of all channels +of a multi-channel signal, that happen at the exact same time.

+

NOTE: Consequently, if an audio signal is mono (has only one channel), a frame +and a sample refer to the same thing.

+

All audio samples in this specification are using linear pulse-code +modulation (Linear PCM): quantization levels are uniform between values.

+

NOTE: The Web Audio API, that is expected to be used with this specification, +also uses Linear PCM.

+
enum AudioSampleFormat {
+  "u8",
+  "s16",
+  "s32",
+  "f32",
+  "u8-planar",
+  "s16-planar",
+  "s32-planar",
+  "f32-planar",
+};
+
+
+
u8 +
+

8-bit unsigned integer samples with interleaved channel arrangement.

+
s16 +
+

16-bit signed integer samples with interleaved channel arrangement.

+
s32 +
+

32-bit signed integer samples with interleaved channel arrangement.

+
f32 +
+

32-bit float samples with interleaved channel arrangement.

+
u8-planar +
+

8-bit unsigned integer samples with planar channel arrangement.

+
s16-planar +
+

16-bit signed integer samples with planar channel arrangement.

+
s32-planar +
+

32-bit signed integer samples with planar channel arrangement.

+
f32-planar +
+

32-bit float samples with planar channel arrangement.

+
+

9.3.1. Arrangement of audio buffer

+

When an AudioData has an AudioSampleFormat that is +interleaved, the audio samples from different channels are laid out +consecutively in the same buffer, in the order described in the section +§ 9.3.3 Audio channel ordering. The AudioData has a single plane, that contains a +number of elements therefore equal to [[number of frames]] * +[[number of channels]].

+

When an AudioData has an AudioSampleFormat that is +planar, the audio samples from different channels are laid out +in different buffers, themselves arranged in an order described in the section +§ 9.3.3 Audio channel ordering. The AudioData has a number of planes equal to the +AudioData’s [[number of channels]]. Each plane contains +[[number of frames]] elements.

+

NOTE: The Web Audio API currently uses f32-planar exclusively.

+
+ +NOTE: The following diagram exemplifies the memory layout of planar versus + interleaved AudioSampleFormats + + +

Graphical representation of the memory layout of interleaved and planar
+    formats

+
+

9.3.2. Magnitude of the audio samples

+

The minimum value and maximum value of an audio sample, +for a particular audio sample type, are the values below which +(respectively above which) audio clipping might occur. They are otherwise regular +types, that can hold values outside this interval during intermediate +processing.

+

The bias value for an audio sample type is the value that often +corresponds to the middle of the range (but often the range is not symmetrical). +An audio buffer comprised only of values equal to the bias value is silent.

+ + + + + + + + +
Sample type + IDL type + Minimum value + Bias value + Maximum value +
u8 + octet + 0 + 128 + +255 +
s16 + short + -32768 + 0 + +32767 +
s32 + long + -2147483648 + 0 + +2147483647 +
f32 + float + -1.0 + 0.0 + +1.0 +
+

NOTE: There is no data type that can hold 24 bits of information conveniently, +but audio content using 24-bit samples is common, so 32-bits integers are +commonly used to hold 24-bit content.

+

AudioData containing 24-bit samples SHOULD store those +samples in s32 or f32. When samples are stored in s32, each sample +MUST be left-shifted by 8 bits. By virtue of this +process, samples outside of the valid 24-bit range ([-8388608, +8388607]) will +be clipped. To avoid clipping and ensure lossless transport, samples +MAY be converted to f32.

+

NOTE: While clipping is unavoidable in u8, s16, and s32 samples due +to their storage types, implementations SHOULD take +care not to clip internally when handling f32 samples.

+

9.3.3. Audio channel ordering

+

When decoding, the ordering of the audio channels in the resulting AudioData +MUST be the same as what is present in the +EncodedAudioChunk.

+

When encoding, the ordering of the audio channels in the resulting +EncodedAudioChunk MUST be the same as what is +present in the given AudioData.

+

In other terms, no channel reordering is performed when encoding and decoding.

+

NOTE: The container either implies or specifies the channel mapping: the +channel attributed to a particular channel index.

+

9.4. VideoFrame Interface

+

NOTE: VideoFrame is a CanvasImageSource. A VideoFrame can be + passed to any method accepting a CanvasImageSource, including + CanvasDrawImage’s drawImage().

+
[Exposed=(Window,DedicatedWorker), Serializable, Transferable]
+interface VideoFrame {
+  constructor(CanvasImageSource image, optional VideoFrameInit init = {});
+  constructor(AllowSharedBufferSource data, VideoFrameBufferInit init);
+
+  readonly attribute VideoPixelFormat? format;
+  readonly attribute unsigned long codedWidth;
+  readonly attribute unsigned long codedHeight;
+  readonly attribute DOMRectReadOnly? codedRect;
+  readonly attribute DOMRectReadOnly? visibleRect;
+  readonly attribute double rotation;
+  readonly attribute boolean flip;
+  readonly attribute unsigned long displayWidth;
+  readonly attribute unsigned long displayHeight;
+  readonly attribute unsigned long long? duration;  // microseconds
+  readonly attribute long long timestamp;           // microseconds
+  readonly attribute VideoColorSpace colorSpace;
+
+  VideoFrameMetadata metadata();
+
+  unsigned long allocationSize(
+      optional VideoFrameCopyToOptions options = {});
+  Promise<sequence<PlaneLayout>> copyTo(
+      AllowSharedBufferSource destination,
+      optional VideoFrameCopyToOptions options = {});
+  VideoFrame clone();
+  undefined close();
+};
+
+dictionary VideoFrameInit {
+  unsigned long long duration;  // microseconds
+  long long timestamp;          // microseconds
+  AlphaOption alpha = "keep";
+
+  // Default matches image. May be used to efficiently crop. Will trigger
+  // new computation of displayWidth and displayHeight using image's pixel
+  // aspect ratio unless an explicit displayWidth and displayHeight are given.
+  DOMRectInit visibleRect;
+
+  double rotation = 0;
+  boolean flip = false;
+
+  // Default matches image unless visibleRect is provided.
+  [EnforceRange] unsigned long displayWidth;
+  [EnforceRange] unsigned long displayHeight;
+
+  VideoFrameMetadata metadata;
+};
+
+dictionary VideoFrameBufferInit {
+  required VideoPixelFormat format;
+  required [EnforceRange] unsigned long codedWidth;
+  required [EnforceRange] unsigned long codedHeight;
+  required [EnforceRange] long long timestamp;  // microseconds
+  [EnforceRange] unsigned long long duration;  // microseconds
+
+  // Default layout is tightly-packed.
+  sequence<PlaneLayout> layout;
+
+  // Default visible rect is coded size positioned at (0,0)
+  DOMRectInit visibleRect;
+
+  double rotation = 0;
+  boolean flip = false;
+
+  // Default display dimensions match visibleRect.
+  [EnforceRange] unsigned long displayWidth;
+  [EnforceRange] unsigned long displayHeight;
+
+  VideoColorSpaceInit colorSpace;
+
+  sequence<ArrayBuffer> transfer = [];
+
+  VideoFrameMetadata metadata;
+};
+
+dictionary VideoFrameMetadata {
+  // Possible members are recorded in the VideoFrame Metadata Registry.
+};
+
+

9.4.1. Internal Slots

+
+
[[resource reference]] +
+

A reference to the media resource that stores the pixel data for +this frame.

+
[[format]] +
+

A VideoPixelFormat describing the pixel format of the VideoFrame. +Will be null whenever the underlying format does not map to a +VideoPixelFormat or when [[Detached]] is true.

+
[[coded width]] +
+

Width of the VideoFrame in pixels, potentially including non-visible +padding, and prior to considering potential ratio adjustments.

+
[[coded height]] +
+

Height of the VideoFrame in pixels, potentially including non-visible +padding, and prior to considering potential ratio adjustments.

+
[[visible left]] +
+

The number of pixels defining the left offset of the visible rectangle.

+
[[visible top]] +
+

The number of pixels defining the top offset of the visible rectangle.

+
[[visible width]] +
+

The width of pixels to include in visible rectangle, starting from +[[visible left]].

+
[[visible height]] +
+

The height of pixels to include in visible rectangle, starting from +[[visible top]].

+
[[rotation]] +
+

The rotation applied to the VideoFrame when rendered, in degrees +clockwise. Rotation applies before flip.

+
[[flip]] +
+

Whether a horizontal flip is applied to the VideoFrame when rendered. +Flip is applied after rotation.

+
[[display width]] +
+

Width of the VideoFrame when displayed after applying aspect ratio +adjustments.

+
[[display height]] +
+

Height of the VideoFrame when displayed after applying aspect ratio +adjustments.

+
[[duration]] +
+

The presentation duration of this VideoFrame.

+
[[timestamp]] +
+

The presentation timestamp of this VideoFrame.

+
[[color space]] +
+

The VideoColorSpace associated with this frame.

+
[[metadata]] +
+

The VideoFrameMetadata associated with this frame. +Possible members are recorded in [webcodecs-video-frame-metadata-registry]. +By design, all VideoFrameMetadata properties are serializable.

+
+

9.4.2. Constructors

+

+ VideoFrame(image, init) +

+
    +
  1. +

    Check the usability of the image argument. If this throws an +exception or returns bad, then throw an +InvalidStateError DOMException.

    +
  2. +

    If image is not origin-clean, then throw a SecurityError DOMException.

    +
  3. +

    Let frame be a new VideoFrame.

    +
  4. +

    Switch on image:

    +

    NOTE: Authors are encouraged to provide a meaningful timestamp unless it is + implicitly provided by the CanvasImageSource at construction. + Interfaces that consume VideoFrames can rely on this value for + timing decisions. For example, VideoEncoder can use + timestamp values to guide rate control (see + framerate).

    + +
  5. +

    Return frame.

    +
+

+ VideoFrame(data, init) +

+
    +
  1. +

    If init is not a valid VideoFrameBufferInit, throw a TypeError.

    +
  2. +

    Let defaultRect be «[ "x" → 0, "y" → 0, +"width" → init.codedWidth, "height" → + init.codedHeight ]».

    +
  3. +

    Let overrideRect be undefined.

    +
  4. +

    If init.visibleRect exists, assign its +value to overrideRect.

    +
  5. +

    Let parsedRect be the result of running the Parse Visible Rect algorithm with defaultRect, overrideRect, + init.codedWidth, + init.codedHeight, and + init.format.

    +
  6. +

    If parsedRect is an exception, return parsedRect.

    +
  7. +

    Let optLayout be undefined.

    +
  8. +

    If init.layout exists, assign its value +to optLayout.

    +
  9. +

    Let combinedLayout be the result of running the Compute Layout and Allocation Size algorithm with parsedRect, +init.format, and optLayout.

    +
  10. +

    If combinedLayout is an exception, throw combinedLayout.

    +
  11. +

    If data.byteLength is less than combinedLayout’s +allocationSize, throw a TypeError.

    +
  12. +

    If init.transfer contains more than one reference + to the same ArrayBuffer, then throw a DataCloneError DOMException.

    +
  13. +

    For each transferable in init.transfer:

    +
      +
    1. +

      If [[Detached]] internal slot is true, + then throw a DataCloneError DOMException.

      +
    +
  14. +

    If init.transfer contains an ArrayBuffer +referenced by data the User Agent MAY choose to:

    +
      +
    1. +

      Let resource be a new media resource referencing pixel data in +data.

      +
    +
  15. +

    Otherwise:

    +
      +
    1. +

      Let resource be a new media resource containing a copy of data. +Use visibleRect and layout +to determine where in data the pixels for each plane reside.

      +

      The User Agent MAY choose to allocate +resource with a larger coded size and plane strides to improve memory +alignment. Increases will be reflected by codedWidth and +codedHeight. Additionally, the User Agent MAY use visibleRect to +copy only the visible rectangle. It MAY also +reposition the visible rectangle within resource. The final position +will be reflected by visibleRect.

      +
    +
  16. +

    For each transferable in init.transfer:

    +
      +
    1. +

      Perform DetachArrayBuffer +on transferable

      +
    +
  17. +

    Let resourceCodedWidth be the coded width of resource.

    +
  18. +

    Let resourceCodedHeight be the coded height of resource.

    +
  19. +

    Let resourceVisibleLeft be the left offset for the visible rectangle of +resource.

    +
  20. +

    Let resourceVisibleTop be the top offset for the visible rectangle of +resource.

    +

    The spec SHOULD provide definitions (and + possibly diagrams) for coded size, visible rectangle, and display size. + See #166.

    +
  21. +

    Let frame be a new VideoFrame object initialized as follows:

    +
      +
    1. +

      Assign resourceCodedWidth, resourceCodedHeight, +resourceVisibleLeft, and resourceVisibleTop to +[[coded width]], [[coded height]], +[[visible left]], and [[visible top]] +respectively.

      +
    2. +

      If init.visibleRect exists:

      +
        +
      1. +

        Let truncatedVisibleWidth be the value of +visibleRect.width after +truncating.

        +
      2. +

        Assign truncatedVisibleWidth to [[visible width]].

        +
      3. +

        Let truncatedVisibleHeight be the value of +visibleRect.height after +truncating.

        +
      4. +

        Assign truncatedVisibleHeight to [[visible height]].

        +
      +
    3. +

      Otherwise:

      +
        +
      1. +

        Assign [[coded width]] to +[[visible width]].

        +
      2. +

        Assign [[coded height]] to +[[visible height]].

        +
      +
    4. +

      Assign the result of running the Parse Rotation algorithm, +with init.rotation, to +[[rotation]].

      +
    5. +

      Assign init.flip to +[[flip]].

      +
    6. +

      If displayWidth and +displayHeight exist in init, assign +them to [[display width]] and +[[display height]] respectively.

      +
    7. +

      Otherwise:

      +
        +
      1. +

        If [[rotation]] is equal to 0 or 180:

        +
          +
        1. +

          Assign [[visible width]] to +[[display width]].

          +
        2. +

          Assign [[visible height]] to +[[display height]].

          +
        +
      2. +

        Otherwise:

        +
          +
        1. +

          Assign [[visible height]] to +[[display width]].

          +
        2. +

          Assign [[visible width]] to +[[display height]].

          +
        +
      +
    8. +

      Assign init’s timestamp and +duration to [[timestamp]] and +[[duration]] respectively.

      +
    9. +

      Let colorSpace be undefined.

      +
    10. +

      If init.colorSpace exists, assign its +value to colorSpace.

      +
    11. +

      Assign init’s format to +[[format]].

      +
    12. +

      Assign the result of running the Pick Color Space +algorithm, with colorSpace and [[format]], to +[[color space]].

      +
    13. +

      Assign the result of calling Copy VideoFrame metadata +with init’s metadata to +frame.[[metadata]].

      +
    +
  22. +

    Return frame.

    +
+

9.4.3. Attributes

+
+
format, of type VideoPixelFormat, readonly, nullable +
+

Describes the arrangement of bytes in each plane as well as the number and +order of the planes. Will be null whenever the underlying format does not +map to a VideoPixelFormat or when [[Detached]] is +true.

+

The format getter steps are to return +[[format]].

+
codedWidth, of type unsigned long, readonly +
+

Width of the VideoFrame in pixels, potentially including non-visible +padding, and prior to considering potential ratio adjustments.

+

The codedWidth getter steps are to return +[[coded width]].

+
codedHeight, of type unsigned long, readonly +
+

Height of the VideoFrame in pixels, potentially including non-visible +padding, and prior to considering potential ratio adjustments.

+

The codedHeight getter steps are to return +[[coded height]].

+
codedRect, of type DOMRectReadOnly, readonly, nullable +
+

A DOMRectReadOnly with width and +height matching codedWidth and +codedHeight and x and +y at (0,0). Offered for convenience for use with +allocationSize() and copyTo().

+

The codedRect getter steps are:

+
    +
  1. +

    If [[Detached]] is true, return null.

    +
  2. +

    Let rect be a new DOMRectReadOnly, initialized as follows:

    +
      +
    1. +

      Assign 0 to x and y.

      +
    2. +

      Assign [[coded width]] and +[[coded height]] to width and +height respectively.

      +
    +
  3. +

    Return rect.

    +
+
visibleRect, of type DOMRectReadOnly, readonly, nullable +
+

A DOMRectReadOnly describing the visible rectangle of pixels for this +VideoFrame.

+

The visibleRect getter steps are:

+
    +
  1. +

    If [[Detached]] is true, return null.

    +
  2. +

    Let rect be a new DOMRectReadOnly, initialized as follows:

    +
      +
    1. +

      Assign [[visible left]], +[[visible top]], [[visible width]], +and [[visible height]] to x, +y, width, and +height respectively.

      +
    +
  3. +

    Return rect.

    +
+
rotation, of type double, readonly +
+

The rotation applied to the VideoFrame when rendered, in degrees +clockwise. Rotation applies before flip.

+

The rotation getter steps are to return +[[rotation]].

+
flip, of type boolean, readonly +
+

Whether a horizontal flip is applied to the VideoFrame when rendered. +Flip applies after rotation.

+

The flip getter steps are to return [[flip]].

+
displayWidth, of type unsigned long, readonly +
+

Width of the VideoFrame when displayed after applying rotation and aspect +ratio adjustments.

+

The displayWidth getter steps are to return +[[display width]].

+
displayHeight, of type unsigned long, readonly +
+

Height of the VideoFrame when displayed after applying rotation and aspect +ratio adjustments.

+

The displayHeight getter steps are to return +[[display height]].

+
timestamp, of type long long, readonly +
+

Returns the presentation timestamp of this VideoFrame.

+

The timestamp getter steps are to return +[[timestamp]].

+
duration, of type unsigned long long, readonly, nullable +
+

Returns the presentation duration of this VideoFrame.

+

The duration getter steps are to return +[[duration]].

+
colorSpace, of type VideoColorSpace, readonly +
+

The VideoColorSpace associated with this frame.

+

The colorSpace getter steps are to return +[[color space]].

+
+

9.4.4. Internal Structures

+ +A combined buffer layout is a struct that consists of: + + +

A computed plane layout is a struct that consists of:

+ +

9.4.5. Methods

+
+
allocationSize(options) +
+

Returns the minimum byte length for a valid destination BufferSource +to be used with copyTo() with the given options.

+

When invoked, run these steps:

+
    +
  1. +

    If [[Detached]] is true, throw an +InvalidStateError DOMException.

    +
  2. +

    If [[format]] is null, throw a NotSupportedError +DOMException.

    +
  3. +

    Let combinedLayout be the result of running the Parse VideoFrameCopyToOptions algorithm with options.

    +
  4. +

    If combinedLayout is an exception, throw combinedLayout.

    +
  5. +

    Return combinedLayout’s allocationSize.

    +
+
copyTo(destination, options) +
+

Asynchronously copies the planes of this frame into destination according +to options. The format of the data is options.format, +if it exists or this VideoFrame’s format otherwise.

+

NOTE: Promises that are returned by several calls to + copyTo() are not guaranteed to resolve in the order they + were returned.

+

When invoked, run these steps:

+
    +
  1. +

    If [[Detached]] is true, return a promise rejected +with a InvalidStateError DOMException.

    +
  2. +

    If [[format]] is null, return a promise rejected with a +NotSupportedError DOMException.

    +
  3. +

    Let combinedLayout be the result of running the Parse VideoFrameCopyToOptions algorithm with options.

    +
  4. +

    If combinedLayout is an exception, return a promise rejected with +combinedLayout.

    +
  5. +

    If destination.byteLength is less than combinedLayout’s allocationSize, return a promise rejected with a +TypeError.

    +
  6. +

    If options.format is equal to one of +RGBA, RGBX, BGRA, BGRX then:

    +
      +
    1. +

      Let newOptions be the result of running the Clone Configuration + algorithm with options.

      +
    2. +

      Assign undefined to newOptions.format.

      +
    3. +

      Let rgbFrame be the result of running the Convert to RGB frame + algorithm with this, options.format, + and options.colorSpace.

      +
    4. +

      Return the result of calling copyTo() on rgbFrame with + destination and newOptions.

      +
    +
  7. +

    Let p be a new Promise.

    +
  8. +

    Let copyStepsQueue be the result of starting a new parallel queue.

    +
  9. +

    Let planeLayouts be a new list.

    +
  10. +

    Enqueue the following steps to copyStepsQueue:

    +
      +
    1. +

      Let resource be the media resource referenced by +[[resource reference]].

      +
    2. +

      Let numPlanes be the number of planes as defined by +[[format]].

      +
    3. +

      Let planeIndex be 0.

      +
    4. +

      While planeIndex is less than combinedLayout’s numPlanes:

      +
        +
      1. +

        Let sourceStride be the stride of the plane in resource as +identified by planeIndex.

        +
      2. +

        Let computedLayout be the computed plane layout in +combinedLayout’s computedLayouts at +the position of planeIndex

        +
      3. +

        Let sourceOffset be the product of multiplying +computedLayout’s sourceTop by +sourceStride

        +
      4. +

        Add computedLayout’s sourceLeftBytes +to sourceOffset.

        +
      5. +

        Let destinationOffset be computedLayout’s +destinationOffset.

        +
      6. +

        Let rowBytes be computedLayout’s +sourceWidthBytes.

        +
      7. +

        Let layout be a new PlaneLayout, with +offset set to destinationOffset and +stride set to rowBytes.

        +
      8. +

        Let row be 0.

        +
      9. +

        While row is less than computedLayout’s +sourceHeight:

        +
          +
        1. +

          Copy rowBytes bytes from resource starting at +sourceOffset to destination starting at +destinationOffset.

          +
        2. +

          Increment sourceOffset by sourceStride.

          +
        3. +

          Increment destinationOffset by computedLayout’s +destinationStride.

          +
        4. +

          Increment row by 1.

          +
        +
      10. +

        Increment planeIndex by 1.

        +
      11. +

        Append layout to planeLayouts.

        +
      +
    5. +

      Queue a task to resolve p with planeLayouts.

      +
    +
  11. +

    Return p.

    +
+
clone() +
+

Creates a new VideoFrame with a reference to the same +media resource.

+

When invoked, run these steps:

+
    +
  1. +

    If the value of frame’s [[Detached]] internal slot is +true, throw an InvalidStateError DOMException.

    +
  2. +

    Return the result of running the Clone VideoFrame algorithm with +this.

    +
+
close() +
+

Clears all state and releases the reference to the media resource. +Close is final.

+

When invoked, run the Close VideoFrame algorithm with this.

+
metadata() +
+

Gets the VideoFrameMetadata associated with this frame.

+

When invoked, run these steps:

+
    +
  1. +

    If [[Detached]] is true, +throw an InvalidStateError DOMException.

    +
  2. +

    Return the result of calling Copy VideoFrame metadata +with [[metadata]].

    +
+
+

9.4.6. Algorithms

+
+
Create a VideoFrame (with output, timestamp, duration, displayAspectWidth, displayAspectHeight, colorSpace, rotation, and flip) +
+
    +
  1. +

    Let frame be a new VideoFrame, constructed as follows:

    +
      +
    1. +

      Assign false to [[Detached]].

      +
    2. +

      Let resource be the media resource described by output.

      +
    3. +

      Let resourceReference be a reference to resource.

      +
    4. +

      Assign resourceReference to [[resource reference]].

      +
    5. +

      If output uses a recognized VideoPixelFormat, assign that format to +[[format]]. Otherwise, assign null to +[[format]].

      +
    6. +

      Let codedWidth and codedHeight be the coded width and height of the +output in pixels.

      +
    7. +

      Let visibleLeft, visibleTop, visibleWidth, and visibleHeight be +the left, top, width and height for the visible rectangle of output.

      +
    8. +

      Let displayWidth and displayHeight be the display size of +output in pixels.

      +
    9. +

      If displayAspectWidth and displayAspectHeight are provided, +increase displayWidth or displayHeight until the ratio of +displayWidth to displayHeight matches the ratio of +displayAspectWidth to displayAspectHeight.

      +
    10. +

      Assign codedWidth, codedHeight, visibleLeft, visibleTop, +visibleWidth, visibleHeight, displayWidth, and +displayHeight to [[coded width]], +[[coded height]], [[visible left]], +[[visible top]], [[visible width]], +[[visible height]], [[display width]], +and [[display height]] respectively.

      +
    11. +

      Assign duration and timestamp to [[duration]] and +[[timestamp]] respectively.

      +
    12. +

      Assign [[color space]] with the result of running the +Pick Color Space algorithm, with colorSpace and +[[format]].

      +
    13. +

      Assign rotation and flip to rotation +and flip respectively.

      +
    +
  2. +

    Return frame.

    +
+
Pick Color Space (with overrideColorSpace and format) +
+
    +
  1. +

    If overrideColorSpace is provided, return a new VideoColorSpace +constructed with overrideColorSpace.

    +

    User Agents MAY replace null members of the +provided overrideColorSpace with guessed values as determined by implementer +defined heuristics.

    +
  2. +

    Otherwise, if [[format]] is an RGB format return a new +instance of the sRGB Color Space

    +
  3. +

    Otherwise, return a new instance of the REC709 Color Space.

    +
+
Validate VideoFrameInit (with format, codedWidth, and +codedHeight): +
+
    +
  1. +

    If visibleRect exists:

    +
      +
    1. +

      Let validAlignment be the result of running the +Verify Rect Offset Alignment with format and +visibleRect.

      +
    2. +

      If validAlignment is false, return false.

      +
    3. +

      If any attribute of visibleRect is negative or +not finite, return false.

      +
    4. +

      If visibleRect.width == 0 or +visibleRect.height == 0 return +false.

      +
    5. +

      If visibleRect.y + +visibleRect.height > +codedHeight, return false.

      +
    6. +

      If visibleRect.x + +visibleRect.width > +codedWidth, return false.

      +
    +
  2. +

    If codedWidth = 0 or codedHeight = 0, return false.

    +
  3. +

    If only one of displayWidth or +displayHeight exists, return false.

    +
  4. +

    If displayWidth == 0 or +displayHeight == 0, return false.

    +
  5. +

    Return true.

    +
+
To check if a VideoFrameBufferInit is a +valid VideoFrameBufferInit, run these steps: +
+
    +
  1. +

    If codedWidth = 0 or +codedHeight = 0, return false.

    +
  2. +

    If any attribute of visibleRect is negative or +not finite, return false.

    +
  3. +

    If visibleRect.y + +visibleRect.height > +codedHeight, return false.

    +
  4. +

    If visibleRect.x + +visibleRect.width > +codedWidth, return false.

    +
  5. +

    If only one of displayWidth or +displayHeight exists, return false.

    +
  6. +

    If displayWidth = 0 or +displayHeight = 0, return false.

    +
  7. +

    Return true.

    +
+
Initialize Frame From Other Frame (with init, +frame, and otherFrame) +
+
    +
  1. +

    Let format be otherFrame.format.

    +
  2. +

    If init.alpha is discard, +assign otherFrame.format’s equivalent opaque format to +format.

    +
  3. +

    Let validInit be the result of running the Validate VideoFrameInit +algorithm with format and otherFrame’s +[[coded width]] and [[coded height]].

    +
  4. +

    If validInit is false, throw a TypeError.

    +
  5. +

    Let resource be the media resource referenced by otherFrame’s +[[resource reference]].

    +
  6. +

    Assign a new reference for resource to frame’s +[[resource reference]].

    +
  7. +

    Assign the following attributes from otherFrame to frame: +codedWidth, codedHeight, +colorSpace.

    +
  8. +

    Let defaultVisibleRect be the result of performing the getter steps +for visibleRect on otherFrame.

    +
  9. +

    Let baseRotation and baseFlip be otherFrame’s +[[rotation]] and [[flip]], respectively.

    +
  10. +

    Let defaultDisplayWidth and defaultDisplayHeight be otherFrame’s +[[display width]] and [[display height]], +respectively.

    +
  11. +

    Run the Initialize Visible Rect, Orientation, and Display Size +algorithm with init, frame, defaultVisibleRect, baseRotation, +baseFlip, defaultDisplayWidth, and defaultDisplayHeight.

    +
  12. +

    If duration exists in init, assign it to +frame’s [[duration]]. Otherwise, assign +otherFrame.duration to +frame’s [[duration]].

    +
  13. +

    If timestamp exists in init, assign it to +frame’s [[timestamp]]. Otherwise, assign +otherFrame’s timestamp to +frame’s [[timestamp]].

    +
  14. +

    Assign format to frame.[[format]].

    +
  15. +

    Assign the result of calling Copy VideoFrame metadata +with init’s metadata to frame.[[metadata]].

    +
+
Initialize Frame With Resource (with +init, frame, resource, codedWidth, codedHeight, baseRotation, +baseFlip, defaultDisplayWidth, and defaultDisplayHeight) +
+
    +
  1. +

    Let format be null.

    +
  2. +

    If resource uses a recognized VideoPixelFormat, assign the +VideoPixelFormat of resource to format.

    +
  3. +

    Let validInit be the result of running the Validate VideoFrameInit +algorithm with format, codedWidth, and codedHeight.

    +
  4. +

    If validInit is false, throw a TypeError.

    +
  5. +

    Assign a new reference for resource to frame’s +[[resource reference]].

    +
  6. +

    If init.alpha is discard, assign +format’s equivalent opaque format to format.

    +
  7. +

    Assign format to [[format]]

    +
  8. +

    Assign codedWidth and codedHeight to frame’s +[[coded width]] and [[coded height]] +respectively.

    +
  9. +

    Let defaultVisibleRect be a new DOMRect constructed with +«[ "x" → 0, "y" → 0, "width" → codedWidth, "height" → codedHeight ]».

    +
  10. +

    Run the Initialize Visible Rect, Orientation, and Display Size +algorithm with init, frame, defaultVisibleRect, baseRotation, baseFlip, +defaultDisplayWidth, and defaultDisplayHeight.

    +
  11. +

    Assign init.duration to +frame’s [[duration]].

    +
  12. +

    Assign init.timestamp to +frame’s [[timestamp]].

    +
  13. +

    If resource has a known VideoColorSpace, assign its value to +[[color space]].

    +
  14. +

    Otherwise, assign a new VideoColorSpace, constructed with an empty +VideoColorSpaceInit, to [[color space]].

    +
+
Initialize Visible Rect, Orientation, and Display Size +(with init, frame, defaultVisibleRect, baseRotation, baseFlip, +defaultDisplayWidth and defaultDisplayHeight) +
+
    +
  1. +

    Let visibleRect be defaultVisibleRect.

    +
  2. +

    If init.visibleRect exists:

    +
      +
    1. +

      If any attribute of init.visibleRect is negative or +not finite, throw a TypeError.

      +
    2. +

      Assign init.visibleRect to visibleRect.

      +
    +
  3. +

    Assign visibleRect’s x, y, width, +and height, to frame’s [[visible left]], +[[visible top]], [[visible width]], and +[[visible height]] respectively.

    +
  4. +

    Let rotation be the result of running the Parse Rotation +algorithm, with init.rotation.

    +
  5. +

    Assign the result of running the Add Rotations algorithm, +with baseRotation, baseFlip, and rotation, to frame’s +[[rotation]].

    +
  6. +

    If baseFlip is equal to init.flip, assign false +to frame’s [[flip]]. Otherwise, assign true to +frame’s [[flip]].

    +
  7. +

    If displayWidth and displayHeight +exist in init, assign them to [[display width]] +and [[display height]] respectively.

    +
  8. +

    Otherwise:

    +
      +
    1. +

      If baseRotation is equal to 0 or 180:

      +
        +
      1. +

        Let widthScale be the result of dividing defaultDisplayWidth +by defaultVisibleRect.width.

        +
      2. +

        Let heightScale be the result of dividing +defaultDisplayHeight by +defaultVisibleRect.height.

        +
      +
    2. +

      Otherwise:

      +
        +
      1. +

        Let widthScale be the result of dividing defaultDisplayHeight +by defaultVisibleRect.width.

        +
      2. +

        Let heightScale be the result of dividing defaultDisplayWidth +by defaultVisibleRect.height.

        +
      +
    3. +

      Let displayWidth be +|frame|'s {{VideoFrame/[[visible width]]}} * |widthScale|, rounded +to the nearest integer.

      +
    4. +

      Let displayHeight be +|frame|'s {{VideoFrame/[[visible height]]}} * |heightScale|, +rounded to the nearest integer.

      +
    5. +

      If rotation is equal to 0 or 180:

      +
        +
      1. +

        Assign displayWidth to frame’s +[[display width]].

        +
      2. +

        Assign displayHeight to frame’s +[[display height]].

        +
      +
    6. +

      Otherwise:

      +
        +
      1. +

        Assign displayHeight to frame’s +[[display width]].

        +
      2. +

        Assign displayWidth to frame’s +[[display height]].

        +
      +
    +
+
Clone VideoFrame (with frame) +
+
    +
  1. +

    Let clone be a new VideoFrame initialized as follows:

    +
      +
    1. +

      Let resource be the media resource referenced by frame’s +[[resource reference]].

      +
    2. +

      Let newReference be a new reference to resource.

      +
    3. +

      Assign newReference to clone’s +[[resource reference]].

      +
    4. +

      Assign all remaining internal slots of frame (excluding +[[resource reference]]) to those of the same name +in clone.

      +
    +
  2. +

    Return clone.

    +
+
Close VideoFrame (with frame) +
+
    +
  1. +

    Assign null to frame’s [[resource reference]].

    +
  2. +

    Assign true to frame’s [[Detached]].

    +
  3. +

    Assign null to frame’s [[format]].

    +
  4. +

    Assign 0 to frame’s [[coded width]], +[[coded height]], [[visible left]], +[[visible top]], [[visible width]], +[[visible height]], [[rotation]], +[[display width]], and [[display height]].

    +
  5. +

    Assign false to frame’s [[flip]].

    +
  6. +

    Assign a new VideoFrameMetadata to frame’s [[metadata]].

    +
+
Parse Rotation (with rotation) +
+
    +
  1. +

    Let alignedRotation be the nearest multiple of 90 to rotation, +rounding ties towards positive infinity.

    +
  2. +

    Let fullTurns be the greatest multiple of 360 less than or equal to +alignedRotation.

    +
  3. +

    Return |alignedRotation| - |fullTurns|.

    +
+
Add Rotations (with baseRotation, baseFlip, +and rotation) +
+
    +
  1. +

    If baseFlip is false, let combinedRotation be +|baseRotation| + |rotation|. Otherwise, let combinedRotation be +|baseRotation| - |rotation|.

    +
  2. +

    Let fullTurns be the greatest multiple of 360 less than or equal to +combinedRotation.

    +
  3. +

    Return |combinedRotation| - |fullTurns|.

    +
+
Parse VideoFrameCopyToOptions (with options) +
+
    +
  1. +

    Let defaultRect be the result of performing the getter steps for +visibleRect.

    +
  2. +

    Let overrideRect be undefined.

    +
  3. +

    If options.rect exists, assign the +value of options.rect to overrideRect.

    +
  4. +

    Let parsedRect be the result of running the Parse Visible Rect algorithm with defaultRect, overrideRect, +[[coded width]], [[coded height]], and +[[format]].

    +
  5. +

    If parsedRect is an exception, return parsedRect.

    +
  6. +

    Let optLayout be undefined.

    +
  7. +

    If options.layout exists, assign +its value to optLayout.

    +
  8. +

    Let format be undefined.

    +
  9. +

    If options.format does not exist, +assign [[format]] to format.

    +
  10. +

    Otherwise, if options.format is equal to +one of RGBA, RGBX, BGRA, BGRX, then assign +options.format to format, +otherwise return NotSupportedError.

    +
  11. +

    Let combinedLayout be the result of running the Compute Layout and Allocation Size algorithm with parsedRect, format, +and optLayout.

    +
  12. +

    Return combinedLayout.

    +
+
Verify Rect Offset Alignment (with format and +rect) +
+
    +
  1. +

    If format is null, return true.

    +
  2. +

    Let planeIndex be 0.

    +
  3. +

    Let numPlanes be the number of planes as defined by format.

    +
  4. +

    While planeIndex is less than numPlanes:

    +
      +
    1. +

      Let plane be the Plane identified by planeIndex as defined by +format.

      +
    2. +

      Let sampleWidth be the horizontal sub-sampling factor of each +subsample for plane.

      +
    3. +

      Let sampleHeight be the vertical sub-sampling factor of each +subsample for plane.

      +
    4. +

      If rect.x is not a multiple of sampleWidth, +return false.

      +
    5. +

      If rect.y is not a multiple of sampleHeight, +return false.

      +
    6. +

      Increment planeIndex by 1.

      +
    +
  5. +

    Return true.

    +
+
Parse Visible Rect (with defaultRect, +overrideRect, codedWidth, codedHeight, and format) +
+
    +
  1. +

    Let sourceRect be defaultRect

    +
  2. +

    If overrideRect is not undefined:

    +
      +
    1. +

      If any attribute of overrideRect is negative or not finite, return a TypeError.

      +
    2. +

      If either of overrideRect.width or +height is 0, return a TypeError.

      +
    3. +

      If the sum of overrideRect.x and +overrideRect.width is greater than +codedWidth, return a TypeError.

      +
    4. +

      If the sum of overrideRect.y and +overrideRect.height is greater than +codedHeight, return a TypeError.

      +
    5. +

      Assign overrideRect to sourceRect.

      +
    +
  3. +

    Let validAlignment be the result of running the +Verify Rect Offset Alignment algorithm with format and +sourceRect.

    +
  4. +

    If validAlignment is false, throw a TypeError.

    +
  5. +

    Return sourceRect.

    +
+
Compute Layout and Allocation Size (with +parsedRect, format, and layout) +
+
    +
  1. +

    Let numPlanes be the number of planes as defined by format.

    +
  2. +

    If layout is not undefined and its length does not equal numPlanes, + throw a TypeError.

    +
  3. +

    Let minAllocationSize be 0.

    +
  4. +

    Let computedLayouts be a new list.

    +
  5. +

    Let endOffsets be a new list.

    +
  6. +

    Let planeIndex be 0.

    +
  7. +

    While planeIndex < numPlanes:

    +
      +
    1. +

      Let plane be the Plane identified by planeIndex as defined by +format.

      +
    2. +

      Let sampleBytes be the number of bytes per sample for plane.

      +
    3. +

      Let sampleWidth be the horizontal sub-sampling factor of each +subsample for plane.

      +
    4. +

      Let sampleHeight be the vertical sub-sampling factor of each +subsample for plane.

      +
    5. +

      Let computedLayout be a new computed plane layout.

      +
    6. +

      Set computedLayout’s sourceTop to the +result of the division of truncated parsedRect.y +by sampleHeight, rounded up to the nearest integer.

      +
    7. +

      Set computedLayout’s sourceHeight to the +result of the division of truncated +parsedRect.height by sampleHeight, rounded up +to the nearest integer.

      +
    8. +

      Set computedLayout’s sourceLeftBytes to +the result of the integer division of +truncated parsedRect.x by sampleWidth, +multiplied by sampleBytes.

      +
    9. +

      Set computedLayout’s sourceWidthBytes to +the result of the integer division of +truncated parsedRect.width by sampleWidth, +multiplied by sampleBytes.

      +
    10. +

      If layout is not undefined:

      +
        +
      1. +

        Let planeLayout be the PlaneLayout in layout at position +planeIndex.

        +
      2. +

        If planeLayout.stride is less than +computedLayout’s sourceWidthBytes, +return a TypeError.

        +
      3. +

        Assign planeLayout.offset to computedLayout’s +destinationOffset.

        +
      4. +

        Assign planeLayout.stride to computedLayout’s +destinationStride.

        +
      +
    11. +

      Otherwise:

      +

      NOTE: If an explicit layout was not provided, the following steps + default to tight packing.

      +
        +
      1. +

        Assign minAllocationSize to computedLayout’s +destinationOffset.

        +
      2. +

        Assign computedLayout’s +sourceWidthBytes to +computedLayout’s destinationStride.

        +
      +
    12. +

      Let planeSize be the product of multiplying computedLayout’s + destinationStride and + sourceHeight.

      +
    13. +

      Let planeEnd be the sum of planeSize and computedLayout’s + destinationOffset.

      +
    14. +

      If planeSize or planeEnd is greater than maximum range of + unsigned long, return a TypeError.

      +
    15. +

      Append planeEnd to endOffsets.

      +
    16. +

      Assign the maximum of minAllocationSize and planeEnd to + minAllocationSize.

      +

      NOTE: The above step uses a maximum to allow for the + possibility that user specified plane offsets reorder + planes.

      +
    17. +

      Let earlierPlaneIndex be 0.

      +
    18. +

      While earlierPlaneIndex is less than planeIndex.

      +
        +
      1. +

        Let earlierLayout be computedLayouts[earlierPlaneIndex].

        +
      2. +

        If endOffsets[planeIndex] is less than or equal to +earlierLayout’s destinationOffset or +if endOffsets[earlierPlaneIndex] is less than or equal to +computedLayout’s destinationOffset, +continue.

        +

        NOTE: If plane A ends before plane B starts, they do not + overlap.

        +
      3. +

        Otherwise, return a TypeError.

        +
      4. +

        Increment earlierPlaneIndex by 1.

        +
      +
    19. +

      Append computedLayout to computedLayouts.

      +
    20. +

      Increment planeIndex by 1.

      +
    +
  8. +

    Let combinedLayout be a new combined buffer layout, initialized as +follows:

    +
      +
    1. +

      Assign computedLayouts to +computedLayouts.

      +
    2. +

      Assign minAllocationSize to +allocationSize.

      +
    +
  9. +

    Return combinedLayout.

    +
+
Convert PredefinedColorSpace to VideoColorSpace (with colorSpace) +
+
    +
  1. +

    Assert: colorSpace is equal to one of srgb or display-p3.

    +
  2. +

    If colorSpace is equal to srgb return a new instance of the +sRGB Color Space

    +
  3. +

    If colorSpace is equal to display-p3 return a new instance of the +Display P3 Color Space

    +
+
Convert to RGB frame (with frame, format and colorSpace) +
+
    +
  1. +

    This algorithm MUST be called only if format +is equal to one of RGBA, RGBX, BGRA, BGRX.

    +
  2. +

    Let convertedFrame be a new VideoFrame, constructed as follows:

    +
      +
    1. +

      Assign false to [[Detached]].

      +
    2. +

      Assign format to [[format]].

      +
    3. +

      Let width be frame’s [[visible width]].

      +
    4. +

      Let height be frame’s [[visible height]].

      +
    5. +

      Assign width, height, 0, 0, width, height, width, and +height to [[coded width]], +[[coded height]], [[visible left]], +[[visible top]], [[visible width]], +and [[visible height]] respectively.

      +
    6. +

      Assign frame’s [[duration]] and frame’s +[[timestamp]] to [[duration]] and +[[timestamp]] respectively.

      +
    7. +

      Assign the result of running the Convert +PredefinedColorSpace to VideoColorSpace algorithm with +colorSpace to [[color space]].

      +
    8. +

      Let resource be a new media resource containing the result of +conversion of media resource referenced by frame’s +[[resource reference]] into a color space and pixel +format specified by [[color space]] and +[[format]] respectively.

      +
    9. +

      Assign the reference to resource to [[resource reference]]

      +
    +
  3. +

    Return convertedFrame.

    +
+
Copy VideoFrame metadata (with metadata) +
+
    +
  1. +

    Let metadataCopySerialized be StructuredSerialize(metadata).

    +
  2. +

    Let metadataCopy be StructuredDeserialize(metadataCopySerialized, the current Realm).

    +
  3. +

    Return metadataCopy.

    +
+
+

The goal of this algorithm is to ensure that metadata owned by a VideoFrame is immutable.

+

9.4.7. Transfer and Serialization

+
+
The VideoFrame transfer steps (with value and dataHolder) are: +
+
    +
  1. +

    If value’s [[Detached]] is true, throw a +DataCloneError DOMException.

    +
  2. +

    For all VideoFrame internal slots in value, assign the value of +each internal slot to a field in dataHolder with the same name as the +internal slot.

    +
  3. +

    Run the Close VideoFrame algorithm with value.

    +
+
The VideoFrame transfer-receiving steps (with dataHolder and +value) are: +
+
    +
  1. +

    For all named fields in dataHolder, assign the value of each named +field to the VideoFrame internal slot in value with the same name +as the named field.

    +
+
The VideoFrame serialization steps (with value, serialized, and +forStorage) are: +
+
    +
  1. +

    If value’s [[Detached]] is true, throw a +DataCloneError DOMException.

    +
  2. +

    If forStorage is true, throw a DataCloneError.

    +
  3. +

    Let resource be the media resource referenced by + value’s [[resource reference]].

    +
  4. +

    Let newReference be a new reference to resource.

    +
  5. +

    Assign newReference to serialized’s [[resource reference]].

    +
  6. +

    For all remaining VideoFrame internal slots (excluding +[[resource reference]]) in value, assign the value of +each internal slot to a field in serialized with the same name as the +internal slot.

    +
+
The VideoFrame deserialization steps (with serialized and value) +are: +
+
    +
  1. +

    For all named fields in serialized, assign the value of each named +field to the VideoFrame internal slot in value with the same name +as the named field.

    +
+
+

9.4.8. Rendering

+

When rendered, for example by CanvasDrawImage +drawImage(), a VideoFrame MUST +be converted to a color space compatible with the rendering target, unless color +conversion is explicitly disabled.

+

Color space conversion during ImageBitmap construction is controlled by +ImageBitmapOptions colorSpaceConversion. Setting this +value to "none" disables color space conversion.

+

The rendering of a VideoFrame is produced from the media resource by +applying any necessary color space conversion, cropping to the +visibleRect, rotating clockwise by rotation +degrees, and flipping horizontally if flip is true.

+

9.5. VideoFrame CopyTo() Options

+ +Options to specify a rectangle of pixels to copy, their format, and the offset +and stride of planes in the destination buffer. + + +
dictionary VideoFrameCopyToOptions {
+  DOMRectInit rect;
+  sequence<PlaneLayout> layout;
+  VideoPixelFormat format;
+  PredefinedColorSpace colorSpace;
+};
+
+
+ +NOTE: The steps of copyTo() or allocationSize() +will enforce the following requirements: + + +
+
+
rect, of type DOMRectInit +
+

A DOMRectInit describing the rectangle of pixels to copy from the +VideoFrame. If unspecified, the visibleRect will be used.

+

NOTE: The coded rectangle can be specified by passing VideoFrame’s + codedRect.

+

NOTE: The default rect does not necessarily meet + the sample-alignment requirement and can result in + copyTo() or allocationSize() rejecting.

+
layout, of type sequence<PlaneLayout> +
+

The PlaneLayout for each plane in VideoFrame, affording the option +to specify an offset and stride for each plane in the destination +BufferSource. If unspecified, the planes will be tightly packed. It is +invalid to specify planes that overlap.

+
format, of type VideoPixelFormat +
+

A VideoPixelFormat for the pixel data in the destination +BufferSource. Potential values are: RGBA, RGBX, BGRA, +BGRX. If it does not exist, the destination +BufferSource will be in the same format as the VideoFrame’s format.

+
colorSpace, of type PredefinedColorSpace +
+

A PredefinedColorSpace that MUST be used as +a target color space for the pixel data in the destination +BufferSource, but only if format is one of +RGBA, RGBX, BGRA, BGRX, otherwise it is ignored. +If it does not exist, srgb is used.

+
+

9.6. DOMRects in VideoFrame

+ +The VideoFrame interface uses DOMRects to specify the position and +dimensions for a rectangle of pixels. DOMRectInit is used with +copyTo() and allocationSize() to describe the +dimensions of the source rectangle. VideoFrame defines +codedRect and visibleRect for convenient copying +of the coded size and visible region respectively. + + +

NOTE: VideoFrame pixels are only addressable by integer numbers. All floating + point values provided to DOMRectInit will be truncated.

+

9.7. Plane Layout

+ +A PlaneLayout is a dictionary specifying the offset and stride of a +VideoFrame plane once copied to a BufferSource. A sequence of +PlaneLayouts MAY be provided to VideoFrame’s +copyTo() to specify how the plane is laid out in the destination +BufferSource. Alternatively, callers can inspect copyTo()’s +returned sequence of PlaneLayouts to learn the offset and stride for +planes as decided by the User Agent. + + +
dictionary PlaneLayout {
+  [EnforceRange] required unsigned long offset;
+  [EnforceRange] required unsigned long stride;
+};
+
+
+
offset, of type unsigned long +
+

The offset in bytes where the given plane begins within a BufferSource.

+
stride, of type unsigned long +
+

The number of bytes, including padding, used by each row of the plane within +a BufferSource.

+
+

9.8. Pixel Format

+ +Pixel formats describe the arrangement of bytes in each plane as well as the +number and order of the planes. Each format is described in its own sub-section. + + +
enum VideoPixelFormat {
+  // 4:2:0 Y, U, V
+  "I420",
+  "I420P10",
+  "I420P12",
+  // 4:2:0 Y, U, V, A
+  "I420A",
+  "I420AP10",
+  "I420AP12",
+  // 4:2:2 Y, U, V
+  "I422",
+  "I422P10",
+  "I422P12",
+  // 4:2:2 Y, U, V, A
+  "I422A",
+  "I422AP10",
+  "I422AP12",
+  // 4:4:4 Y, U, V
+  "I444",
+  "I444P10",
+  "I444P12",
+  // 4:4:4 Y, U, V, A
+  "I444A",
+  "I444AP10",
+  "I444AP12",
+  // 4:2:0 Y, UV
+  "NV12",
+  // 4:4:4 RGBA
+  "RGBA",
+  // 4:4:4 RGBX (opaque)
+  "RGBX",
+  // 4:4:4 BGRA
+  "BGRA",
+  // 4:4:4 BGRX (opaque)
+  "BGRX",
+};
+
+

Sub-sampling is a technique +where a single sample contains information for multiple pixels in the final +image. Sub-sampling can be horizontal, vertical or both, and has a factor, that is the number of final pixels +in the image that are derived from a sub-sampled sample.

+
+ If a VideoFrame is in I420 format, then the very first + component of the second plane (the U plane) corresponds to four pixels, that are + the pixels in the top-left corner of the image. Consequently, the first + component of the second row corresponds to the four pixels below those initial + four top-left pixels. The sub-sampling factor is 2 in both the horizontal + and vertical direction. +
+

If a VideoPixelFormat has an alpha component, the format’s +equivalent opaque format is the same VideoPixelFormat, without an +alpha component. If a VideoPixelFormat does not have an alpha component, it +is its own equivalent opaque format.

+

Integer values are unsigned unless otherwise specified.

+
+
I420 +
+ + This format is composed of three distinct planes, one plane of Luma and two + planes of Chroma, denoted Y, U and V, and present in this order. It is also + often referred to as Planar YUV 4:2:0. + + +

The U and V planes are sub-sampled horizontally and vertically by a + factor of 2 compared to the Y plane.

+

Each sample in this format is 8 bits.

+

There are codedWidth * codedHeight samples + (and therefore bytes) in the Y plane, arranged starting at the top left of + the image, in codedHeight rows of codedWidth + samples.

+

The U and V planes have a number of rows equal to the result of the + division of codedHeight by 2, rounded up to the nearest + integer. Each row has a number of samples equal to the result of the + division of codedWidth by 2, rounded up to the nearest + integer. Samples are arranged starting at the top left of the image.

+

The visible rectangle offset (visibleRect.x + and visibleRect.y) + MUST be even.

+
I420P10 +
+ + This format is composed of three distinct planes, one plane of Luma and two + planes of Chroma, denoted Y, U and V, and present in this order. + + +

The U and V planes are sub-sampled horizontally and vertically by a + factor of 2 compared to the Y plane.

+

Each sample in this format is 10 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in the Y plane, arranged starting at the top left of the image, in + codedHeight rows of codedWidth samples.

+

The U and V planes have a number of rows equal to the result of the + division of codedHeight by 2, rounded up to the nearest + integer. Each row has a number of samples equal to the result of the + division of codedWidth by 2, rounded up to the nearest + integer. Samples are arranged starting at the top left of the image.

+

The visible rectangle offset (visibleRect.x + and visibleRect.y) + MUST be even.

+
I420P12 +
+ + This format is composed of three distinct planes, one plane of Luma and two + planes of Chroma, denoted Y, U and V, and present in this order. + + +

The U and V planes are sub-sampled horizontally and vertically by a + factor of 2 compared to the Y plane.

+

Each sample in this format is 12 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in the Y plane, arranged starting at the top left of the image, in + codedHeight rows of codedWidth samples.

+

The U and V planes have a number of rows equal to the result of the + division of codedHeight by 2, rounded up to the nearest + integer. Each row has a number of samples equal to the result of the + division of codedWidth by 2, rounded up to the nearest + integer. Samples are arranged starting at the top left of the image.

+

The visible rectangle offset (visibleRect.x + and visibleRect.y) + MUST be even.

+
I420A +
+ + This format is composed of four distinct planes, one plane of Luma, two + planes of Chroma, denoted Y, U and V, and one plane of Alpha values, all + present in this order. It is also often referred to as Planar YUV 4:2:0 with + an alpha channel. + + +

The U and V planes are sub-sampled horizontally and vertically by a + factor of 2 compared to the Y and Alpha planes.

+

Each sample in this format is 8 bits.

+

There are codedWidth * codedHeight samples + (and therefore bytes) in the Y and Alpha planes, arranged starting at the + top left of the image, in codedHeight rows of + codedWidth samples.

+

The U and V planes have a number of rows equal to the result of the + division of codedHeight by 2, rounded up to the nearest + integer. Each row has a number of samples equal to the result of the + division of codedWidth by 2, rounded up to the nearest + integer. Samples are arranged starting at the top left of the image.

+

The visible rectangle offset (visibleRect.x + and visibleRect.y) + MUST be even.

+

I420A’s equivalent opaque format is I420.

+
I420AP10 +
+ + This format is composed of four distinct planes, one plane of Luma, two + planes of Chroma, denoted Y, U and V, and one plane of Alpha values, all + present in this order. + + +

The U and V planes are sub-sampled horizontally and vertically by a + factor of 2 compared to the Y and Alpha planes.

+

Each sample in this format is 10 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in the Y and Alpha planes, arranged starting at the top left of the image, + in codedHeight rows of codedWidth samples.

+

The U and V planes have a number of rows equal to the result of the + division of codedHeight by 2, rounded up to the nearest + integer. Each row has a number of samples equal to the result of the + division of codedWidth by 2, rounded up to the nearest + integer. Samples are arranged starting at the top left of the image.

+

The visible rectangle offset (visibleRect.x + and visibleRect.y) + MUST be even.

+

I420AP10’s equivalent opaque format is I420P10.

+
I420AP12 +
+ + This format is composed of four distinct planes, one plane of Luma, two + planes of Chroma, denoted Y, U and V, and one plane of Alpha values, all + present in this order. + + +

The U and V planes are sub-sampled horizontally and vertically by a + factor of 2 compared to the Y and Alpha planes.

+

Each sample in this format is 12 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in the Y and Alpha planes, arranged starting at the top left of the image, + in codedHeight rows of codedWidth samples.

+

The U and V planes have a number of rows equal to the result of the + division of codedHeight by 2, rounded up to the nearest + integer. Each row has a number of samples equal to the result of the + division of codedWidth by 2, rounded up to the nearest + integer. Samples are arranged starting at the top left of the image.

+

The visible rectangle offset (visibleRect.x + and visibleRect.y) + MUST be even.

+

I420AP12’s equivalent opaque format is I420P12.

+
I422 +
+ + This format is composed of three distinct planes, one plane of Luma and two + planes of Chroma, denoted Y, U and V, and present in this order. It is also + often referred to as Planar YUV 4:2:2. + + +

The U and V planes are sub-sampled horizontally by a factor of 2 + compared to the Y plane, and not sub-sampled vertically.

+

Each sample in this format is 8 bits.

+

 There are codedWidth * codedHeight samples + (and therefore bytes) in the Y plane, arranged starting at the top left + of the image, in codedHeight rows of + codedWidth samples.

+

The U and V planes have codedHeight rows. Each row has a + number of samples equal to the result of the division of + codedWidth by 2, rounded up to the nearest integer. Samples + are arranged starting at the top left of the image.

+

The visible rectangle horizontal offset + (visibleRect.x) + MUST be even.

+
I422P10 +
+ + This format is composed of three distinct planes, one plane of Luma and two + planes of Chroma, denoted Y, U and V, and present in this order. + + +

The U and V planes are sub-sampled horizontally by a factor of 2 + compared to the Y plane, and not sub-sampled vertically.

+

Each sample in this format is 10 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in the Y plane, arranged starting at the top left of the image, in + codedHeight rows of codedWidth samples.

+

The U and V planes have codedHeight rows. Each row has a + number of samples equal to the result of the division of + codedWidth by 2, rounded up to the nearest integer. + Samples are arranged starting at the top left of the image.

+

The visible rectangle horizontal offset + (visibleRect.x) + MUST be even.

+
I422P12 +
+ + This format is composed of three distinct planes, one plane of Luma and two + planes of Chroma, denoted Y, U and V, and present in this order. + + +

The U and V planes are sub-sampled horizontally by a factor of 2 + compared to the Y plane, and not sub-sampled vertically.

+

Each sample in this format is 12 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in the Y plane, arranged starting at the top left of the image, in + codedHeight rows of codedWidth samples.

+

The U and V planes have codedHeight rows. Each row has a + number of samples equal to the result of the division of + codedWidth by 2, rounded up to the nearest integer. + Samples are arranged starting at the top left of the image.

+

The visible rectangle horizontal offset + (visibleRect.x) + MUST be even.

+
I422A +
+ + This format is composed of four distinct planes, one plane of Luma, two + planes of Chroma, denoted Y, U and V, and one plane of Alpha values, all + present in this order. It is also often referred to as Planar YUV 4:2:2 with + an alpha channel. + + +

The U and V planes are sub-sampled horizontally by a factor of 2 + compared to the Y and Alpha planes, and not sub-sampled vertically.

+

Each sample in this format is 8 bits.

+

There are codedWidth * codedHeight samples + (and therefore bytes) in the Y and Alpha planes, arranged starting at the + top left of the image, in codedHeight rows of + codedWidth samples.

+

The U and V planes have codedHeight rows. Each row has a + number of samples equal to the result of the division of + codedWidth by 2, rounded up to the nearest integer. Samples + are arranged starting at the top left of the image.

+

The visible rectangle horizontal offset + (visibleRect.x) + MUST be even.

+

I422A’s equivalent opaque format is I422.

+
I422AP10 +
+ + This format is composed of four distinct planes, one plane of Luma, two + planes of Chroma, denoted Y, U and V, and one plane of Alpha values, all + present in this order. + + +

The U and V planes are sub-sampled horizontally by a factor of 2 + compared to the Y and Alpha planes, and not sub-sampled vertically.

+

Each sample in this format is 10 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in the Y and Alpha planes, arranged starting at the top left of the image, + in codedHeight rows of codedWidth samples.

+

The U and V planes have codedHeight rows. Each row has a + number of samples equal to the result of the division of + codedWidth by 2, rounded up to the nearest integer. + Samples are arranged starting at the top left of the image.

+

The visible rectangle horizontal offset + (visibleRect.x) + MUST be even.

+

 I422AP10’s equivalent opaque format is I422P10.

+
I422AP12 +
+ + This format is composed of four distinct planes, one plane of Luma, two + planes of Chroma, denoted Y, U and V, and one plane of Alpha values, all + present in this order. + + +

The U and V planes are sub-sampled horizontally by a factor of 2 + compared to the Y and Alpha planes, and not sub-sampled vertically.

+

Each sample in this format is 12 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in the Y and Alpha planes, arranged starting at the top left of the image, + in codedHeight rows of codedWidth samples.

+

The U and V planes have codedHeight rows. Each row has a + number of samples equal to the result of the division of + codedWidth by 2, rounded up to the nearest integer. + Samples are arranged starting at the top left of the image.

+

The visible rectangle horizontal offset + (visibleRect.x) + MUST be even.

+

 I422AP12’s equivalent opaque format is I422P12.

+
I444 +
+ + This format is composed of three distinct planes, one plane of Luma and two + planes of Chroma, denoted Y, U and V, and present in this order. It is also + often referred to as Planar YUV 4:4:4. + + +

This format does not use sub-sampling.

+

Each sample in this format is 8 bits.

+

There are codedWidth * codedHeight samples + (and therefore bytes) in all three planes, arranged starting at the top left + of the image, in codedHeight rows of + codedWidth samples.

+
I444P10 +
+ + This format is composed of three distinct planes, one plane of Luma and two + planes of Chroma, denoted Y, U and V, and present in this order. + + +

This format does not use sub-sampling.

+

Each sample in this format is 10 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in all three planes, arranged starting at the top left of the image, in + codedHeight rows of codedWidth samples.

+
I444P12 +
+ + This format is composed of three distinct planes, one plane of Luma and two + planes of Chroma, denoted Y, U and V, and present in this order. + + +

This format does not use sub-sampling.

+

Each sample in this format is 12 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in all three planes, arranged starting at the top left of the image, in + codedHeight rows of codedWidth samples.

+
I444A +
+ + This format is composed of four distinct planes, one plane of Luma, two + planes of Chroma, denoted Y, U and V, and one plane of Alpha values, all + present in this order. + + +

This format does not use sub-sampling.

+

Each sample in this format is 8 bits.

+

There are codedWidth * codedHeight samples + (and therefore bytes) in all four planes, arranged starting at the top left + of the image, in codedHeight rows of + codedWidth samples.

+

I444A’s equivalent opaque format is I444.

+
I444AP10 +
+ + This format is composed of four distinct planes, one plane of Luma, two + planes of Chroma, denoted Y, U and V, and one plane of Alpha values, all + present in this order. + + +

This format does not use sub-sampling.

+

Each sample in this format is 10 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in all four planes, arranged starting at the top left of the image, + in codedHeight rows of codedWidth samples.

+

I444AP10’s equivalent opaque format is I444P10.

+
I444AP12 +
+ + This format is composed of four distinct planes, one plane of Luma, two + planes of Chroma, denoted Y, U and V, and one plane of Alpha values, all + present in this order. + + +

This format does not use sub-sampling.

+

Each sample in this format is 12 bits, encoded as a 16-bit integer in + little-endian byte order.

+

There are codedWidth * codedHeight samples + in all four planes, arranged starting at the top left of the image, + in codedHeight rows of codedWidth samples.

+

I444AP12’s equivalent opaque format is I444P12.

+
NV12 +
+ + This format is composed of two distinct planes, one plane of Luma and then + another plane for the two Chroma components. The two planes are present in + this order, and are referred to respectively as the Y plane and the UV plane. + + +

The U and V components are sub-sampled horizontally and vertically by a + factor of 2 compared to the components in the Y planes.

+

Each sample in this format is 8 bits.

+

There are codedWidth * codedHeight samples + (and therefore bytes) in the Y plane, arranged starting at the top left + of the image, in codedHeight rows of + codedWidth samples.

+

The UV plane is composed of interleaved U and V values, in a number of + rows equal to the result of the division of codedHeight + by 2, rounded up to the nearest integer. Each row has a number of elements + equal to the result of the division of codedWidth by 2, + rounded up to the nearest integer. Each element is composed of two Chroma + samples, the U and V samples, in that order. Samples are arranged starting + at the top left of the image.

+

The visible rectangle offset (visibleRect.x + and visibleRect.y) + MUST be even.

+
+ + An image in the NV12 pixel format that is 16 pixels wide and 10 pixels tall + will be arranged like so in memory: + + +
YYYYYYYYYYYYYYYY
+YYYYYYYYYYYYYYYY
+YYYYYYYYYYYYYYYY
+YYYYYYYYYYYYYYYY
+YYYYYYYYYYYYYYYY
+YYYYYYYYYYYYYYYY
+YYYYYYYYYYYYYYYY
+YYYYYYYYYYYYYYYY
+YYYYYYYYYYYYYYYY
+YYYYYYYYYYYYYYYY
+UVUVUVUVUVUVUVUV
+UVUVUVUVUVUVUVUV
+UVUVUVUVUVUVUVUV
+UVUVUVUVUVUVUVUV
+UVUVUVUVUVUVUVUV
+
+

All samples are linear in memory.

+
+
RGBA +
+ + This format is composed of a single plane, that encodes four components: + Red, Green, Blue, and an alpha value, present in this order. + + +

Each sample in this format is 8 bits, and each pixel is therefore 32 bits.

+

There are codedWidth * codedHeight * 4 samples + (and therefore bytes) in the single plane, arranged starting at the top + left of the image, in codedHeight rows of + codedWidth samples.

+

RGBA’s equivalent opaque format is RGBX.

+
RGBX +
+ + This format is composed of a single plane, that encodes four components: + Red, Green, Blue, and a padding value, present in this order. + + +

Each sample in this format is 8 bits. The fourth element in each pixel is to + be ignored; the image is always fully opaque.

+

There are codedWidth * codedHeight * 4 samples + (and therefore bytes) in the single plane, arranged starting at the top left + of the image, in codedHeight rows of + codedWidth samples.

+
BGRA +
+ + This format is composed of a single plane, that encodes four components: + Blue, Green, Red, and an alpha value, present in this order. + + +

Each sample in this format is 8 bits.

+

There are codedWidth * codedHeight * 4 samples + (and therefore bytes) in the single plane, arranged starting at the top left + of the image, in codedHeight rows of + codedWidth samples.

+

BGRA’s equivalent opaque format is BGRX.

+
BGRX +
+ + This format is composed of a single plane, that encodes four components: + Blue, Green, Red, and a padding value, present in this order. + + +

Each sample in this format is 8 bits. The fourth element in each pixel is to + be ignored; the image is always fully opaque.

+

There are codedWidth * codedHeight * 4 samples + (and therefore bytes) in the single plane, arranged starting at the top left + of the image, in codedHeight rows of + codedWidth samples.

+
+

9.9. Video Color Space Interface

+
[Exposed=(Window,DedicatedWorker)]
+interface VideoColorSpace {
+  constructor(optional VideoColorSpaceInit init = {});
+
+  readonly attribute VideoColorPrimaries? primaries;
+  readonly attribute VideoTransferCharacteristics? transfer;
+  readonly attribute VideoMatrixCoefficients? matrix;
+  readonly attribute boolean? fullRange;
+
+  [Default] VideoColorSpaceInit toJSON();
+};
+
+dictionary VideoColorSpaceInit {
+  VideoColorPrimaries? primaries = null;
+  VideoTransferCharacteristics? transfer = null;
+  VideoMatrixCoefficients? matrix = null;
+  boolean? fullRange = null;
+};
+
+

9.9.1. Internal Slots

+
+
[[primaries]] +
+

The color primaries.

+
[[transfer]] +
+

The transfer characteristics.

+
[[matrix]] +
+

The matrix coefficients.

+
[[full range]] +
+

Indicates whether full-range color values are used.

+
+

9.9.2. Constructors

+ + + VideoColorSpace(init) + + +
    +
  1. +

    Let c be a new VideoColorSpace object, initialized as follows:

    +
      +
    1. +

      Assign init.primaries to [[primaries]].

      +
    2. +

      Assign init.transfer to [[transfer]].

      +
    3. +

      Assign init.matrix to [[matrix]].

      +
    4. +

      Assign init.fullRange to [[full range]].

      +
    +
  2. +

    Return c.

    +
+

9.9.3. Attributes

+
+
primaries, of type VideoColorPrimaries, readonly, nullable +
+

The primaries getter steps are to return the value of +[[primaries]].

+
transfer, of type VideoTransferCharacteristics, readonly, nullable +
+

The transfer getter steps are to return the value of +[[transfer]].

+
matrix, of type VideoMatrixCoefficients, readonly, nullable +
+

The matrix getter steps are to return the value of +[[matrix]].

+
fullRange, of type boolean, readonly, nullable +
+

The fullRange getter steps are to return the value of +[[full range]].

+
+

9.10. Video Color Primaries

+ +Color primaries describe the color gamut of video samples. + + +
enum VideoColorPrimaries {
+  "bt709",
+  "bt470bg",
+  "smpte170m",
+  "bt2020",
+  "smpte432",
+};
+
+
+
bt709 +
+ Color primaries used by BT.709 and sRGB, as described by [H.273] + section 8.1 table 2 value 1. + +
bt470bg +
+ Color primaries used by BT.601 PAL, as described by [H.273] + section 8.1 table 2 value 5. + +
smpte170m +
+ Color primaries used by BT.601 NTSC, as described by [H.273] + section 8.1 table 2 value 6. + +
bt2020 +
+ Color primaries used by BT.2020 and BT.2100, as described by [H.273] + section 8.1 table 2 value 9. + +
smpte432 +
+ Color primaries used by P3 D65, as described by [H.273] + section 8.1 table 2 value 12. + +
+

9.11. Video Transfer Characteristics

+ +Transfer characteristics describe the opto-electronic transfer characteristics +of video samples. + + +
enum VideoTransferCharacteristics {
+  "bt709",
+  "smpte170m",
+  "iec61966-2-1",
+  "linear",
+  "pq",
+  "hlg",
+};
+
+
+
bt709 +
+ Transfer characteristics used by BT.709, as described by [H.273] + section 8.2 table 3 value 1. + +
smpte170m +
+ Transfer characteristics used by BT.601, as described by [H.273] + section 8.2 table 3 value 6. (Functionally the same as "bt709".) + +
iec61966-2-1 +
+ Transfer characteristics used by sRGB, as described by [H.273] + section 8.2 table 3 value 13. + +
linear +
+ Transfer characteristics used by linear RGB, as described by [H.273] + section 8.2 table 3 value 8. + +
pq +
+ Transfer characteristics used by BT.2100 PQ, as described by [H.273] + section 8.2 table 3 value 16. + +
hlg +
+ Transfer characteristics used by BT.2100 HLG, as described by [H.273] + section 8.2 table 3 value 18. + +
+

9.12. Video Matrix Coefficients

+ +Matrix coefficients describe the relationship between sample component values +and color coordinates. + + +
enum VideoMatrixCoefficients {
+  "rgb",
+  "bt709",
+  "bt470bg",
+  "smpte170m",
+  "bt2020-ncl",
+};
+
+
+
rgb +
+ Matrix coefficients used by sRGB, as described by [H.273] + section 8.3 table 4 value 0. + +
bt709 +
+ Matrix coefficients used by BT.709, as described by [H.273] + section 8.3 table 4 value 1. + +
bt470bg +
+ Matrix coefficients used by BT.601 PAL, as described by [H.273] + section 8.3 table 4 value 5. + +
smpte170m +
+ Matrix coefficients used by BT.601 NTSC, as described by [H.273] + section 8.3 table 4 value 6. (Functionally the same as "bt470bg".) + +
bt2020-ncl +
+ Matrix coefficients used by BT.2020 NCL, as described by [H.273] + section 8.3 table 4 value 9. + +
+

10. Image Decoding

+

10.1. Background

+
+ + This section is non-normative. + + +

Image codec definitions are typically accompanied by a definition for a + corresponding file format. Hence image decoders often perform both duties of + unpacking (demuxing) as well as decoding the encoded image data. The WebCodecs + ImageDecoder follows this pattern, which motivates an interface design that + is notably different from that of VideoDecoder and AudioDecoder.

+

In spite of these differences, ImageDecoder uses the same + codec processing model as the other codec interfaces. Additionally, + ImageDecoder uses the VideoFrame interface to describe decoded outputs.

+
+

10.2. ImageDecoder Interface

+
[Exposed=(Window,DedicatedWorker), SecureContext]
+interface ImageDecoder {
+  constructor(ImageDecoderInit init);
+
+  readonly attribute DOMString type;
+  readonly attribute boolean complete;
+  readonly attribute Promise<undefined> completed;
+  readonly attribute ImageTrackList tracks;
+
+  Promise<ImageDecodeResult> decode(optional ImageDecodeOptions options = {});
+  undefined reset();
+  undefined close();
+
+  static Promise<boolean> isTypeSupported(DOMString type);
+};
+
+
+

10.2.1. Internal Slots

+
+
[[control message queue]] +
+

A queue of control messages to be performed upon this codec +instance. See [[control message queue]].

+
[[message queue blocked]] +
+

A boolean indicating when processing the +[[control message queue]] is blocked by a pending +control message. See [[message queue blocked]].

+
[[codec work queue]] +
+

A parallel queue used for running parallel steps that reference the +[[codec implementation]]. See [[codec work queue]].

+
[[ImageTrackList]] +
+

An ImageTrackList describing the tracks found in +[[encoded data]]

+
[[type]] +
+

A string reflecting the value of the MIME type given at +construction.

+
[[complete]] +
+

A boolean indicating whether [[encoded data]] is completely +buffered.

+
[[completed promise]] +
+

The promise used to signal when [[complete]] becomes +true.

+
[[codec implementation]] +
+

An underlying image decoder implementation provided by the User Agent. See +[[codec implementation]].

+
[[encoded data]] +
+

A byte sequence containing the encoded image data to be decoded.

+
[[prefer animation]] +
+

A boolean reflecting the value of preferAnimation given +at construction.

+
[[pending decode promises]] +
+

A list of unresolved promises returned by calls to decode().

+
[[internal selected track index]] +
+

Identifies the image track within [[encoded data]] that is +used by decoding algorithms.

+
[[tracks established]] +
+

A boolean indicating whether the track list has been established in +[[ImageTrackList]].

+
[[closed]] +
+

A boolean indicating that the ImageDecoder is in a permanent closed state +and can no longer be used.

+
[[progressive frame generations]] +
+

A mapping of frame indices to Progressive Image Frame Generations. The +values represent the Progressive Image Frame Generation for the +VideoFrame which was most recently output by a call to +decode() with the given frame index.

+
+

10.2.2. Constructor

+
+
+ImageDecoder(init) + +
+

NOTE: Calling decode() on the constructed ImageDecoder +will trigger a NotSupportedError if the User Agent does not support +type. Authors are encouraged to first check support by calling +isTypeSupported() with type. User Agents don’t have to +support any particular type.

+

When invoked, run these steps:

+
    +
  1. +

    If init is not valid ImageDecoderInit, throw a TypeError.

    +
  2. +

    If init.transfer contains more than one reference +to the same ArrayBuffer, then throw a DataCloneError DOMException.

    +
  3. +

    For each transferable in init.transfer:

    +
      +
    1. +

      If [[Detached]] internal slot is true, +then throw a DataCloneError DOMException.

      +
    +
  4. +

    Let d be a new ImageDecoder object. In the steps below, all +mentions of ImageDecoder members apply to d unless stated +otherwise.

    +
  5. +

    Assign a new queue to [[control message queue]].

    +
  6. +

    Assign false to [[message queue blocked]].

    +
  7. +

    Assign the result of starting a new parallel queue to +[[codec work queue]].

    +
  8. +

    Assign [[ImageTrackList]] a new ImageTrackList +initialized as follows:

    +
      +
    1. +

      Assign a new list to [[track list]].

      +
    2. +

      Assign -1 to [[selected index]].

      +
    +
  9. +

    Assign type to [[type]].

    +
  10. +

    Assign null to [[codec implementation]].

    +
  11. +

    If init.preferAnimation exists, assign init.preferAnimation +to the [[prefer animation]] internal slot. Otherwise, +assign 'null' to [[prefer animation]] internal slot.

    +
  12. +

    Assign a new list to [[pending decode promises]].

    +
  13. +

    Assign -1 to [[internal selected track index]].

    +
  14. +

    Assign false to [[tracks established]].

    +
  15. +

    Assign false to [[closed]].

    +
  16. +

    Assign a new map to [[progressive frame generations]].

    +
  17. +

    If init’s data member is of type +ReadableStream:

    +
      +
    1. +

      Assign a new list to [[encoded data]].

      +
    2. +

      Assign false to [[complete]]

      +
    3. +

      Queue a control message to configure the image decoder with +init.

      +
    4. +

      Process the control message queue.

      +
    5. +

      Let reader be the result of getting a reader for +data.

      +
    6. +

      In parallel, perform the Fetch Stream Data Loop on d with +reader.

      +
    +
  18. +

    Otherwise:

    +
      +
    1. +

      Assert that init.data is of type BufferSource.

      +
    2. +

      If init.transfer contains an ArrayBuffer +referenced by init.data the User Agent +MAY choose to:

      +
        +
      1. +

        Let [[encoded data]] reference bytes in data +representing an encoded image.

        +
      +
    3. +

      Otherwise:

      +
        +
      1. +

        Assign a copy of init.data to [[encoded data]].

        +
      +
    4. +

      Assign true to [[complete]].

      +
    5. +

      Resolve [[completed promise]].

      +
    6. +

      Queue a control message to configure the image decoder with +init.

      +
    7. +

      Queue a control message to decode track metadata.

      +
    8. +

      Process the control message queue.

      +
    +
  19. +

    For each transferable in init.transfer:

    +
      +
    1. +

      Perform DetachArrayBuffer +on transferable

      +
    +
  20. +

    return d.

    +
+

Running a control message to configure the image decoder +means running these steps:

+
    +
  1. +

    Let supported be the result of running the Check Type Support algorithm with init.type.

    +
  2. +

    If supported is false, run the Close ImageDecoder +algorithm with a NotSupportedError DOMException and return +"processed".

    +
  3. +

    Otherwise, assign the [[codec implementation]] internal +slot with an implementation supporting init.type

    +
  4. +

    Assign true to [[message queue blocked]].

    +
  5. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Configure [[codec implementation]] in accordance +with the values given for colorSpaceConversion, +desiredWidth, and +desiredHeight.

      +
    2. +

      Assign false to [[message queue blocked]].

      +
    3. +

      Queue a task to Process the control message queue.

      +
    +
  6. +

    Return "processed".

    +
+

Running a control message to decode track metadata means +running these steps:

+
    +
  1. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Run the Establish Tracks algorithm.

      +
    +
+
+

10.2.3. Attributes

+
+
type, of type DOMString, readonly +
+

A string reflecting the value of the MIME type given at +construction.

+

The type getter steps are to return +[[type]].

+
complete, of type boolean, readonly +
+

Indicates whether [[encoded data]] is completely buffered.

+

The complete getter steps are to return +[[complete]].

+
completed, of type Promise<undefined>, readonly +
+

The promise used to signal when complete becomes true.

+

The completed getter steps are to return +[[completed promise]].

+
tracks, of type ImageTrackList, readonly +
+

Returns a live ImageTrackList, which provides metadata +for the available tracks and a mechanism for selecting a track to decode.

+

The tracks getter steps are to return +[[ImageTrackList]].

+
+

10.2.4. Methods

+
+
decode(options) +
+

Enqueues a control message to decode the frame according to options.

+

When invoked, run these steps:

+
    +
  1. +

    If [[closed]] is true, return a Promise +rejected with an InvalidStateError DOMException.

    +
  2. +

    If [[ImageTrackList]]’s +[[selected index]] is '-1', return a Promise +rejected with an InvalidStateError DOMException.

    +
  3. +

    If options is undefined, assign a new ImageDecodeOptions to +options.

    +
  4. +

    Let promise be a new Promise.

    +
  5. +

    Append promise to [[pending decode promises]].

    +
  6. +

    Queue a control message to decode the image with options, and +promise.

    +
  7. +

    Process the control message queue.

    +
  8. +

    Return promise.

    +
+

Running a control message to decode the image means running these +steps:

+
    +
  1. +

    Enqueue the following steps to the [[codec work queue]]:

    +
      +
    1. +

      Wait for [[tracks established]] to become true.

      +
    2. +

      If options.completeFramesOnly is false and +the image is a Progressive Image for which the User Agent +supports progressive decoding, run the Decode Progressive Frame +algorithm with options.frameIndex and +promise.

      +
    3. +

      Otherwise, run the Decode Complete Frame algorithm with +options.frameIndex and promise.

      +
    +
+
reset() +
+

Immediately aborts all pending work.

+

When invoked, run the Reset ImageDecoder algorithm with +an AbortError DOMException.

+
close() +
+

Immediately aborts all pending work and releases system resources. Close is +final.

+

When invoked, run the Close ImageDecoder algorithm with +an AbortError DOMException.

+
isTypeSupported(type) +
+

Returns a promise indicating whether the provided config is supported by the +User Agent.

+

When invoked, run these steps:

+
    +
  1. +

    If type is not a valid image MIME type, return a Promise +rejected with TypeError.

    +
  2. +

    Let p be a new Promise.

    +
  3. +

    In parallel, resolve p with the result of running the +Check Type Support algorithm with type.

    +
  4. +

    Return p.

    +
+
+

10.2.5. Algorithms

+
+
Fetch Stream Data Loop (with reader) +
+

Run these steps:

+
    +
  1. +

    Let readRequest be the following read request.

    +
    +
    chunk steps, given chunk +
    +
      +
    1. +

      If [[closed]] is true, abort these steps.

      +
    2. +

      If chunk is not a Uint8Array object, queue a task to run +the Close ImageDecoder algorithm with a +DataError DOMException and abort these steps.

      +
    3. +

      Let bytes be the byte sequence represented by the Uint8Array +object.

      +
    4. +

      Append bytes to the [[encoded data]] +internal slot.

      +
    5. +

      If [[tracks established]] is false, run the +Establish Tracks algorithm.

      +
    6. +

      Otherwise, run the Update Tracks algorithm.

      +
    7. +

      Run the Fetch Stream Data Loop algorithm with reader.

      +
    +
    close steps +
    +
      +
    1. +

      Assign true to [[complete]]

      +
    2. +

      Resolve [[completed promise]].

      +
    +
    error steps +
    +
      +
    1. +

      Queue a task to run the Close ImageDecoder +algorithm with a NotReadableError DOMException

      +
    +
    +
  2. +

    Read a chunk from reader given readRequest.

    +
+
Establish Tracks +
+

Run these steps:

+
    +
  1. +

    Assert [[tracks established]] is false.

    +
  2. +

    If [[encoded data]] does not contain enough data to +determine the number of tracks:

    +
      +
    1. +

If complete is true, queue a task to run the +Close ImageDecoder algorithm with an +InvalidStateError DOMException.

      +
    2. +

      Abort these steps.

      +
    +
  3. +

    If the number of tracks is found to be 0, queue a task to run the +Close ImageDecoder algorithm and abort these steps.

    +
  4. +

    Let newTrackList be a new list.

    +
  5. +

    For each image track found in [[encoded data]]:

    +
      +
    1. +

      Let newTrack be a new ImageTrack, initialized as follows:

      +
        +
      1. +

        Assign this to [[ImageDecoder]].

        +
      2. +

        Assign tracks to +[[ImageTrackList]].

        +
      3. +

        If image track is found to be animated, assign true to +newTrack’s [[animated]] internal slot. +Otherwise, assign false.

        +
      4. +

        If image track is found to describe a frame count, assign +that count to newTrack’s [[frame count]] +internal slot. Otherwise, assign 0.

        +

        NOTE: If this was constructed with + data as a ReadableStream, the + frameCount can change as additional bytes are + appended to [[encoded data]]. See the + Update Tracks algorithm.

        +
      5. +

        If image track is found to describe a repetition count, +assign that count to [[repetition count]] +internal slot. Otherwise, assign 0.

        +

        NOTE: A value of Infinity indicates infinite repetitions.

        +
      6. +

        Assign false to newTrack’s [[selected]] +internal slot.

        +
      +
    2. +

      Append newTrack to newTrackList.

      +
    +
  6. +

    Let selectedTrackIndex be the result of running the +Get Default Selected Track Index algorithm with +newTrackList.

    +
  7. +

    Let selectedTrack be the track at position selectedTrackIndex within +newTrackList.

    +
  8. +

    Assign true to selectedTrack’s [[selected]] internal +slot.

    +
  9. +

    Assign selectedTrackIndex to [[internal selected track index]].

    +
  10. +

    Assign true to [[tracks established]].

    +
  11. +

    Queue a task to perform the following steps:

    +
      +
    1. +

      Assign newTrackList to the tracks +[[track list]] internal slot.

      +
    2. +

      Assign selectedTrackIndex to tracks +[[selected index]].

      +
    3. +

      Resolve [[ready promise]].

      +
    +
+
Get Default Selected Track Index (with +trackList) +
+

Run these steps:

+
    +
  1. +

    If [[encoded data]] identifies a Primary Image Track:

    +
      +
    1. +

      Let primaryTrack be the ImageTrack from trackList that +describes the Primary Image Track.

      +
    2. +

      Let primaryTrackIndex be position of primaryTrack within +trackList.

      +
    3. +

      If [[prefer animation]] is null, return +primaryTrackIndex.

      +
    4. +

      If primaryTrack.animated equals +[[prefer animation]], return primaryTrackIndex.

      +
    +
  2. +

    If any ImageTracks in trackList have animated equal +to [[prefer animation]], return the position of the +earliest such track in trackList.

    +
  3. +

    Return 0.

    +
+
Update Tracks +
+

A track update struct is a struct that consists of a +track index (unsigned long) +and a frame count +(unsigned long).

+

Run these steps:

+
    +
  1. +

    Assert [[tracks established]] is true.

    +
  2. +

    Let trackChanges be a new list.

    +
  3. +

    Let trackList be a copy of tracks' +[[track list]].

    +
  4. +

    For each track in trackList:

    +
      +
    1. +

      Let trackIndex be the position of track in trackList.

      +
    2. +

      Let latestFrameCount be the frame count as indicated by +[[encoded data]] for the track corresponding to +track.

      +
    3. +

      Assert that latestFrameCount is greater than or equal to +track.frameCount.

      +
    4. +

      If latestFrameCount is greater than track.frameCount:

      +
        +
      1. +

        Let change be a track update struct whose +track index is trackIndex and +frame count is latestFrameCount.

        +
      2. +

Append change to trackChanges.

        +
      +
    +
  5. +

If trackChanges is empty, abort these steps.

    +
  6. +

    Queue a task to perform the following steps:

    +
      +
    1. +

      For each update in trackChanges:

      +
        +
      1. +

        Let updateTrack be the ImageTrack at position +update.trackIndex within tracks' +[[track list]].

        +
      2. +

        Assign update.frameCount to updateTrack’s +[[frame count]].

        +
      +
    +
+
Decode Complete Frame (with frameIndex and +promise) +
+
    +
  1. +

    Assert that [[tracks established]] is true.

    +
  2. +

    Assert that [[internal selected track index]] is not +-1.

    +
  3. +

    Let encodedFrame be the encoded frame identified by frameIndex and +[[internal selected track index]].

    +
  4. +

    Wait for any of the following conditions to be true (whichever happens +first):

    +
      +
    1. +

      [[encoded data]] contains enough bytes to +completely decode encodedFrame.

      +
    2. +

      [[encoded data]] is found to be malformed.

      +
    3. +

      complete is true.

      +
    4. +

      [[closed]] is true.

      +
    +
  5. +

    If [[encoded data]] is found to be malformed, run the +Fatally Reject Bad Data algorithm and abort these +steps.

    +
  6. +

    If [[encoded data]] does not contain enough bytes to +completely decode encodedFrame, run the +Reject Infeasible Decode algorithm with promise and +abort these steps.

    +
  7. +

    Attempt to use [[codec implementation]] to decode +encodedFrame.

    +
  8. +

    If decoding produces an error, run the +Fatally Reject Bad Data algorithm and abort these +steps.

    +
  9. +

    If [[progressive frame generations]] contains an entry +keyed by frameIndex, remove the entry from the map.

    +
  10. +

    Let output be the decoded image data emitted by +[[codec implementation]] corresponding to +encodedFrame.

    +
  11. +

    Let decodeResult be a new ImageDecodeResult initialized as +follows:

    +
      +
    1. +

      Assign 'true' to complete.

      +
    2. +

      Let duration be the presentation duration for output as +described by encodedFrame. If encodedFrame does not have a +duration, assign null to duration.

      +
    3. +

      Let timestamp be the presentation timestamp for output as +described by encodedFrame. If encodedFrame does not have a +timestamp:

      +
        +
      1. +

        If encodedFrame is a still image assign 0 to timestamp.

        +
      2. +

        If encodedFrame is a constant rate animated image and +duration is not null, assign |frameIndex| * |duration| to +timestamp.

        +
      3. +

        If a timestamp can otherwise be trivially generated from +metadata without further decoding, assign that to timestamp.

        +
      4. +

        Otherwise, assign 0 to timestamp.

        +
      +
    4. +

      If [[encoded data]] contains orientation metadata +describe it as rotation and flip, otherwise set rotation to 0 +and flip to false.

      +
    5. +

      Assign image with the result of running the +Create a VideoFrame algorithm with output, timestamp, +duration, rotation, and flip.

      +
    +
  12. +

    Run the Resolve Decode algorithm with promise and +decodeResult.

    +
+
Decode Progressive Frame (with frameIndex and +promise) +
+
    +
  1. +

    Assert that [[tracks established]] is true.

    +
  2. +

    Assert that [[internal selected track index]] is not +-1.

    +
  3. +

    Let encodedFrame be the encoded frame identified by frameIndex and +[[internal selected track index]].

    +
  4. +

    Let lastFrameGeneration be null.

    +
  5. +

    If [[progressive frame generations]] contains a map +entry with the key frameIndex, assign the value of the map entry to +lastFrameGeneration.

    +
  6. +

    Wait for any of the following conditions to be true (whichever happens +first):

    +
      +
    1. +

      [[encoded data]] contains enough bytes to decode +encodedFrame to produce an output whose Progressive Image Frame Generation exceeds lastFrameGeneration.

      +
    2. +

      [[encoded data]] is found to be malformed.

      +
    3. +

      complete is true.

      +
    4. +

      [[closed]] is true.

      +
    +
  7. +

    If [[encoded data]] is found to be malformed, run the +Fatally Reject Bad Data algorithm and abort these +steps.

    +
  8. +

    Otherwise, if [[encoded data]] does not contain enough +bytes to decode encodedFrame to produce an output whose +Progressive Image Frame Generation exceeds lastFrameGeneration, +run the Reject Infeasible Decode algorithm with +promise and abort these steps.

    +
  9. +

    Attempt to use [[codec implementation]] to decode +encodedFrame.

    +
  10. +

    If decoding produces an error, run the +Fatally Reject Bad Data algorithm and abort these +steps.

    +
  11. +

    Let output be the decoded image data emitted by +[[codec implementation]] corresponding to +encodedFrame.

    +
  12. +

    Let decodeResult be a new ImageDecodeResult.

    +
  13. +

    If output is the final full-detail progressive output corresponding +to encodedFrame:

    +
      +
    1. +

      Assign true to decodeResult’s complete.

      +
    2. +

      If [[progressive frame generations]] contains an +entry keyed by frameIndex, remove the entry from the map.

      +
    +
  14. +

    Otherwise:

    +
      +
    1. +

      Assign false to decodeResult’s complete.

      +
    2. +

      Let frameGeneration be the Progressive Image Frame Generation +for output.

      +
    3. +

      Add a new entry to [[progressive frame generations]] with key frameIndex and value frameGeneration.

      +
    +
  15. +

    Let duration be the presentation duration for output as +described by encodedFrame. If encodedFrame does not describe a +duration, assign null to duration.

    +
  16. +

    Let timestamp be the presentation timestamp for output as +described by encodedFrame. If encodedFrame does not have a +timestamp:

    +
      +
    1. +

      If encodedFrame is a still image assign 0 to timestamp.

      +
    2. +

      If encodedFrame is a constant rate animated image and +duration is not null, assign |frameIndex| * |duration| to +timestamp.

      +
    3. +

      If a timestamp can otherwise be trivially generated from +metadata without further decoding, assign that to timestamp.

      +
    4. +

      Otherwise, assign 0 to timestamp.

      +
    +
  17. +

    If [[encoded data]] contains orientation metadata +describe it as rotation and flip, otherwise set rotation to 0 +and flip to false.

    +
  18. +

    Assign image with the result of running the +Create a VideoFrame algorithm with output, timestamp, +duration, rotation, and flip.

    +
  19. +

    Remove promise from [[pending decode promises]].

    +
  20. +

    Resolve promise with decodeResult.

    +
+
Resolve Decode (with promise and result) +
+
    +
  1. +

    Queue a task to perform these steps:

    +
      +
    1. +

      If [[closed]], abort these steps.

      +
    2. +

      Assert that promise is an element of +[[pending decode promises]].

      +
    3. +

      Remove promise from [[pending decode promises]].

      +
    4. +

      Resolve promise with result.

      +
    +
+
Reject Infeasible Decode (with promise) +
+
    +
  1. +

    Assert that complete is true or +[[closed]] is true.

    +
  2. +

    If complete is true, let exception be a + RangeError. Otherwise, let exception be an + InvalidStateError DOMException.

    +
  3. +

    Queue a task to perform these steps:

    +
      +
    1. +

      If [[closed]], abort these steps.

      +
    2. +

      Assert that promise is an element of +[[pending decode promises]].

      +
    3. +

      Remove promise from [[pending decode promises]].

      +
    4. +

      Reject promise with exception.

      +
    +
+
Fatally Reject Bad Data +
+
    +
  1. +

    Queue a task to perform these steps:

    +
      +
    1. +

      If [[closed]], abort these steps.

      +
    2. +

      Run the Close ImageDecoder algorithm with an +EncodingError DOMException.

      +
    +
+
Check Type Support (with type) +
+
    +
  1. +

    If the User Agent can provide a codec to support decoding type, return +true.

    +
  2. +

    Otherwise, return false.

    +
+
Reset ImageDecoder (with exception) +
+
    +
  1. +

    Signal [[codec implementation]] to abort any active +decoding operation.

    +
  2. +

    For each decodePromise in +[[pending decode promises]]:

    +
      +
    1. +

      Reject decodePromise with exception.

      +
    2. +

      Remove decodePromise from +[[pending decode promises]].

      +
    +
+
Close ImageDecoder (with exception) +
+
    +
  1. +

    Run the Reset ImageDecoder algorithm with exception.

    +
  2. +

    Assign true to [[closed]].

    +
  3. +

    Clear [[codec implementation]] and release associated +system resources.

    +
  4. +

    If [[ImageTrackList]] is empty, reject +[[ready promise]] with exception. Otherwise +perform these steps:

    +
      +
    1. +

      Remove all entries from [[ImageTrackList]].

      +
    2. +

      Assign -1 to [[ImageTrackList]]’s +[[selected index]].

      +
    +
  5. +

    If [[complete]] is false, reject +[[completed promise]] with exception.

    +
+
+

10.3. ImageDecoderInit Interface

+
typedef (AllowSharedBufferSource or ReadableStream) ImageBufferSource;
+dictionary ImageDecoderInit {
+  required DOMString type;
+  required ImageBufferSource data;
+  ColorSpaceConversion colorSpaceConversion = "default";
+  [EnforceRange] unsigned long desiredWidth;
+  [EnforceRange] unsigned long desiredHeight;
+  boolean preferAnimation;
+  sequence<ArrayBuffer> transfer = [];
+};
+
+
+

To determine if an ImageDecoderInit is a valid ImageDecoderInit, +run these steps:

+
    +
  1. +

    If type is not a valid image MIME type, return false.

    +
  2. +

    If data is of type ReadableStream and the ReadableStream is +disturbed or locked, return false.

    +
  3. +

    If data is of type BufferSource:

    +
      +
    1. +

      If data is [detached], return false.

      +
    2. +

      If data is empty, return false.

      +
    +
  4. +

    If desiredWidth exists and +desiredHeight does not exist, return false.

    +
  5. +

    If desiredHeight exists and +desiredWidth does not exist, return false.

    +
  6. +

    Return true.

    +
+

A valid image MIME type is a string that is a valid MIME type string and for which the type, per Section 8.3.1 of [RFC9110], is +image.

+
+
type, of type DOMString +
+

String containing the MIME type of the image file to be decoded.

+
data, of type ImageBufferSource +
+

BufferSource or ReadableStream of bytes representing an encoded +image file as described by type.

+
colorSpaceConversion, of type ColorSpaceConversion, defaulting to "default" +
+

Controls whether decoded outputs' color space is converted or ignored, as +defined by colorSpaceConversion in +ImageBitmapOptions.

+
desiredWidth, of type unsigned long +
+

Indicates a desired width for decoded outputs. Implementation is best +effort; decoding to a desired width MAY not be +supported by all formats/decoders.

+
desiredHeight, of type unsigned long +
+

Indicates a desired height for decoded outputs. Implementation is best +effort; decoding to a desired height MAY not be +supported by all formats/decoders.

+
preferAnimation, of type boolean +
+

For images with multiple tracks, this indicates whether the +initial track selection SHOULD prefer an animated +track.

+

NOTE: See the Get Default Selected Track Index algorithm.

+
+

10.4. ImageDecodeOptions Interface

+
dictionary ImageDecodeOptions {
+  [EnforceRange] unsigned long frameIndex = 0;
+  boolean completeFramesOnly = true;
+};
+
+
+
+
frameIndex, of type unsigned long, defaulting to 0 +
+

The index of the frame to decode.

+
completeFramesOnly, of type boolean, defaulting to true +
+

For Progressive Images, a value of false indicates that the decoder +MAY output an image with +reduced detail. Each subsequent call to decode() for the +same frameIndex will resolve to produce an image with +a higher Progressive Image Frame Generation (more image detail) than the +previous call, until finally the full-detail image is produced.

+

If completeFramesOnly is assigned true, or if the +image is not a Progressive Image, or if the User Agent does not support +progressive decoding for the given image type, calls to +decode() will only resolve once the full detail image is +decoded.

+
+ + NOTE: For Progressive Images, setting + completeFramesOnly to false can be used to + offer users a preview of an image that is still being buffered from the + network (via the data ReadableStream). + + +

Upon decoding the full detail image, the ImageDecodeResult’s + complete will be set to true.

+
+
+

10.5. ImageDecodeResult Interface

+
dictionary ImageDecodeResult {
+  required VideoFrame image;
+  required boolean complete;
+};
+
+
+
+
image, of type VideoFrame +
+

The decoded image.

+
complete, of type boolean +
+

Indicates whether image contains the final full-detail +output.

+

NOTE: complete is always true when + decode() is invoked with + completeFramesOnly set to true.

+
+

10.6. ImageTrackList Interface

+
[Exposed=(Window,DedicatedWorker), SecureContext]
+interface ImageTrackList {
+  getter ImageTrack (unsigned long index);
+
+  readonly attribute Promise<undefined> ready;
+  readonly attribute unsigned long length;
+  readonly attribute long selectedIndex;
+  readonly attribute ImageTrack? selectedTrack;
+};
+
+
+

10.6.1. Internal Slots

+
+
[[ready promise]] +
+

The promise used to signal when the ImageTrackList has been populated +with ImageTracks.

+

NOTE: ImageTrack frameCount can receive subsequent + updates until complete is true.

+
[[track list]] +
+

The list of ImageTracks described by this ImageTrackList.

+
[[selected index]] +
+

The index of the selected track in [[track list]]. A +value of -1 indicates that no track is selected. The initial value +is -1.

+
+

10.6.2. Attributes

+
+
ready, of type Promise<undefined>, readonly +
+

The ready getter steps are to return the +[[ready promise]].

+
length, of type unsigned long, readonly +
+

The length getter steps are to return the length of +[[track list]].

+
selectedIndex, of type long, readonly +
+

The selectedIndex getter steps are to return +[[selected index]].

+
selectedTrack, of type ImageTrack, readonly, nullable +
+

The selectedTrack getter steps are:

+
    +
  1. +

    If [[selected index]] is -1, return null.

    +
  2. +

    Otherwise, return the ImageTrack from [[track list]] +at the position indicated by [[selected index]].

    +
+
+

10.7. ImageTrack Interface

+
[Exposed=(Window,DedicatedWorker), SecureContext]
+interface ImageTrack {
+  readonly attribute boolean animated;
+  readonly attribute unsigned long frameCount;
+  readonly attribute unrestricted float repetitionCount;
+  attribute boolean selected;
+};
+
+
+

10.7.1. Internal Slots

+
+
[[ImageDecoder]] +
+

The ImageDecoder instance that constructed this ImageTrack.

+
[[ImageTrackList]] +
+

The ImageTrackList instance that lists this ImageTrack.

+
[[animated]] +
+

Indicates whether this track contains an animated image with multiple +frames.

+
[[frame count]] +
+

The number of frames in this track.

+
[[repetition count]] +
+

The number of times the animation is intended to repeat.

+
[[selected]] +
+

Indicates whether this track is selected for decoding.

+
+

10.7.2. Attributes

+
+
animated, of type boolean, readonly +
+

The animated getter steps are to return the value of +[[animated]].

+

NOTE: This attribute provides an early indication that + frameCount will ultimately exceed 0 for images where the + frameCount starts at 0 and later increments as new + chunks of the ReadableStream data arrive.

+
frameCount, of type unsigned long, readonly +
+

The frameCount getter steps are to return the value of +[[frame count]].

+
repetitionCount, of type unrestricted float, readonly +
+

The repetitionCount getter steps are to return the value of +[[repetition count]].

+
selected, of type boolean +
+

The selected getter steps are to return the value of +[[selected]].

+

The selected setter steps are:

+
    +
  1. +

    If [[ImageDecoder]]’s [[closed]] slot is +true, abort these steps.

    +
  2. +

    Let newValue be the given value.

    +
  3. +

    If newValue equals [[selected]], abort these steps.

    +
  4. +

    Assign newValue to [[selected]].

    +
  5. +

    Let parentTrackList be [[ImageTrackList]].

    +
  6. +

    Let oldSelectedIndex be the value of parentTrackList +[[selected index]].

    +
  7. +

    If oldSelectedIndex is not -1:

    +
      +
    1. +

      Let oldSelectedTrack be the ImageTrack in parentTrackList +[[track list]] at the position of +oldSelectedIndex.

      +
    2. +

      Assign false to oldSelectedTrack [[selected]].

      +
    +
  8. +

    If newValue is true, let selectedIndex be the index of this +ImageTrack within parentTrackList’s +[[track list]]. Otherwise, let selectedIndex be +-1.

    +
  9. +

    Assign selectedIndex to parentTrackList +[[selected index]].

    +
  10. +

    Run the Reset ImageDecoder algorithm on +[[ImageDecoder]].

    +
  11. +

    Queue a control message to [[ImageDecoder]]’s +control message queue to update the internal selected track +index with selectedIndex.

    +
  12. +

    Process the control message queue belonging to +[[ImageDecoder]].

    +
+

Running a control message to update the internal selected track index +means running these steps:

+
    +
  1. +

    Enqueue the following steps to [[ImageDecoder]]’s +[[codec work queue]]:

    +
      +
    1. +

      Assign selectedIndex to +[[internal selected track index]].

      +
    2. +

      Remove all entries from +[[progressive frame generations]].

      +
    +
+
+

11. Resource Reclamation

+

When resources are constrained, a User Agent MAY +proactively reclaim codecs. This is particularly true in the case where hardware +codecs are limited, and shared across web pages or platform apps.

+

To reclaim a codec, a User Agent MUST run +the appropriate close algorithm (amongst Close AudioDecoder, +Close AudioEncoder, Close VideoDecoder and Close VideoEncoder) with +a QuotaExceededError.

+

The rules governing when a codec may be reclaimed depend on whether the codec is +an active or inactive codec and/or a background codec.

+

An active codec is a codec that has +made progress on the [[codec work queue]] in the past 10 seconds.

+

NOTE: A reliable sign of the working queue’s progress is a call to the output() + callback.

+

An inactive codec is any codec that does +not meet the definition of an active codec.

+

A background codec is a codec whose +ownerDocument (or owner set’s Document, for codecs in workers) has a +hidden attribute equal to true.

+

A User Agent MUST only reclaim a codec that is +either an inactive codec, a background codec, or both. A User Agent +MUST NOT reclaim a codec that is both active and in +the foreground, i.e. not a background codec.

+

Additionally, User Agents MUST NOT reclaim an active +background codec if it is:

+ +

12. Security Considerations

+
+ +This section is non-normative. + + +

The primary security impact is that features of this API make it easier for an +attacker to exploit vulnerabilities in the underlying platform codecs. +Additionally, new abilities to configure and control the codecs can allow for +new exploits that rely on a specific configuration and/or sequence of control +operations.

+

Platform codecs are historically an internal detail of APIs like +HTMLMediaElement, [WEBAUDIO], and [WebRTC]. In this way, it has always +been possible to attack the underlying codecs by using malformed media +files/streams and invoking the various API control methods.

+

For example, you can send any stream to a decoder by first wrapping that stream +in a media container (e.g. mp4) and setting that as the src +of an HTMLMediaElement. You can then cause the underlying video decoder to +be reset() by setting a new value for <video>.currentTime.

+

WebCodecs makes such attacks easier by exposing low level control when inputs +are provided and direct access to invoke the codec control methods. This also +affords attackers the ability to invoke sequences of control methods that were +not previously possible via the higher level APIs.

+

The Working Group expects User Agents to mitigate this risk by extensively +fuzzing their implementation with random inputs and control method invocations. +Additionally, User Agents are encouraged to isolate their underlying codecs in +processes with restricted privileges (sandbox) as a barrier against successful +exploits being able to read user data.

+

An additional concern is exposing the underlying codecs to input mutation race +conditions, such as allowing a site to mutate a codec input or output while +the underlying codec is still operating on that data. This concern is mitigated +by ensuring that input and output interfaces are immutable.

+
+

13. Privacy Considerations

+
+ +This section is non-normative. + + +

The primary privacy impact is an increased ability to fingerprint users by +querying for different codec capabilities to establish a codec feature profile. +Much of this profile is already exposed by existing APIs. Such profiles are very +unlikely to be uniquely identifying, but can be used with other metrics to +create a fingerprint.

+

An attacker can accumulate a codec feature profile by calling +IsConfigSupported() methods with a number of different configuration +dictionaries. Similarly, an attacker can attempt to configure() a codec with +different configuration dictionaries and observe which configurations are +accepted.

+

Attackers can also use existing APIs to establish much of the codec feature +profile. For example, the [media-capabilities] decodingInfo() API +describes what types of decoders are supported and its powerEfficient +attribute can signal when a decoder uses hardware acceleration. Similarly, the +[WebRTC] getCapabilities() API can be used to determine what +types of encoders are supported and the getStats() API can +be used to determine when an encoder uses hardware acceleration. WebCodecs will +expose some additional information in the form of low level codec features.

+

A codec feature profile alone is unlikely to be uniquely identifying. Underlying +codecs are often implemented entirely in software (be it part of the User Agent +binary or part of the operating system), such that all users who run that +software will have a common set of capabilities. Additionally, underlying codecs +are often implemented with hardware acceleration, but such hardware is mass +produced and devices of a particular class and manufacture date (e.g. flagship +phones manufactured in 2020) will often have common capabilities. There will be +outliers (some users can be running outdated versions of software codecs or use +a rare mix of custom assembled hardware), but most of the time a given codec +feature profile is shared by a large group of users.

+

Segmenting groups of users by codec feature profile still amounts to a bit of +entropy that can be combined with other metrics to uniquely identify a user. +User Agents MAY partially mitigate this by returning an +error whenever a site attempts to exhaustively probe for codec capabilities. +Additionally, User Agents MAY implement a "privacy +budget", which depletes as authors use WebCodecs and other identifying APIs. +Upon exhaustion of the privacy budget, codec capabilities could be reduced to a +common baseline or prompt for user approval.

+
+

14. Best Practices for Authors Using WebCodecs

+
+ + This section is non-normative. + + +

While WebCodecs internally operates on background threads, authors working with + realtime media or in contended main thread environments are encouraged to ensure their + media pipelines operate in worker contexts entirely independent of the main + thread where possible. For example, realtime media processing of VideoFrames + is generally best done in a worker context.

+

The main thread has significant potential for high contention and jank that can + go unnoticed in development, yet degrade inconsistently across devices and User + Agents in the field -- potentially dramatically impacting the end user + experience. Ensuring the media pipeline is decoupled from the main thread helps + provide a smooth experience for end users.

+

Authors using the main thread for their media pipeline ought to be sure of + their target frame rates, main thread workload, how their application will be + embedded, and the class of devices their users will be using.

+
+

15. Acknowledgements

+

The editors would like to thank Alex Russell, Chris Needham, Dale Curtis, Dan +Sanders, Eugene Zemtsov, Francois Daoust, Guido Urdaneta, Harald Alvestrand, +Jan-Ivar Bruaroey, Jer Noble, Mark Foltz, Peter Thatcher, Steve Anton, Matt +Wolenetz, Rijubrata Bhaumik, Thomas Guilbert, Tuukka Toivonen, and Youenn Fablet +for their contributions to this specification. Thank you also to the many +others who contributed to the specification, including through their +participation on the mailing list and in the issues.

+

The Working Group dedicates this specification to our colleague Bernard Aboba.

+
+
+

Conformance

+

Document conventions

+

Conformance requirements are expressed + with a combination of descriptive assertions + and RFC 2119 terminology. + The key words “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, “SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” + in the normative parts of this document + are to be interpreted as described in RFC 2119. + However, for readability, + these words do not appear in all uppercase letters in this specification. + +

+

All of the text of this specification is normative + except sections explicitly marked as non-normative, examples, and notes. [RFC2119] + +

+

Examples in this specification are introduced with the words “for example” + or are set apart from the normative text + with class="example", + like this: + +

+
+ + +

This is an example of an informative example. +

+
+

Informative notes begin with the word “Note” + and are set apart from the normative text + with class="note", + like this: + +

+

Note, this is an informative note.

+
+

Conformant Algorithms

+

Requirements phrased in the imperative as part of algorithms + (such as "strip any leading space characters" + or "return false and abort these steps") + are to be interpreted with the meaning of the key word + ("must", "should", "may", etc) + used in introducing the algorithm. + +

+

Conformance requirements phrased as algorithms or specific steps + can be implemented in any manner, + so long as the end result is equivalent. + In particular, the algorithms defined in this specification + are intended to be easy to understand + and are not intended to be performant. + Implementers are encouraged to optimize. +

+
+
+ +

Index

+

Terms defined by this specification

+ +

Terms defined by reference

+ +

References

+

Normative References

+
+
[CSS-IMAGES-3] +
Tab Atkins Jr.; Elika Etemad; Lea Verou. CSS Images Module Level 3. URL: https://drafts.csswg.org/css-images-3/ +
[DOM] +
Anne van Kesteren. DOM Standard. Living Standard. URL: https://dom.spec.whatwg.org/ +
[ECMASCRIPT] +
ECMAScript Language Specification. URL: https://tc39.es/ecma262/multipage/ +
[GEOMETRY-1] +
Sebastian Zartner; Yehonatan Daniv. Geometry Interfaces Module Level 1. URL: https://drafts.csswg.org/geometry/ +
[HTML] +
Anne van Kesteren; et al. HTML Standard. Living Standard. URL: https://html.spec.whatwg.org/multipage/ +
[INFRA] +
Anne van Kesteren; Domenic Denicola. Infra Standard. Living Standard. URL: https://infra.spec.whatwg.org/ +
[MEDIASTREAM-RECORDING] +
Miguel Casas-sanchez. MediaStream Recording. URL: https://w3c.github.io/mediacapture-record/ +
[MIMESNIFF] +
Gordon P. Hemsley. MIME Sniffing Standard. Living Standard. URL: https://mimesniff.spec.whatwg.org/ +
[MST-CONTENT-HINT] +
Harald Alvestrand. MediaStreamTrack Content Hints. URL: https://w3c.github.io/mst-content-hint/ +
[RFC2119] +
S. Bradner. Key words for use in RFCs to Indicate Requirement Levels. March 1997. Best Current Practice. URL: https://datatracker.ietf.org/doc/html/rfc2119 +
[STREAMS] +
Adam Rice; et al. Streams Standard. Living Standard. URL: https://streams.spec.whatwg.org/ +
[SVG2] +
Amelia Bellamy-Royds; et al. Scalable Vector Graphics (SVG) 2. URL: https://w3c.github.io/svgwg/svg2-draft/ +
[WEBIDL] +
Edgar Chen; Timothy Gu. Web IDL Standard. Living Standard. URL: https://webidl.spec.whatwg.org/ +
[WebRTC-SVC] +
Bernard Aboba. Scalable Video Coding (SVC) Extension for WebRTC. URL: https://w3c.github.io/webrtc-svc/ +
+

Informative References

+
+
[H.273] +
Coding-independent code points for video signal type identification. December 2016. URL: https://www.itu.int/rec/T-REC-H.273/en +
[MEDIA-CAPABILITIES] +
Jean-Yves Avenard; Mark Foltz. Media Capabilities. URL: https://w3c.github.io/media-capabilities/ +
[MEDIA-SOURCE-2] +
Jean-Yves Avenard; Mark Watson. Media Source Extensions™. URL: https://w3c.github.io/media-source/ +
[RFC6381] +
R. Gellens; D. Singer; P. Frojdh. The 'Codecs' and 'Profiles' Parameters for "Bucket" Media Types. August 2011. Proposed Standard. URL: https://www.rfc-editor.org/rfc/rfc6381 +
[RFC9110] +
R. Fielding, Ed.; M. Nottingham, Ed.; J. Reschke, Ed.. HTTP Semantics. June 2022. Internet Standard. URL: https://httpwg.org/specs/rfc9110.html +
[WEBAUDIO] +
Paul Adenot; Hongchan Choi. Web Audio API 1.1. URL: https://webaudio.github.io/web-audio-api/ +
[WEBCODECS-CODEC-REGISTRY] +
Paul Adenot; Eugene Zemtsov. WebCodecs Codec Registry. URL: https://w3c.github.io/webcodecs/codec_registry.html +
[WEBCODECS-VIDEO-FRAME-METADATA-REGISTRY] +
Youenn Fablet. WebCodecs VideoFrame Metadata Registry. ED. URL: https://w3c.github.io/webcodecs/video_frame_metadata_registry.html +
[WebRTC] +
Cullen Jennings; et al. WebRTC: Real-Time Communication in Browsers. URL: https://w3c.github.io/webrtc-pc/ +
+

IDL Index

+
[Exposed=(Window,DedicatedWorker), SecureContext]
+interface AudioDecoder : EventTarget {
+  constructor(AudioDecoderInit init);
+
+  readonly attribute CodecState state;
+  readonly attribute unsigned long decodeQueueSize;
+  attribute EventHandler ondequeue;
+
+  undefined configure(AudioDecoderConfig config);
+  undefined decode(EncodedAudioChunk chunk);
+  Promise<undefined> flush();
+  undefined reset();
+  undefined close();
+
+  static Promise<AudioDecoderSupport> isConfigSupported(AudioDecoderConfig config);
+};
+
+dictionary AudioDecoderInit {
+  required AudioDataOutputCallback output;
+  required WebCodecsErrorCallback error;
+};
+
+callback AudioDataOutputCallback = undefined(AudioData output);
+
+[Exposed=(Window,DedicatedWorker), SecureContext]
+interface VideoDecoder : EventTarget {
+  constructor(VideoDecoderInit init);
+
+  readonly attribute CodecState state;
+  readonly attribute unsigned long decodeQueueSize;
+  attribute EventHandler ondequeue;
+
+  undefined configure(VideoDecoderConfig config);
+  undefined decode(EncodedVideoChunk chunk);
+  Promise<undefined> flush();
+  undefined reset();
+  undefined close();
+
+  static Promise<VideoDecoderSupport> isConfigSupported(VideoDecoderConfig config);
+};
+
+dictionary VideoDecoderInit {
+  required VideoFrameOutputCallback output;
+  required WebCodecsErrorCallback error;
+};
+
+callback VideoFrameOutputCallback = undefined(VideoFrame output);
+
+[Exposed=(Window,DedicatedWorker), SecureContext]
+interface AudioEncoder : EventTarget {
+  constructor(AudioEncoderInit init);
+
+  readonly attribute CodecState state;
+  readonly attribute unsigned long encodeQueueSize;
+  attribute EventHandler ondequeue;
+
+  undefined configure(AudioEncoderConfig config);
+  undefined encode(AudioData data);
+  Promise<undefined> flush();
+  undefined reset();
+  undefined close();
+
+  static Promise<AudioEncoderSupport> isConfigSupported(AudioEncoderConfig config);
+};
+
+dictionary AudioEncoderInit {
+  required EncodedAudioChunkOutputCallback output;
+  required WebCodecsErrorCallback error;
+};
+
+callback EncodedAudioChunkOutputCallback =
+    undefined (EncodedAudioChunk output,
+               optional EncodedAudioChunkMetadata metadata = {});
+
+dictionary EncodedAudioChunkMetadata {
+  AudioDecoderConfig decoderConfig;
+};
+
+[Exposed=(Window,DedicatedWorker), SecureContext]
+interface VideoEncoder : EventTarget {
+  constructor(VideoEncoderInit init);
+
+  readonly attribute CodecState state;
+  readonly attribute unsigned long encodeQueueSize;
+  attribute EventHandler ondequeue;
+
+  undefined configure(VideoEncoderConfig config);
+  undefined encode(VideoFrame frame, optional VideoEncoderEncodeOptions options = {});
+  Promise<undefined> flush();
+  undefined reset();
+  undefined close();
+
+  static Promise<VideoEncoderSupport> isConfigSupported(VideoEncoderConfig config);
+};
+
+dictionary VideoEncoderInit {
+  required EncodedVideoChunkOutputCallback output;
+  required WebCodecsErrorCallback error;
+};
+
+callback EncodedVideoChunkOutputCallback =
+    undefined (EncodedVideoChunk chunk,
+               optional EncodedVideoChunkMetadata metadata = {});
+
+dictionary EncodedVideoChunkMetadata {
+  VideoDecoderConfig decoderConfig;
+  SvcOutputMetadata svc;
+  BufferSource alphaSideData;
+};
+
+dictionary SvcOutputMetadata {
+  unsigned long temporalLayerId;
+};
+
+dictionary AudioDecoderSupport {
+  boolean supported;
+  AudioDecoderConfig config;
+};
+
+dictionary VideoDecoderSupport {
+  boolean supported;
+  VideoDecoderConfig config;
+};
+
+dictionary AudioEncoderSupport {
+  boolean supported;
+  AudioEncoderConfig config;
+};
+
+dictionary VideoEncoderSupport {
+  boolean supported;
+  VideoEncoderConfig config;
+};
+
+dictionary AudioDecoderConfig {
+  required DOMString codec;
+  [EnforceRange] required unsigned long sampleRate;
+  [EnforceRange] required unsigned long numberOfChannels;
+  AllowSharedBufferSource description;
+};
+
+dictionary VideoDecoderConfig {
+  required DOMString codec;
+  AllowSharedBufferSource description;
+  [EnforceRange] unsigned long codedWidth;
+  [EnforceRange] unsigned long codedHeight;
+  [EnforceRange] unsigned long displayAspectWidth;
+  [EnforceRange] unsigned long displayAspectHeight;
+  VideoColorSpaceInit colorSpace;
+  HardwareAcceleration hardwareAcceleration = "no-preference";
+  boolean optimizeForLatency;
+  double rotation = 0;
+  boolean flip = false;
+};
+
+dictionary AudioEncoderConfig {
+  required DOMString codec;
+  [EnforceRange] required unsigned long sampleRate;
+  [EnforceRange] required unsigned long numberOfChannels;
+  [EnforceRange] unsigned long long bitrate;
+  BitrateMode bitrateMode = "variable";
+};
+
+dictionary VideoEncoderConfig {
+  required DOMString codec;
+  [EnforceRange] required unsigned long width;
+  [EnforceRange] required unsigned long height;
+  [EnforceRange] unsigned long displayWidth;
+  [EnforceRange] unsigned long displayHeight;
+  [EnforceRange] unsigned long long bitrate;
+  double framerate;
+  HardwareAcceleration hardwareAcceleration = "no-preference";
+  AlphaOption alpha = "discard";
+  DOMString scalabilityMode;
+  VideoEncoderBitrateMode bitrateMode = "variable";
+  LatencyMode latencyMode = "quality";
+  DOMString contentHint;
+};
+
+enum HardwareAcceleration {
+  "no-preference",
+  "prefer-hardware",
+  "prefer-software",
+};
+
+enum AlphaOption {
+  "keep",
+  "discard",
+};
+
+enum LatencyMode {
+  "quality",
+  "realtime"
+};
+
+dictionary VideoEncoderEncodeOptions {
+  boolean keyFrame = false;
+};
+
+enum VideoEncoderBitrateMode {
+  "constant",
+  "variable",
+  "quantizer"
+};
+
+enum CodecState {
+  "unconfigured",
+  "configured",
+  "closed"
+};
+
+callback WebCodecsErrorCallback = undefined(DOMException error);
+
+[Exposed=(Window,DedicatedWorker), Serializable]
+interface EncodedAudioChunk {
+  constructor(EncodedAudioChunkInit init);
+  readonly attribute EncodedAudioChunkType type;
+  readonly attribute long long timestamp;          // microseconds
+  readonly attribute unsigned long long? duration; // microseconds
+  readonly attribute unsigned long byteLength;
+
+  undefined copyTo(AllowSharedBufferSource destination);
+};
+
+dictionary EncodedAudioChunkInit {
+  required EncodedAudioChunkType type;
+  [EnforceRange] required long long timestamp;    // microseconds
+  [EnforceRange] unsigned long long duration;     // microseconds
+  required AllowSharedBufferSource data;
+  sequence<ArrayBuffer> transfer = [];
+};
+
+enum EncodedAudioChunkType {
+    "key",
+    "delta",
+};
+
+// A serializable chunk of encoded video data; mirrors EncodedAudioChunk.
+// timestamp/duration are the presentation timestamp/duration in microseconds.
+[Exposed=(Window,DedicatedWorker), Serializable]
+interface EncodedVideoChunk {
+  constructor(EncodedVideoChunkInit init);
+  readonly attribute EncodedVideoChunkType type;
+  readonly attribute long long timestamp;             // microseconds
+  readonly attribute unsigned long long? duration;    // microseconds
+  readonly attribute unsigned long byteLength;
+
+  // Copies the chunk's encoded bytes into destination.
+  undefined copyTo(AllowSharedBufferSource destination);
+};
+
+// Constructor init for EncodedVideoChunk; parallels EncodedAudioChunkInit.
+dictionary EncodedVideoChunkInit {
+  required EncodedVideoChunkType type;
+  [EnforceRange] required long long timestamp;        // microseconds
+  [EnforceRange] unsigned long long duration;         // microseconds
+  required AllowSharedBufferSource data;
+  sequence<ArrayBuffer> transfer = [];
+};
+
+// Whether a chunk is a key chunk (does not depend on other frames for
+// decoding) or a delta chunk.
+enum EncodedVideoChunkType {
+  "key",
+  "delta",
+};
+
+// Decoded audio samples. format is nullable — presumably null once the data
+// is closed/detached; confirm against the close() algorithm.
+[Exposed=(Window,DedicatedWorker), Serializable, Transferable]
+interface AudioData {
+  constructor(AudioDataInit init);
+
+  readonly attribute AudioSampleFormat? format;
+  readonly attribute float sampleRate;
+  readonly attribute unsigned long numberOfFrames;
+  readonly attribute unsigned long numberOfChannels;
+  readonly attribute unsigned long long duration;  // microseconds
+  readonly attribute long long timestamp;          // microseconds
+
+  // Size in bytes needed by copyTo() for the given options.
+  unsigned long allocationSize(AudioDataCopyToOptions options);
+  undefined copyTo(AllowSharedBufferSource destination, AudioDataCopyToOptions options);
+  AudioData clone();
+  undefined close();
+};
+
+// Constructor init for AudioData. Note: unlike the AudioData interface,
+// duration is not a member here — presumably derived from numberOfFrames
+// and sampleRate; confirm against the constructor algorithm.
+dictionary AudioDataInit {
+  required AudioSampleFormat format;
+  required float sampleRate;
+  [EnforceRange] required unsigned long numberOfFrames;
+  [EnforceRange] required unsigned long numberOfChannels;
+  [EnforceRange] required long long timestamp;  // microseconds
+  required BufferSource data;
+  sequence<ArrayBuffer> transfer = [];
+};
+
+// Options for AudioData.allocationSize()/copyTo(): which plane to copy,
+// an optional frame window, and an optional destination sample format.
+dictionary AudioDataCopyToOptions {
+  [EnforceRange] required unsigned long planeIndex;
+  [EnforceRange] unsigned long frameOffset = 0;
+  [EnforceRange] unsigned long frameCount;
+  AudioSampleFormat format;
+};
+
+// Sample formats: u8/s16/s32/f32 interleaved, plus "-planar" variants
+// with one plane per channel.
+enum AudioSampleFormat {
+  "u8",
+  "s16",
+  "s32",
+  "f32",
+  "u8-planar",
+  "s16-planar",
+  "s32-planar",
+  "f32-planar",
+};
+
+// A decoded video frame, constructible from a canvas image source or from a
+// raw pixel buffer. format/codedRect/visibleRect are nullable — presumably
+// null once the frame is closed/detached; confirm against close().
+[Exposed=(Window,DedicatedWorker), Serializable, Transferable]
+interface VideoFrame {
+  constructor(CanvasImageSource image, optional VideoFrameInit init = {});
+  constructor(AllowSharedBufferSource data, VideoFrameBufferInit init);
+
+  readonly attribute VideoPixelFormat? format;
+  readonly attribute unsigned long codedWidth;
+  readonly attribute unsigned long codedHeight;
+  readonly attribute DOMRectReadOnly? codedRect;
+  readonly attribute DOMRectReadOnly? visibleRect;
+  readonly attribute double rotation;
+  readonly attribute boolean flip;
+  readonly attribute unsigned long displayWidth;
+  readonly attribute unsigned long displayHeight;
+  readonly attribute unsigned long long? duration;  // microseconds
+  readonly attribute long long timestamp;           // microseconds
+  readonly attribute VideoColorSpace colorSpace;
+
+  // Members of the returned dictionary come from the VideoFrame Metadata Registry.
+  VideoFrameMetadata metadata();
+
+  // Size in bytes needed by copyTo() for the given options.
+  unsigned long allocationSize(
+      optional VideoFrameCopyToOptions options = {});
+  Promise<sequence<PlaneLayout>> copyTo(
+      AllowSharedBufferSource destination,
+      optional VideoFrameCopyToOptions options = {});
+  VideoFrame clone();
+  undefined close();
+};
+
+// Init for the VideoFrame(CanvasImageSource, ...) constructor; values not
+// supplied here default to properties of the source image.
+dictionary VideoFrameInit {
+  unsigned long long duration;  // microseconds
+  long long timestamp;          // microseconds
+  AlphaOption alpha = "keep";
+
+  // Default matches image. May be used to efficiently crop. Will trigger
+  // new computation of displayWidth and displayHeight using image's pixel
+  // aspect ratio unless an explicit displayWidth and displayHeight are given.
+  DOMRectInit visibleRect;
+
+  double rotation = 0;
+  boolean flip = false;
+
+  // Default matches image unless visibleRect is provided.
+  [EnforceRange] unsigned long displayWidth;
+  [EnforceRange] unsigned long displayHeight;
+
+  VideoFrameMetadata metadata;
+};
+
+// Init for the VideoFrame(AllowSharedBufferSource, ...) constructor: builds a
+// frame from raw pixel data with an explicit format and coded size.
+dictionary VideoFrameBufferInit {
+  required VideoPixelFormat format;
+  required [EnforceRange] unsigned long codedWidth;
+  required [EnforceRange] unsigned long codedHeight;
+  required [EnforceRange] long long timestamp;  // microseconds
+  [EnforceRange] unsigned long long duration;  // microseconds
+
+  // Default layout is tightly-packed.
+  sequence<PlaneLayout> layout;
+
+  // Default visible rect is coded size positioned at (0,0)
+  DOMRectInit visibleRect;
+
+  double rotation = 0;
+  boolean flip = false;
+
+  // Default display dimensions match visibleRect.
+  [EnforceRange] unsigned long displayWidth;
+  [EnforceRange] unsigned long displayHeight;
+
+  VideoColorSpaceInit colorSpace;
+
+  sequence<ArrayBuffer> transfer = [];
+
+  VideoFrameMetadata metadata;
+};
+
+// Intentionally empty here; members are defined externally.
+dictionary VideoFrameMetadata {
+  // Possible members are recorded in the VideoFrame Metadata Registry.
+};
+
+// Options for VideoFrame.allocationSize()/copyTo(): source rect, destination
+// plane layout, and optional pixel-format / color-space conversion.
+dictionary VideoFrameCopyToOptions {
+  DOMRectInit rect;
+  sequence<PlaneLayout> layout;
+  VideoPixelFormat format;
+  PredefinedColorSpace colorSpace;
+};
+
+// Byte offset and row stride of one pixel plane within a buffer.
+dictionary PlaneLayout {
+  [EnforceRange] required unsigned long offset;
+  [EnforceRange] required unsigned long stride;
+};
+
+// Pixel formats. Naming: I4xx = planar YUV with the given chroma subsampling;
+// trailing "A" = extra alpha plane; "P10"/"P12" = 10-/12-bit samples.
+enum VideoPixelFormat {
+  // 4:2:0 Y, U, V
+  "I420",
+  "I420P10",
+  "I420P12",
+  // 4:2:0 Y, U, V, A
+  "I420A",
+  "I420AP10",
+  "I420AP12",
+  // 4:2:2 Y, U, V
+  "I422",
+  "I422P10",
+  "I422P12",
+  // 4:2:2 Y, U, V, A
+  "I422A",
+  "I422AP10",
+  "I422AP12",
+  // 4:4:4 Y, U, V
+  "I444",
+  "I444P10",
+  "I444P12",
+  // 4:4:4 Y, U, V, A
+  "I444A",
+  "I444AP10",
+  "I444AP12",
+  // 4:2:0 Y, UV
+  "NV12",
+  // 4:4:4 RGBA
+  "RGBA",
+  // 4:4:4 RGBX (opaque)
+  "RGBX",
+  // 4:4:4 BGRA
+  "BGRA",
+  // 4:4:4 BGRX (opaque)
+  "BGRX",
+};
+
+// Color space of a VideoFrame. All members are nullable: null presumably
+// means "unspecified"; confirm against the VideoColorSpace algorithms.
+[Exposed=(Window,DedicatedWorker)]
+interface VideoColorSpace {
+  constructor(optional VideoColorSpaceInit init = {});
+
+  readonly attribute VideoColorPrimaries? primaries;
+  readonly attribute VideoTransferCharacteristics? transfer;
+  readonly attribute VideoMatrixCoefficients? matrix;
+  readonly attribute boolean? fullRange;
+
+  // [Default] gives the standard toJSON behavior: serialize the attributes above.
+  [Default] VideoColorSpaceInit toJSON();
+};
+
+// Init mirror of VideoColorSpace; every member defaults to null (unspecified).
+dictionary VideoColorSpaceInit {
+  VideoColorPrimaries? primaries = null;
+  VideoTransferCharacteristics? transfer = null;
+  VideoMatrixCoefficients? matrix = null;
+  boolean? fullRange = null;
+};
+
+// Color primaries, named after the defining standards (BT.709, BT.470BG,
+// SMPTE 170M, BT.2020, SMPTE 432 / Display-P3).
+enum VideoColorPrimaries {
+  "bt709",
+  "bt470bg",
+  "smpte170m",
+  "bt2020",
+  "smpte432",
+};
+
+// Transfer functions; "iec61966-2-1" is sRGB, "pq" and "hlg" are the HDR
+// transfer functions.
+enum VideoTransferCharacteristics {
+  "bt709",
+  "smpte170m",
+  "iec61966-2-1",
+  "linear",
+  "pq",
+  "hlg",
+};
+
+// YUV<->RGB matrix coefficients; "rgb" means no matrix (identity).
+enum VideoMatrixCoefficients {
+  "rgb",
+  "bt709",
+  "bt470bg",
+  "smpte170m",
+  "bt2020-ncl",
+};
+
+// Decodes still and animated images into VideoFrames. SecureContext-only,
+// unlike the codec interfaces above.
+[Exposed=(Window,DedicatedWorker), SecureContext]
+interface ImageDecoder {
+  constructor(ImageDecoderInit init);
+
+  readonly attribute DOMString type;
+  readonly attribute boolean complete;
+  readonly attribute Promise<undefined> completed;
+  readonly attribute ImageTrackList tracks;
+
+  Promise<ImageDecodeResult> decode(optional ImageDecodeOptions options = {});
+  undefined reset();
+  undefined close();
+
+  // type is a MIME type string — presumably an image MIME type; confirm
+  // against the isTypeSupported algorithm.
+  static Promise<boolean> isTypeSupported(DOMString type);
+};
+
+
+// Image bytes may be supplied as a complete buffer or streamed in.
+typedef (AllowSharedBufferSource or ReadableStream) ImageBufferSource;
+// Constructor init for ImageDecoder; type is the image's MIME type string.
+dictionary ImageDecoderInit {
+  required DOMString type;
+  required ImageBufferSource data;
+  ColorSpaceConversion colorSpaceConversion = "default";
+  [EnforceRange] unsigned long desiredWidth;
+  [EnforceRange] unsigned long desiredHeight;
+  boolean preferAnimation;
+  sequence<ArrayBuffer> transfer = [];
+};
+
+
+// Options for ImageDecoder.decode(): which frame to decode, and whether
+// partially-received frames may be returned (completeFramesOnly = false).
+dictionary ImageDecodeOptions {
+  [EnforceRange] unsigned long frameIndex = 0;
+  boolean completeFramesOnly = true;
+};
+
+
+// Result of ImageDecoder.decode(): the decoded frame and whether it was
+// decoded from complete data.
+dictionary ImageDecodeResult {
+  required VideoFrame image;
+  required boolean complete;
+};
+
+
+// Indexed list of an image's tracks. selectedIndex is signed — presumably -1
+// when no track is selected; confirm against the ImageTrackList algorithms.
+[Exposed=(Window,DedicatedWorker), SecureContext]
+interface ImageTrackList {
+  getter ImageTrack (unsigned long index);
+
+  readonly attribute Promise<undefined> ready;
+  readonly attribute unsigned long length;
+  readonly attribute long selectedIndex;
+  readonly attribute ImageTrack? selectedTrack;
+};
+
+
+// One track of an image. repetitionCount is unrestricted float — presumably
+// to allow Infinity for endless animation loops; confirm against the spec text.
+[Exposed=(Window,DedicatedWorker), SecureContext]
+interface ImageTrack {
+  readonly attribute boolean animated;
+  readonly attribute unsigned long frameCount;
+  readonly attribute unrestricted float repetitionCount;
+  attribute boolean selected;
+};
+
+
+
+

Issues Index

+
+
+The spec SHOULD provide definitions (and
+possibly diagrams) for coded size, visible rectangle, and display size.
+See #166.
+
+
+
\ No newline at end of file