Struct MultiHeadAttentionConfig
pub struct MultiHeadAttentionConfig {
pub d_model: usize,
pub n_heads: usize,
pub dropout: f64,
pub min_float: f64,
pub quiet_softmax: bool,
pub initializer: Initializer,
}
Expand description
Configuration to create a Multi Head Attention layer using the init function.
Fields§
§d_model: usize
The size of each linear layer.
n_heads: usize
The number of heads.
dropout: f64
The dropout rate. Default: 0.1
min_float: f64
The minimum value a float can take. Default: -1.0e4. This is used to mask attention scores before calculating attention weights. A value that is too low might result in NaN.
quiet_softmax: bool
Use “quiet softmax” instead of regular softmax.
- Usage may improve performance by allowing attention heads to deposit no information (if the sequence contains no information relevant to that head).
- Usage may reduce the entropy of weights in the model, enhancing quantization and compression.
Reference: https://www.evanmiller.org/attention-is-off-by-one.html
initializer: Initializer
The type of function used to initialize neural network parameters
Implementations§
§
impl MultiHeadAttentionConfig
impl MultiHeadAttentionConfig
pub fn new(d_model:
usize,
n_heads: usize)
-> MultiHeadAttentionConfig
pub fn new(d_model: usize, n_heads: usize) -> MultiHeadAttentionConfig
Create a new instance of the config.
§
impl MultiHeadAttentionConfig
impl MultiHeadAttentionConfig
pub fn with_dropout(self, dropout: f64)
-> MultiHeadAttentionConfig
pub fn with_dropout(self, dropout: f64) -> MultiHeadAttentionConfig
The dropout rate. Default: 0.1
pub fn with_min_float(self, min_float: f64)
-> MultiHeadAttentionConfig
pub fn with_min_float(self, min_float: f64) -> MultiHeadAttentionConfig
The minimum value a float can take. Default: -1.0e4
pub fn with_quiet_softmax(self, quiet_softmax: bool)
-> MultiHeadAttentionConfig
pub fn with_quiet_softmax(self, quiet_softmax: bool) -> MultiHeadAttentionConfig
Use “quiet softmax” instead of regular softmax.
pub fn with_initializer(
self,
initializer: Initializer,
) -> MultiHeadAttentionConfig
pub fn with_initializer( self, initializer: Initializer, ) -> MultiHeadAttentionConfig
The type of function used to initialize neural network parameters
§
impl MultiHeadAttentionConfig
impl MultiHeadAttentionConfig
pub fn init<B>(&self, device: &<B as Backend>::Device) -> MultiHeadAttention<B>
where
B: Backend,
pub fn init<B>(&self, device: &<B as Backend>::Device) -> MultiHeadAttention<B>
where
B: Backend,
Initialize a new multihead attention module.
Trait Implementations§
§
impl Clone for MultiHeadAttentionConfig
impl Clone for MultiHeadAttentionConfig
§
fn clone(&self) -> MultiHeadAttentionConfig
fn clone(&self) -> MultiHeadAttentionConfig
1.0.0 · Source§
fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
§
impl Config for MultiHeadAttentionConfig
impl Config for MultiHeadAttentionConfig
§
fn load<P>(file: P) -> Result<Self, ConfigError>
fn load<P>(file: P) -> Result<Self, ConfigError>
§
fn load_binary(data: &[u8])
-> Result<Self, ConfigError>
fn load_binary(data: &[u8]) -> Result<Self, ConfigError>
§
impl<'de> Deserialize<'de> for MultiHeadAttentionConfig
impl<'de> Deserialize<'de> for MultiHeadAttentionConfig
§
fn deserialize<D>(
deserializer: D,
) -> Result<MultiHeadAttentionConfig,
<D as Deserializer<'de>>::Error> where
D: Deserializer<'de>,
fn deserialize<D>(
deserializer: D,
) -> Result<MultiHeadAttentionConfig,
<D as Deserializer<'de>>::Error> where
D: Deserializer<'de>,
§
impl Display for MultiHeadAttentionConfig
impl Display for MultiHeadAttentionConfig
§
impl Serialize for MultiHeadAttentionConfig
impl Serialize for MultiHeadAttentionConfig
§
fn serialize<S>(
&self,
serializer: S,
) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error> where
S: Serializer,
fn serialize<S>(
&self,
serializer: S,
) -> Result<<S as Serializer>::Ok, <S as Serializer>::Error> where
S: Serializer,
Auto Trait Implementations§
impl Freeze for MultiHeadAttentionConfig
impl RefUnwindSafe for MultiHeadAttentionConfig
impl Send for MultiHeadAttentionConfig
impl Sync for MultiHeadAttentionConfig
impl Unpin for MultiHeadAttentionConfig
impl UnwindSafe for MultiHeadAttentionConfig
Blanket Implementations§
Source§
impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§
fn borrow_mut(&mut self) -> &mut
T
fn borrow_mut(&mut self) -> &mut T
Source§
impl<T> CloneToUninit for T where
T: Clone,
impl<T> CloneToUninit for T where
T: Clone,
§
impl<T> Instrument for T
impl<T> Instrument for T
§
fn instrument(self, span: Span) ->
Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
§
fn in_current_span(self) ->
Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§
impl<T> IntoEither for T
impl<T> IntoEither for T
Source§
fn into_either(self, into_left: bool)
-> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
§
impl<T> Pointable for T
impl<T> Pointable for T
§
impl<T> PolicyExt for T where
T: ?Sized,
impl<T> PolicyExt for T where
T: ?Sized,
§
impl<T> ToCompactString for T where
T: Display,
impl<T> ToCompactString for T where
T: Display,
§
fn try_to_compact_string(&self)
-> Result<CompactString,
ToCompactStringError>
fn try_to_compact_string(&self) -> Result<CompactString, ToCompactStringError>
Fallible version of ToCompactString::to_compact_string(). Read more
§
fn to_compact_string(&self) ->
CompactString
fn to_compact_string(&self) -> CompactString
Converts the given value to a CompactString. Read more
§
impl<T> ToStringFallible for T where
T: Display,
impl<T> ToStringFallible for T where
T: Display,
§
fn try_to_string(&self) -> Result<String, TryReserveError>
fn try_to_string(&self) -> Result<String, TryReserveError>
ToString::to_string, but without panic on OOM.