Add message translation to French #3

Open · wants to merge 4 commits into main
3 changes: 2 additions & 1 deletion DESCRIPTION
@@ -3,7 +3,8 @@ Title: Minimal Deep Learning Model Implementations
 Version: 0.0.0.9000
 Authors@R: c(
     person("Daniel", "Falbel", , "[email protected]", role = c("aut", "cre")),
-    person(family = "Posit", role = c("cph"))
+    person(family = "Posit", role = c("cph")),
+    person("Christophe", "Regouby", role = c("ctb"))
   )
 Description: A collection of minimal implementations of deep learning
   models. Clean and readable code prioritizing simplicity and understanding
4 changes: 4 additions & 0 deletions NEWS.md
@@ -0,0 +1,4 @@
# minhub (development version)

* Add support for `gpt2`, `gptneox`, `gptbigcode` and `llama`.
* Add message translation to French (#3 @cregouby).
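The change below repeats mechanically across all five R files: every user-facing `cli::cli_abort()` message is wrapped in `base::gettext()` with the `R-minhub` domain, so the English string stays in the source as the catalog key and a translated string is substituted at runtime when a matching catalog is installed. A minimal sketch of the pattern, reusing one of the messages from this PR:

# Before: the English literal is both the template and what the user sees.
#   cli::cli_abort("{.arg config$layer_norm_eps} must be {.val 1e-5}.")
# After: the same literal becomes the msgid looked up in the "R-minhub"
# catalog; cli's {.arg}/{.val} interpolation runs on whatever string
# gettext() returns, which is why the .po entries below keep the cli markup.
cli::cli_abort(gettext(
  "{.arg config$layer_norm_eps} must be {.val 1e-5}.",
  domain = "R-minhub"
))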
14 changes: 9 additions & 5 deletions R/gpt2.R
@@ -197,21 +197,25 @@ gpt2_from_config <- function(identifier, revision = "main") {
   config <- jsonlite::fromJSON(path)
 
   if (config$model_type != "gpt2")
-    cli::cli_abort(c(
-      "{.arg config$model_type} must be {.val gpt2}, got {.val {config$model_type}}"
+    cli::cli_abort(gettext(
+      "{.arg config$model_type} must be {.val gpt2}, got {.val {config$model_type}}",
+      domain = "R-minhub"
     ))
 
   if (config$layer_norm_eps != 1e-5)
-    cli::cli_abort("{.arg config$layer_norm_eps} must be {.val 1e-5}.")
+    cli::cli_abort(gettext("{.arg config$layer_norm_eps} must be {.val 1e-5}.",
+                           domain = "R-minhub"))
 
   pdrop <- unlist(config[c("resid_pdrop", "embd_pdrop", "attn_pdrop")])
   if (length(unique(pdrop)) != 1)
-    cli::cli_abort("{.arg {names(pdrop)}} must be all equal, but got {pdrop}")
+    cli::cli_abort(gettext("{.arg {names(pdrop)}} must be all equal, but got {pdrop}",
+                           domain = "R-minhub"))
   else
     pdrop <- unique(pdrop)
 
   if (config$initializer_range != 0.02)
-    cli::cli_abort("{.arg initializer_range} must be {.val 0.02}, got {config$initializer_range}")
+    cli::cli_abort(gettext("{.arg initializer_range} must be {.val 0.02}, got {config$initializer_range}",
+                           domain = "R-minhub"))
 
   vocab_size <- config$vocab_size
   n_embd <- config$n_embd
18 changes: 11 additions & 7 deletions R/gptbigcode.R
@@ -135,27 +135,31 @@ gptbigcode_from_config <- function(identifier, revision = "main") {
   config <- jsonlite::fromJSON(path)
 
   if (config$model_type != "gpt_bigcode")
-    cli::cli_abort(c(
+    cli::cli_abort(gettext(
       x = "{.arg config$model_type} must be {.val gpt_bigcode}.",
-      i = "Got {.val {config$model_type}}"
+      i = "Got {.val {config$model_type}}",
+      domain = "R-minhub"
     ))
 
   if (!config$multi_query)
-    cli::cli_abort("Must use {.arg config$multi_query} but got {.val FALSE}")
+    cli::cli_abort(gettext("Must use {.arg config$multi_query} but got {.val FALSE}",
+                           domain = "R-minhub"))
 
   dropouts <- config[c("attn_pdrop", "resid_pdrop", "embd_pdrop")]
   if (length(unique(dropouts)) != 1)
-    cli::cli_abort(c(
+    cli::cli_abort(gettext(
       x = "All dropout must be equal.",
-      i = "Got {.val {names(dropouts)}} respectively {.val {dropouts}}"
+      i = "Got {.val {names(dropouts)}} respectively {.val {dropouts}}",
+      domain = "R-minhub"
     ))
   else
     pdrop <- unique(dropouts)
 
 
   if (config$layer_norm_eps != 1e-5)
-    cli::cli_abort(c(
-      x = "{.arg config$layer_norm_eps} must be 1e-5, got {.val {config$layer_norm_eps}}"
+    cli::cli_abort(gettext(
+      x = "{.arg config$layer_norm_eps} must be 1e-5, got {.val {config$layer_norm_eps}}",
+      domain = "R-minhub"
     ))
 
   # remap HF config attributes to minhub configurations
27 changes: 16 additions & 11 deletions R/gptneox.R
@@ -93,7 +93,7 @@ nn_gptneox_attention <- nn_module(
     att <- torch_matmul(q, k$transpose(-2, -1)) * (1 / sqrt(k$size(-1)))
     att <- att$masked_fill(self$bias[,,1:t, 1:t] == 0, self$masked_bias)
     att <- nnf_softmax(att, dim=-1)$to(dtype = v$dtype)
-
+
     y <- torch_matmul(att, v)$transpose(2, 3)$contiguous()$view(c(b, t, h))
     self$c_proj(y)
   }
@@ -178,32 +178,37 @@ gptneox_from_config <- function(identifier, revision = "main") {
   config <- jsonlite::fromJSON(path)
 
   if (config$model_type != "gpt_neox")
-    cli::cli_abort(c(
-      "{.arg config$model_type} must be {.val gpt_neox}, got {.val {config$model_type}}"
+    cli::cli_abort(gettext(
+      "{.arg config$model_type} must be {.val gpt_neox}, got {.val {config$model_type}}",
+      domain = "R-minhub"
     ))
 
   # parallel residual is not supported
   if (!config$use_parallel_residual)
-    cli::cli_abort(c(
+    cli::cli_abort(gettext(
       x = "Non parallel residual is not supported.",
-      i = "{.arg config$use_parallel_residual} is {.val FALSE}"
+      i = "{.arg config$use_parallel_residual} is {.val FALSE}",
+      domain = "R-minhub"
     ))
 
   if (config$hidden_act != "gelu")
-    cli::cli_abort(c(
+    cli::cli_abort(gettext(
       x = "Unsupported {.arg config$hidden_act}: {.val {config$hidden_act}}",
-      i = "Currently only {.val gelu} is supported."
+      i = "Currently only {.val gelu} is supported.",
+      domain = "R-minhub"
     ))
 
   if ((config$intermediate_size / config$hidden_size) != 4)
-    cli::cli_abort(c(
+    cli::cli_abort(gettext(
       x = "{.arg config$intermediate_size} must be 4*{.arg config$hidden_size}",
-      i = "Got {.val {config$intermediate_size}} and {.val {config$hidden_size}}"
+      i = "Got {.val {config$intermediate_size}} and {.val {config$hidden_size}}",
+      domain = "R-minhub"
     ))
 
   if (config$layer_norm_eps != 1e-5)
-    cli::cli_abort(c(
-      x = "{.arg config$layer_norm_eps} must be 1e-5, got {.val {config$layer_norm_eps}}"
+    cli::cli_abort(gettext(
+      x = "{.arg config$layer_norm_eps} must be 1e-5, got {.val {config$layer_norm_eps}}",
+      domain = "R-minhub"
     ))
 
   # remap HF config attributes to minhub configurations
10 changes: 6 additions & 4 deletions R/llama.R
@@ -205,14 +205,16 @@ llama_from_config <- function(identifier, revision = "main") {
   config <- jsonlite::fromJSON(path)
 
   if (config$model_type != "llama")
-    cli::cli_abort(c(
-      "{.arg config$model_type} must be {.val llama}, got {.val {config$model_type}}"
+    cli::cli_abort(gettext(
+      "{.arg config$model_type} must be {.val llama}, got {.val {config$model_type}}",
+      domain = "R-minhub"
     ))
 
   if (config$hidden_act != "silu")
-    cli::cli_abort(c(
+    cli::cli_abort(gettext(
       x = "Unsupported {.arg config$hidden_act}: {.val {config$hidden_act}}",
-      i = "Currently only {.val silu} is supported."
+      i = "Currently only {.val silu} is supported.",
+      domain = "R-minhub"
     ))
 
   # remap HF config attributes to minhub configurations
9 changes: 6 additions & 3 deletions R/weights.R
@@ -29,9 +29,10 @@ hf_state_dict <- function(identifier, revision = "main") {
   index_path <- tryCatch({
     hub_download(identifier, WEIGHTS_INDEX_NAME(), revision = revision)
   }, error = function(err) {
-    cli::cli_abort(c(
+    cli::cli_abort(gettext(
       x = "Error downloading weights from {.val {c(WEIGHTS_NAME(), WEIGHTS_INDEX_NAME())}}",
-      i = "Traceback below shows the error when trying to download {.val {WEIGHTS_NAME()}}"
+      i = "Traceback below shows the error when trying to download {.val {WEIGHTS_NAME()}}",
+      domain = "R-minhub"
     ), parent = err)
   })
 
@@ -70,7 +71,9 @@ state_dict_safetensors <- function(identifier, revision) {
   )
 
   if (inherits(index_path, "try-error")) {
-    cli::cli_abort("No safetensors files found.")
+    cli::cli_abort(gettext(
+      "No safetensors files found.",
+      domain = "R-minhub"))
   }
 
   index <- jsonlite::fromJSON(index_path)$weight_map %>%
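Once the package is installed with its compiled catalog, the wiring can be checked end to end from the console. This is a sketch, not part of the PR; `Sys.setLanguage()` needs R >= 4.2, and older sessions can use `Sys.setenv(LANGUAGE = "fr")` instead:

library(minhub)
# Bind the installed catalog explicitly so the lookup also works outside
# the package's own namespace.
bindtextdomain("R-minhub", system.file("po", package = "minhub"))
Sys.setLanguage("fr")
gettext("No safetensors files found.", domain = "R-minhub")
#> [1] "Aucun fichier safetensors trouvé."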
Binary file added inst/po/fr/LC_MESSAGES/R-minhub.mo
129 changes: 129 additions & 0 deletions po/R-fr.po
@@ -0,0 +1,129 @@
msgid ""
msgstr ""
"Project-Id-Version: minhub 0.0.0.9000\n"
"POT-Creation-Date: 2024-11-28 11:28+0100\n"
"PO-Revision-Date: 2024-12-02 17:11+0100\n"
"Last-Translator: \n"
"Language-Team: \n"
"Language: fr\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"X-Generator: Poedit 3.5\n"

#: gpt2.R:201
msgid ""
"{.arg config$model_type} must be {.val gpt2}, got {.val {config$model_type}}"
msgstr ""
"La valeur de {.arg config$model_type} doit être {.val gpt2}, or elle est à {."
"val {config$model_type}}"

#: gpt2.R:205
msgid "{.arg config$layer_norm_eps} must be {.val 1e-5}."
msgstr "{.arg config$layer_norm_eps} doit être à {.val 1e-5}."

#: gpt2.R:209
msgid "{.arg {names(pdrop)}} must be all equal, but got {pdrop}"
msgstr "Les {.arg {names(pdrop)}} doivent être égaux, or ils sont à {pdrop}"

#: gpt2.R:214
msgid ""
"{.arg initializer_range} must be {.val 0.02}, got {config$initializer_range}"
msgstr ""
"La valeur de {.arg initializer_range} doit être à {.val 0.02}, or elle est à "
"{config$initializer_range}"

#: gptbigcode.R:139
msgid "{.arg config$model_type} must be {.val gpt_bigcode}."
msgstr "{.arg config$model_type} doit être {.val gpt_bigcode}."

#: gptbigcode.R:140
msgid "Got {.val {config$model_type}}"
msgstr "Or elle est à {.val {config$model_type}}"

#: gptbigcode.R:144
msgid "Must use {.arg config$multi_query} but got {.val FALSE}"
msgstr ""
"Il faut utiliser {.arg config$multi_query}, or c'est actuellement {.val "
"FALSE}"

#: gptbigcode.R:149
msgid "All dropout must be equal."
msgstr "Tous les {.val dropout} doivent être égaux."

#: gptbigcode.R:150
msgid "Got {.val {names(dropouts)}} respectively {.val {dropouts}}"
msgstr "Or {.val {names(dropouts)}} sont respectivement à {.val {dropouts}}"

#: gptbigcode.R:158 gptneox.R:206
msgid ""
"{.arg config$layer_norm_eps} must be 1e-5, got {.val {config$layer_norm_eps}}"
msgstr ""
"La valeur de {.arg config$layer_norm_eps} doit être {.val 1e-5}, or elle est "
"à {.val {config$layer_norm_eps}}"

#: gptneox.R:182
msgid ""
"{.arg config$model_type} must be {.val gpt_neox}, got {.val "
"{config$model_type}}"
msgstr ""
"{.arg config$model_type} doit être {.val gpt_neox}, or elle est à {.val "
"{config$model_type}}"

#: gptneox.R:188
msgid "Non parallel residual is not supported."
msgstr "Les résidus non-parallèles ne sont pas pris en compte."

#: gptneox.R:189
msgid "{.arg config$use_parallel_residual} is {.val FALSE}"
msgstr "{.arg config$use_parallel_residual} est {.val FALSE}"

#: gptneox.R:194 llama.R:214
msgid "Unsupported {.arg config$hidden_act}: {.val {config$hidden_act}}"
msgstr "{.arg config$hidden_act} ne peut pas être {.val {config$hidden_act}}"

#: gptneox.R:195
msgid "Currently only {.val gelu} is supported."
msgstr "Actuellement seul {.val gelu} est pris en compte."

#: gptneox.R:200
msgid "{.arg config$intermediate_size} must be 4*{.arg config$hidden_size}"
msgstr ""
"{.arg config$intermediate_size} doit être de 4*{.arg config$hidden_size}"

#: gptneox.R:201
msgid "Got {.val {config$intermediate_size}} and {.val {config$hidden_size}}"
msgstr ""
"Or nous avons respectivement {.val {config$intermediate_size}} et {.val "
"{config$hidden_size}}"

#: llama.R:209
msgid ""
"{.arg config$model_type} must be {.val llama}, got {.val {config$model_type}}"
msgstr ""
"{.arg config$model_type} doit être {.val llama}, or nous avons {.val "
"{config$model_type}}"

#: llama.R:215
msgid "Currently only {.val silu} is supported."
msgstr "Actuellement seul {.val silu} est pris en compte."

#: weights.R:33
msgid ""
"Error downloading weights from {.val {c(WEIGHTS_NAME(), "
"WEIGHTS_INDEX_NAME())}}"
msgstr ""
"Problème de téléchargement des points depuis {.val {c(WEIGHTS_NAME(), "
"WEIGHTS_INDEX_NAME())}}"

#: weights.R:34
msgid ""
"Traceback below shows the error when trying to download {.val "
"{WEIGHTS_NAME()}}"
msgstr ""
"Le log suivant détaille l'erreur lors de la tentative de téléchargement de {."
"val {WEIGHTS_NAME()}}"

#: weights.R:73
msgid "No safetensors files found."
msgstr "Aucun fichier safetensors trouvé."
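For reference, the usual base-R workflow for maintaining a catalog like this one (not part of the PR; it requires the GNU gettext tools on the PATH) regenerates the template from the `gettext()` calls, merges it into `po/R-fr.po` for re-translation, and installs the compiled `.mo` under `inst/po/`:

# Run from the package root:
tools::update_pkg_po(".")          # refresh po/R-minhub.pot, merge it into
                                   # po/R-fr.po, and compile/install the .mo
tools::checkPoFile("po/R-fr.po")   # flag suspicious entries, e.g. format
                                   # strings that differ between msgid and msgstr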