Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: generate alt-text with ai #2875

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions components/publish/PublishAttachment.vue
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,86 @@ const maxDescriptionLength = 1500
const isEditDialogOpen = ref(false)
const description = ref(props.attachment.description ?? '')

const generationInProgress = ref(false)

const userSettings = useUserSettings()

/**
 * Load an image and re-encode it as a data URL by drawing it onto an
 * off-screen canvas.
 *
 * The returned promise always settles: it rejects when the image fails to
 * load, when no 2D context is available, or when the canvas cannot be
 * exported (e.g. a tainted canvas because the host sent no CORS headers).
 *
 * @param src URL of the image to load; requested with `crossOrigin = 'Anonymous'`.
 * @returns A PNG data URL containing the image pixels.
 */
function loadImageAsDataUrl(src: string): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const imageElement = new Image()
    imageElement.crossOrigin = 'Anonymous'

    imageElement.onload = () => {
      // Exceptions thrown inside an event handler would otherwise be
      // "uncaught" and leave the promise pending forever.
      try {
        const canvas = document.createElement('canvas')
        canvas.width = imageElement.naturalWidth
        canvas.height = imageElement.naturalHeight

        const ctx = canvas.getContext('2d')
        if (!ctx)
          throw new Error('Could not acquire a 2D canvas context')

        ctx.drawImage(imageElement, 0, 0)

        // Use the default (lossless PNG) encoding. Deriving the MIME type from
        // the URL's extension is unreliable: `.jpg` maps to the invalid
        // `image/jpg`, and query strings corrupt the extension entirely.
        // NOTE: `toDataURL` throws a DOMException for tainted canvases; this
        // is now forwarded to the caller instead of being lost.
        resolve(canvas.toDataURL())
      }
      catch (error) {
        reject(error)
      }
    }
    imageElement.onerror = () => reject(new Error(`Failed to load image: ${src}`))

    // Assign `src` last so both handlers are attached before loading starts.
    imageElement.src = src
  })
}

/**
 * Generate an alt-text suggestion for the current attachment with a
 * client-side image-captioning model and write it into `description`.
 *
 * Does nothing when the attachment has no URL or a generation is already
 * running. On first use the user must confirm the model download; the consent
 * is remembered through the `experimentalAltTextGeneration` preference.
 * `generationInProgress` is `true` while the model runs and is always reset
 * afterwards, including on failure.
 */
async function generateAltText() {
  const url = props.attachment.url

  if (!url)
    return

  if (generationInProgress.value)
    return

  const experimentalAltTextGeneration = getPreferences(userSettings.value, 'experimentalAltTextGeneration')

  if (!experimentalAltTextGeneration) {
    // TODO @Shinigami92 2024-05-28: Use a fancy dialog instead of the browser's native confirm dialog
    // eslint-disable-next-line no-alert
    const allow = confirm('This will download a model with ~250MiB. Do you want to continue? This is an experimental feature and might fail in several scenarios.')

    if (!allow)
      return

    togglePreferences('experimentalAltTextGeneration')
  }

  generationInProgress.value = true

  try {
    // Imported lazily so the library is only fetched when the feature is used.
    const { pipeline, RawImage } = await import('@xenova/transformers')

    const pipe = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning')

    // See https://www.hacksoft.io/blog/handle-images-cors-error-in-chrome for why using `request-with-cors`.
    // Append with `&` when the URL already has a query string so it stays well-formed.
    const separator = url.includes('?') ? '&' : '?'
    const dataUrl = await loadImageAsDataUrl(`${url}${separator}request-with-cors`)

    const img = await RawImage.fromURL(dataUrl)

    const out = await pipe(img)

    // eslint-disable-next-line no-console
    console.debug(out)

    const firstOut = out?.[0]

    // Only a single (non-nested) result carries `generated_text`.
    if (!firstOut || Array.isArray(firstOut))
      return

    description.value = firstOut.generated_text
  }
  catch (error) {
    console.error(error)
    // TODO @Shinigami92 2024-05-27: Display error message to the user, so they know that something went wrong
  }
  finally {
    generationInProgress.value = false
  }
}

function toggleApply() {
isEditDialogOpen.value = false
emit('setDescription', description.value)
Expand Down Expand Up @@ -62,6 +142,16 @@ function toggleApply() {
<div flex flex-row-reverse>
<PublishCharacterCounter :length="description.length" :max="maxDescriptionLength" />
</div>

<!-- TODO @Shinigami92 2024-05-27: Style the button in the upper right corner of the textarea -->
<button type="button" btn-outline flex="~ gap2 center" :disabled="generationInProgress" @click="generateAltText">
<span block i-ri:sparkling-2-line />
{{ $t('action.generate-alt-text') }}
<span v-if="generationInProgress" aria-hidden="true" block animate-spin preserve-3d>
<span block i-ri:loader-2-fill aria-hidden="true" />
</span>
</button>

<button btn-outline :disabled="description.length > maxDescriptionLength" @click="toggleApply">
{{ $t('action.apply') }}
</button>
Expand Down
2 changes: 2 additions & 0 deletions composables/settings/definition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export interface PreferencesSettings {
experimentalGitHubCards: boolean
experimentalUserPicker: boolean
experimentalEmbeddedMedia: boolean
experimentalAltTextGeneration: boolean
}

export interface UserSettings {
Expand Down Expand Up @@ -88,6 +89,7 @@ export const DEFAULT__PREFERENCES_SETTINGS: PreferencesSettings = {
experimentalGitHubCards: true,
experimentalUserPicker: true,
experimentalEmbeddedMedia: false,
experimentalAltTextGeneration: false,
}

export function getDefaultUserSettings(locales: string[]): UserSettings {
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"@vueuse/math": "^10.8.0",
"@vueuse/motion": "2.1.0",
"@vueuse/nuxt": "^10.8.0",
"@xenova/transformers": "^2.17.1",
"blurhash": "^2.0.5",
"browser-fs-access": "^0.35.0",
"chroma-js": "^2.4.2",
Expand Down
Loading
Loading