Skip to content

Commit

Permalink
Completed Audio To Audio Modality (#25)
Browse files Browse the repository at this point in the history
* Completed Audio To Audio Modality

* Updated audioToAudio icon

* Minor formatting updates
  • Loading branch information
ShivanshShalabh authored Jul 9, 2024
1 parent cc83cbc commit b1cf1ba
Show file tree
Hide file tree
Showing 12 changed files with 187 additions and 4 deletions.
12 changes: 12 additions & 0 deletions src/components/Experiment/QuickInput/QuickInput.stories.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import {
imageToText,
textTo3D,
textClassification,
audioToAudio,
} from "../../../helpers/TaskIDs";
import {
SampleImageClassificationInputs,
Expand All @@ -36,6 +37,7 @@ import {
SampleImageToText,
SampleTextTo3DInputs,
SampleTextClassification,
SampleAudioToAudioInputs,
} from "../../../helpers/sampleImages";

export default {
Expand Down Expand Up @@ -261,4 +263,14 @@ TextClassification.args = {
type: textClassification,
},
},
};

// Storybook story: QuickInput configured for the audio-to-audio task,
// exposing the shared sample audio files as selectable inputs.
// The model stub only carries the output type, which is what QuickInput
// uses to pick the task-specific input widget.
export const AudioToAudio = Template.bind({});
AudioToAudio.args = {
sampleInputs: SampleAudioToAudioInputs,
model: {
output: {
type: audioToAudio,
},
},
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import React from "react";
import useBEMNaming from "../../../../../common/useBEMNaming";
import useTextOutput from "../Text/useTextOutput";

import { AudioOutputBox } from "../TextToAudio/AudioOutputBox";
import AudioToTextOutputInputSection from "../AudioToText/AudioToTextOutputInputSection";


import { audioToAudio } from "../../../../../helpers/TaskIDs";
export default function AudioToAudioOutput(props) {
const { getBlock } = useBEMNaming("audio-to-audio-output");

// Note: This method could probably be renamed to a more generic 'useOutput' or similar?
const { output, inferenceDuration, input, setInput } = useTextOutput(
props.trial
);

const onSubmit = () => {
props.onSubmit(input);
};

return (
<div className={getBlock()}>
<AudioToTextOutputInputSection
input={input}
setInput={setInput}
onSubmit={onSubmit}
/>
<AudioOutputBox duration={inferenceDuration} output={output} task={audioToAudio} />
</div>
);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import React from "react";
import AudioToAudioOutput from "./AudioToAudioOutput";
import { TestAudioToAudioOutput } from "./testData/testAudioToAudio";

export default {
title: "Experiments/Quick Output/Audio to Audio",
component: AudioToAudioOutput,
};

const template = (args) => <AudioToAudioOutput {...args} />;

export const Default = template.bind({});
Default.args = { trial: TestAudioToAudioOutput };
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Canned "generated token" response used by Storybook/tests for the
// audio-to-audio task.
export const TestAudioToAudioOutputGeneratedToken = {
  id: "sampleidhere",
};

// Canned completed trial for the audio-to-audio task: one input audio file
// and one generated audio feature in the response.
export const TestAudioToAudioOutput = {
  id: "sampletestaudiotoaudiooutputidhere",
  inputs: [
    {
      title: "audio1.flac",
      src: "https://xlab1.netlify.app/audio-to-audio-input.flac",
    },
  ],
  completed_at: "2023-06-03T18:17:14.513854Z",
  results: {
    duration: "9.216154124s",
    duration_for_inference: "9.193807904s",
    responses: [
      {
        features: [
          {
            // Title now matches the src filename (was copy-pasted from the
            // text-to-audio fixture as "text-to-audio-output.flac").
            title: "audio-to-audio-output.flac",
            src: "https://xlab1.netlify.app/audio-to-audio-output.flac",
            type: "AUDIO",
          },
        ],
        id: "sampletestaudiotoaudiooutputresponseidhere",
      },
    ],
  },
};
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.audio-to-text-output {
.audio-to-text-output , .audio-to-audio-output {
&__input-audio-content {
audio {
margin-top: 12px;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import React from "react";
import useBEMNaming from "../../../../../common/useBEMNaming";
import "./AudioToText.scss"
// Maybe rename the component since it's used not just in AudioToText
export default function AudioToTextOutputInputSection(props) {
const { getElement } = useBEMNaming("audio-to-text-output");
const input = props.input;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

.text-output, .text-to-code-output,
.audio-to-text-output, .text-to-audio-output,
.text-conversation-output,.image-to-text-output {
.text-conversation-output, .image-to-text-output, .audio-to-audio-output {
display: flex;
flex-direction: row;
gap: 72px;
Expand Down
54 changes: 53 additions & 1 deletion src/helpers/DefaultModels.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ import {
textToVideo,
visualQuestionAnswering,
imageToText,
textClassification
textClassification,
audioToAudio,
} from "./TaskIDs";

export const DefaultImageClassificationModel = {
Expand Down Expand Up @@ -970,4 +971,55 @@ export const DefaultTextClassification = {
link2: "",
},
version: "1.0",
};
// Default model presented for the audio-to-audio task.
// NOTE(review): the graph paths, framework entry, attributes, and citation
// URLs below are copy-pasted from the DeepLabv3 segmentation entry and look
// like placeholder data — confirm against the real audio-to-audio model.
export const DefaultAudioToAudioModel = {
  id: 192,
  created_at: "2022-04-29T20:48:47.370171Z",
  updated_at: "2022-04-29T20:48:47.370171Z",
  attributes: {
    Top1: "",
    Top5: "",
    kind: "CNN",
    manifest_author: "Jingning Tang",
    training_dataset: "PASCAL VOC 2012",
  },
  // Descriptions rewritten: the originals described a DeepLab/chatbot model
  // and did not match the audio-to-audio task at all.
  description:
    "Default audio-to-audio model. Takes an input audio file and produces a transformed audio file as output.\n",
  short_description:
    "Transforms an input audio recording into a generated output audio recording.",
  model: {
    graph_checksum: "0336ceb67b378df8ada0efe9eadb5ac8",
    graph_path:
      "https://s3.amazonaws.com/store.carml.org/models/tensorflow/models/deeplabv3_mnv2_dm05_pascal_train_aug_2018_10_01/frozen_inference_graph.pb",
    weights_checksum: "",
    weights_path: "",
  },
  framework: {
    id: 4,
    name: "TensorFlow",
    version: "1.14.0",
    architectures: [
      {
        name: "amd64",
      },
    ],
  },
  // Fixed: an audio-to-audio model consumes audio, not text (the task is
  // registered with TaskInputTypes.Audio in Task.js).
  input: {
    description: "audio file to be transformed",
    type: "audio",
  },
  license: "Apache License, Version 2.0",
  name: "DeepLabv3_MobileNet_v2_DM_05_PASCAL_VOC_Train_Aug",
  output: {
    description: "the transformed audio generated from the input audio",
    type: audioToAudio,
  },
  url: {
    github:
      "https://github.com/rai-project/tensorflow/blob/master/builtin_models/DeepLabv3_MobileNet_v2_DM_05_PASCAL_VOC_Train_Aug.yml",
    citation: "https://arxiv.org/pdf/1802.02611v3.pdf",
    link1: "https://arxiv.org/pdf/1706.05587.pdf",
    link2: "",
  },
  version: "1.0",
};
23 changes: 23 additions & 0 deletions src/helpers/Task.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
imageToText,
textTo3D,
textClassification,
audioToAudio,
} from "./TaskIDs";
import React from "react";
import { ReactComponent as ImageClassification } from "../resources/icons/icon-imageClassification.svg";
Expand All @@ -40,6 +41,7 @@ import { ReactComponent as TexttoVideo } from "../resources/icons/icon-textToVid
import { ReactComponent as TextTo3D } from "../resources/icons/icon-textTo3D.svg";
import { ReactComponent as ImageToText } from "../resources/icons/icon-imageToText.svg";
import { ReactComponent as TextClassification } from "../resources/icons/icon-textClassification.svg";
import { ReactComponent as AudioToAudio } from "../resources/icons/icon-audioToAudio.svg";

import {
DefaultImageClassificationModel,
Expand All @@ -61,6 +63,7 @@ import {
DefaultTextTo3DModel,
DefaultImageToText,
DefaultTextClassification,
DefaultAudioToAudioModel,
} from "./DefaultModels";
import {
SampleAudioToTextInputs,
Expand All @@ -77,6 +80,7 @@ import {
SampleImageToText,
SampleTextTo3DInputs,
SampleTextClassification,
SampleAudioToAudioInputs,
} from "./sampleImages";
import { TestImageClassificationResult } from "../components/Experiment/QuickOutput/Outputs/Classification/Features";
import { TestImageEnhancementData } from "../components/Experiment/QuickOutput/Outputs/ImageEnhancement/testData/TestFeatures";
Expand All @@ -99,6 +103,7 @@ import { TestImageTo3DOutput } from "../components/Experiment/QuickOutput/Output
import { TestImageToTextOutput } from "../components/Experiment/QuickOutput/Outputs/ImageToText/testData/testImageToTextOutput";
import { TestTextTo3DOutput } from "../components/Experiment/QuickOutput/Outputs/TextTo3D/testData/testTextTo3DOutput";
import { TestTextClassificationOutput } from "../components/Experiment/QuickOutput/Outputs/TextClassification/testData/testTextClassification";
import { TestAudioToAudioOutput } from "../components/Experiment/QuickOutput/Outputs/AudioToAudio/testData/testAudioToAudio";

export default class Task {
static image_classification = new Task({
Expand Down Expand Up @@ -431,6 +436,17 @@ export default class Task {

});

// Task registry entry for the audio-to-audio modality: UI copy, icon,
// sample inputs, and input widget type. Resolved via getStaticTask(audioToAudio)
// and included in the task list returned by getAllTasks().
static audio_to_audio = new Task({
name: "Audio to Audio",
description: "Used to run operations on audio file.",
id: audioToAudio,
inputText: "see how well this model can transform an audio input.",
outputText: "Audio output:",
icon: (props) => <AudioToAudio {...props} />,
sampleInputs: SampleAudioToAudioInputs,
tutorialDescription: "Audio to audio models transform audio files.",
inputType: TaskInputTypes.Audio,
});

constructor(options) {
this.name = options.name ?? "";
Expand Down Expand Up @@ -501,6 +517,8 @@ export default class Task {
return Task.text_to_3D;
case textClassification:
return Task.text_classification;
case audioToAudio:
return Task.audio_to_audio;
default:
return new Task({ name: "unknown", description: "unknown task name" });
}
Expand Down Expand Up @@ -549,6 +567,8 @@ export default class Task {
return DefaultTextTo3DModel;
case textClassification:
return DefaultTextClassification;
case audioToAudio:
return DefaultAudioToAudioModel;
default:
return undefined;
}
Expand Down Expand Up @@ -594,6 +614,8 @@ export default class Task {
return TestTextTo3DOutput;
case textClassification:
return TestTextClassificationOutput;
case audioToAudio:
return TestAudioToAudioOutput;
default:
return undefined;
}
Expand Down Expand Up @@ -621,6 +643,7 @@ export default class Task {
this.getStaticTask(imageToText),
this.getStaticTask(textTo3D),
this.getStaticTask(textClassification),
this.getStaticTask(audioToAudio),
];
}

Expand Down
1 change: 1 addition & 0 deletions src/helpers/TaskIDs.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ export const textToVideo = "text_to_video";
export const textTo3D = "text_to_3D";
export const imageToText = "image_to_text";
export const textClassification = "text_classification";
// Identifier for the audio-to-audio task.
export const audioToAudio = "audio_to_audio";
export const pending = "pending";
20 changes: 19 additions & 1 deletion src/helpers/sampleImages.js
Original file line number Diff line number Diff line change
Expand Up @@ -225,4 +225,22 @@ export const SampleTextClassification = [
"The weather is very pleasant today.",
"The ending of the movie was sad.",
"There is a car parked there."
];
];

// Sample audio files offered for the audio-to-audio task. All three entries
// currently point at the same demo recording and differ only by display title.
export const SampleAudioToAudioInputs = [
  "audio1.flac",
  "audio2.flac",
  "audio3.flac",
].map((title) => ({
  title,
  src: "https://xlab1.netlify.app/audio-to-audio-input.flac",
}));
1 change: 1 addition & 0 deletions src/resources/icons/icon-audioToAudio.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit b1cf1ba

Please sign in to comment.