Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 23 additions & 0 deletions examples/imageClassifier-transformer-single-image-topk/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<!--
👋 Hello! This is an ml5.js example made and shared with ❤️.
Learn more about the ml5.js project: https://ml5js.org/
ml5.js license and Code of Conduct: https://github.com/ml5js/ml5-next-gen/blob/main/LICENSE.md
This example demonstrates image classification using a transformer model through ml5.imageClassifier.
-->

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>ml5.js imageClassifier Transformer Example</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/p5.js/1.11.10/p5.min.js"></script>
<script src="../../dist/ml5.js"></script>
</head>
<body>
<script src="sketch.js"></script>
</body>
</html>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: extra newline

45 changes: 45 additions & 0 deletions examples/imageClassifier-transformer-single-image-topk/sketch.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might be just for testing - thought I point this out just in case: I don't think it's worth having a separate example just to demonstrate the topk option (documenting this seems sufficient imho)

* 👋 Hello! This is an ml5.js example made and shared with ❤️.
* Learn more about the ml5.js project: https://ml5js.org/
* ml5.js license and Code of Conduct: https://github.com/ml5js/ml5-next-gen/blob/main/LICENSE.md
*
* This example demonstrates image classification using a transformer model through ml5.imageClassifier.
*/

// A variable to hold the Image Classifier; it is created in preload() with the "VisionTransformer" model
let classifier;

// A variable to hold the image we want to classify
let img;

// Variables for displaying the results on the canvas
let label = "";
let confidence = "";

function preload() {
classifier = ml5.imageClassifier("VisionTransformer", { topK: 2 });
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Elsewhere in the codebase, we use topk as an option with lowercase k. Probably good to retain that for consistency?

img = loadImage("images/bird.jpg");
}

// Creates the canvas, requests a classification of the loaded image, and draws the image
function setup() {
  const canvasSize = 400;
  createCanvas(canvasSize, canvasSize);

  // gotResult is passed as the callback for the classification
  classifier.classify(img, gotResult);

  // Stretch the image from the top-left corner over the full canvas
  const originX = 0;
  const originY = 0;
  image(img, originX, originY, width, height);
}

// Callback function for when classification has finished.
// results: array of predictions, ordered by score/confidence (highest first).
function gotResult(results) {
  console.log(results);

  // Nothing to display when the classifier returned no predictions
  if (!results || results.length === 0) {
    return;
  }

  const best = results[0];
  // Hugging Face transformers use 'score' instead of 'confidence';
  // fall back to it when 'confidence' is not present on the result
  const value = best.confidence !== undefined ? best.confidence : best.score;

  // Display the results on the canvas
  fill(255);
  stroke(0);
  textSize(18);
  label = `Label: ${best.label}`;
  confidence = `Confidence: ${nf(value, 0, 2)}`;
  text(`Results Length: ${results.length}`, 10, 340);
  text(label, 10, 360);
  text(confidence, 10, 380);
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
23 changes: 23 additions & 0 deletions examples/imageClassifier-transformer-single-image/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<!--
👋 Hello! This is an ml5.js example made and shared with ❤️.
Learn more about the ml5.js project: https://ml5js.org/
ml5.js license and Code of Conduct: https://github.com/ml5js/ml5-next-gen/blob/main/LICENSE.md
This example demonstrates image classification using a transformer model through ml5.imageClassifier.
-->

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>ml5.js imageClassifier Transformer Example</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/p5.js/1.11.10/p5.min.js"></script>
<script src="../../dist/ml5.js"></script>
</head>
<body>
<script src="sketch.js"></script>
</body>
</html>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nitpick: extra newline

45 changes: 45 additions & 0 deletions examples/imageClassifier-transformer-single-image/sketch.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* 👋 Hello! This is an ml5.js example made and shared with ❤️.
* Learn more about the ml5.js project: https://ml5js.org/
* ml5.js license and Code of Conduct: https://github.com/ml5js/ml5-next-gen/blob/main/LICENSE.md
*
* This example demonstrates image classification using a transformer model through ml5.imageClassifier.
*/

// A variable to hold the Image Classifier; it is created in preload() with the "VisionTransformer" model
let classifier;

// A variable to hold the image we want to classify
let img;

// Variables for displaying the results on the canvas
let label = "";
let confidence = "";

function preload() {
classifier = ml5.imageClassifier("VisionTransformer");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Personally a bit on the fence if "VisionTransformer" is beneficial or not vs "vit-base-patch16-224" ... searching for the former brings up some articles of the general architecture (that by now different models implement) - only the latter lets me know that this was e.g. trained on 14 million images, with 21 thousand classes, and uses a resolution of 224x224.

If we'll be using "VisionTransformer": how about printing the actual name of the model that is being used to the console?

img = loadImage("images/bird.jpg");
}

// Creates the canvas, requests a classification of the loaded image, and draws the image
function setup() {
  const canvasSize = 400;
  createCanvas(canvasSize, canvasSize);

  // gotResult is passed as the callback for the classification
  classifier.classify(img, gotResult);

  // Stretch the image from the top-left corner over the full canvas
  const originX = 0;
  const originY = 0;
  image(img, originX, originY, width, height);
}

// Callback function for when classification has finished.
// results: array of predictions, ordered by score/confidence (highest first).
function gotResult(results) {
  console.log("The default results length is 3");
  console.log(results);

  // Nothing to display when the classifier returned no predictions
  if (!results || results.length === 0) {
    return;
  }

  const best = results[0];
  // Hugging Face transformers use 'score' instead of 'confidence';
  // fall back to it when 'confidence' is not present on the result
  const value = best.confidence !== undefined ? best.confidence : best.score;

  // Display the results on the canvas
  fill(255);
  stroke(0);
  textSize(18);
  label = `Label: ${best.label}`;
  confidence = `Confidence: ${nf(value, 0, 2)}`;
  text(label, 10, 360);
  text(confidence, 10, 380);
}
22 changes: 22 additions & 0 deletions examples/imageClassifier-transformer-webcam/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<!--
👋 Hello! This is an ml5.js example made and shared with ❤️.
Learn more about the ml5.js project: https://ml5js.org/
ml5.js license and Code of Conduct: https://github.com/ml5js/ml5-next-gen/blob/main/LICENSE.md
This example demonstrates detecting objects in a live video using a transformer model through ml5.imageClassifier.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add using a transformer model (here and in <title>, sketch.js)?

-->

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>ml5.js imageClassifier Transformer Webcam Example</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/p5.js/1.11.10/p5.min.js"></script>
<script src="../../dist/ml5.js"></script>
</head>
<body>
<script src="sketch.js"></script>
</body>
</html>
60 changes: 60 additions & 0 deletions examples/imageClassifier-transformer-webcam/sketch.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* 👋 Hello! This is an ml5.js example made and shared with ❤️.
* Learn more about the ml5.js project: https://ml5js.org/
* ml5.js license and Code of Conduct: https://github.com/ml5js/ml5-next-gen/blob/main/LICENSE.md
*
* This example demonstrates detecting objects in a live video using a transformer model through ml5.imageClassifier.
*/

// A variable to hold the Image Classifier; it is created in preload()
let classifier;

// A variable to hold the video we want to classify
let video;

// Variable for displaying the results on the canvas
let label = "Model loading...";

// Tracks whether continuous classification is running; toggled in mousePressed()
let isClassifying = false;

// Creates the image classifier for the "VisionTransformer" model
function preload() {
  const modelName = "VisionTransformer";
  classifier = ml5.imageClassifier(modelName);
}

// Creates the canvas, opens the webcam capture, and starts continuous classification
function setup() {
  const captureWidth = 640;
  const captureHeight = 480;

  createCanvas(captureWidth, captureHeight);
  background(255);

  // Webcam feed is the video input; the html element is hidden so it is
  // not shown twice (once on the page, once on the canvas)
  const webcam = createCapture(VIDEO);
  video = webcam;
  webcam.size(captureWidth, captureHeight);
  webcam.hide();

  // Mark classification as running before starting it with gotResult as callback
  isClassifying = true;
  classifier.classifyStart(video, gotResult);
}

// Paints the current video frame and overlays the latest label text
function draw() {
  // Each video frame is painted on the canvas
  image(video, 0, 0);

  // Position of the label text on the canvas
  const textX = 20;
  const textY = 50;

  // Print the current label in white, 32px text
  fill(255);
  textSize(32);
  text(label, textX, textY);
}

// Callback function for when classification has finished
function gotResult(results) {
  // Keep the previous label when no predictions were returned, instead of
  // throwing on results[0] inside the continuous classification callback
  if (!results || results.length === 0) {
    return;
  }

  // Update label variable which is displayed on the canvas
  label = results[0].label;
}

// Toggles continuous classification on each mouse press
function mousePressed() {
  if (!isClassifying) {
    // Currently stopped: flag as running, then restart with the same video and callback
    isClassifying = true;
    classifier.classifyStart(video, gotResult);
    return;
  }

  // Currently running: stop the classifier, then clear the flag
  classifier.classifyStop();
  isClassifying = false;
}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
},
"homepage": "https://github.com/ml5js/ml5-next-gen#readme",
"dependencies": {
"@huggingface/transformers": "^3.7.6",
"@mediapipe/face_mesh": "^0.4.1633559619",
"@mediapipe/hands": "^0.4.1675469240",
"@mediapipe/pose": "^0.5.1675469404",
Expand Down
22 changes: 15 additions & 7 deletions src/ImageClassifier/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import callCallback from "../utils/callcallback";
import { imgToTensor, mediaReady } from "../utils/imageUtilities";
import handleOptions from "../utils/handleOptions";
import { handleModelName } from "../utils/handleOptions";
import { ImageClassifierTransformer } from "./transformer";

const IMAGE_SIZE = 224;
const MODEL_OPTIONS = ["mobilenet", "darknet", "darknet-tiny", "doodlenet"];
Expand Down Expand Up @@ -248,7 +249,10 @@ class ImageClassifier {
"image",
"No input image provided. If you want to classify a video, use classifyStart."
);
return callCallback(this.classifyInternal(image, number || this.topk), callback);
return callCallback(
this.classifyInternal(image, number || this.topk),
callback
);
}

/**
Expand All @@ -271,8 +275,11 @@ class ImageClassifier {
await mediaReady(image, true);

// call the callback function
await callCallback(this.classifyInternal(image, number || this.topk), callback);

await callCallback(
this.classifyInternal(image, number || this.topk),
callback
);

// call recursively for continuous classification
if (!this.signalStop) {
requestAnimationFrame(classifyFrame);
Expand Down Expand Up @@ -308,11 +315,12 @@ class ImageClassifier {

/**
 * Factory function that creates an image classifier instance.
 * The arguments are normalized by handleArguments into a model name string,
 * an options object (defaults to {}), and a callback.
 *
 * @param {string} modelName - model name; "VisionTransformer" selects the transformer-based classifier.
 * @param {Object|function} optionsOrCallback - options object or callback.
 * @param {function} cb - callback.
 * @returns {ImageClassifier|ImageClassifierTransformer} the created classifier instance.
 */
const imageClassifier = (modelName, optionsOrCallback, cb) => {
  const args = handleArguments(modelName, optionsOrCallback, cb);
  const { string, options = {}, callback } = args;

  // Declare the instance exactly once: the transformer classifier takes
  // (options, callback); every other model name goes to ImageClassifier
  const instance =
    string === "VisionTransformer"
      ? new ImageClassifierTransformer(options, callback)
      : new ImageClassifier(string, options, callback);
  return instance;
};

export default imageClassifier;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Git prefers to have a newline character at the end of each file (since diff operates on whole lines)

Loading