NEON and Sun datasets

weecology · Oct 4, 2024 · 222507f · 222507f
1 parent 08fcf6a
commit 222507f
Show file tree

Hide file tree

Showing 7 changed files with 96 additions and 20 deletions.
diff --git a/data_prep/NeonBenchmark.py b/data_prep/NeonBenchmark.py
@@ -7,26 +7,6 @@
 from deepforest.preprocess import split_raster
 
 def generate_NEON_benchmark():
-    BENCHMARK_PATH = "/orange/idtrees-collab/NeonTreeEvaluation/"
-    tifs = glob.glob(BENCHMARK_PATH + "evaluation/RGB/*.tif")
-    xmls = [os.path.splitext(os.path.basename(x))[0] for x in tifs] 
-    xmls = [os.path.join(BENCHMARK_PATH, "annotations", x) + ".xml" for x in xmls] 
-
-    #Load and format xmls, not every RGB image has an annotation
-    annotation_list = []   
-    for xml_path in xmls:
-        try:
-            annotation = read_file(xml_path)
-        except:
-            continue
-        annotation_list.append(annotation)
-    benchmark_annotations = pd.concat(annotation_list, ignore_index=True)
-
-    benchmark_annotations["source"] = "NEON_benchmark"
-    for image_path in benchmark_annotations.image_path.unique():
-        dst = os.path.join(BENCHMARK_PATH, "evaluation/RGB/", image_path)
-        shutil.copy(dst, "/orange/ewhite/DeepForest/NEON_benchmark/images/")
-    benchmark_annotations.to_csv("/orange/ewhite/DeepForest/NEON_benchmark/images/test.csv")
 
     # Copy images to test location
     benchmark_annotations["source"] = "NEON_benchmark"

diff --git a/data_prep/NeonTreeEvaluation.py b/data_prep/NeonTreeEvaluation.py
@@ -0,0 +1,30 @@
+import glob
+import os
+import shutil
+import pandas as pd
+from deepforest.utilities import read_file
+
+BENCHMARK_PATH = "/orange/idtrees-collab/NeonTreeEvaluation/"
+IMAGE_DIR = "/orange/ewhite/DeepForest/NEON_benchmark/images/"
+
+# One candidate annotation XML per evaluation RGB tile (same base name, .xml extension)
+tifs = glob.glob(BENCHMARK_PATH + "evaluation/RGB/*.tif")
+xmls = [os.path.join(BENCHMARK_PATH, "annotations", os.path.splitext(os.path.basename(x))[0]) + ".xml" for x in tifs]
+
+# Load and format xmls; not every RGB image has an annotation, so unreadable ones are skipped
+annotation_list = []
+for xml_path in xmls:
+    try:
+        annotation = read_file(xml_path)
+    except Exception:
+        continue
+    annotation_list.append(annotation)
+benchmark_annotations = pd.concat(annotation_list, ignore_index=True)
+benchmark_annotations["source"] = "NEON_benchmark"
+benchmark_annotations["label"] = "Tree"
+
+# Copy each annotated image into IMAGE_DIR, then point image_path at the copied file
+for image_name in benchmark_annotations.image_path.unique():
+    shutil.copy(os.path.join(BENCHMARK_PATH, "evaluation/RGB/", image_name), IMAGE_DIR)
+benchmark_annotations["image_path"] = benchmark_annotations.image_path.apply(lambda x: os.path.join(IMAGE_DIR, x))
+benchmark_annotations.to_csv("/orange/ewhite/DeepForest/NEON_benchmark/NeonTreeEvaluation_annotations.csv")
diff --git a/data_prep/collect_tasks.py b/data_prep/collect_tasks.py
@@ -14,6 +14,8 @@
     '/orange/ewhite/DeepForest/individual_urban_tree_crown_detection/annotations.csv',
     '/orange/ewhite/DeepForest/Radogoshi_Sweden/annotations.csv',
     "/orange/ewhite/DeepForest/WRI/WRI-labels-opensource/annotations.csv",
+    "/orange/ewhite/DeepForest/Guangzhou2022/annotations.csv",
+    "/orange/ewhite/DeepForest/NEON_benchmark/NeonTreeEvaluation_annotations.csv",
     '/orange/ewhite/DeepForest/ReForestTree/images/train.csv']
 
 TreePoints = [

diff --git a/docs/Gaungzhou2022.py b/docs/Gaungzhou2022.py
@@ -0,0 +1,50 @@
+import os
+import pandas as pd
+from deepforest.utilities import read_file
+import json
+
+# Convert the Guangzhou2022 COCO-style JSON annotations into one annotations CSV.
+json_dir = "/orange/ewhite/DeepForest/Guangzhou2022/GZIndividualTree_Anno"
+
+# One [image_path, xmin, xmax, ymin, ymax] row per bounding box
+data = []
+
+# Iterate over all JSON files in the directory
+for filename in os.listdir(json_dir):
+    if not filename.endswith(".json"):
+        continue
+
+    with open(os.path.join(json_dir, filename), 'r') as f:
+        coco_data = json.load(f)
+
+    # The annotation file's name decides which sub-folder holds its images
+    if 'treeInstance300' in filename:
+        image_dir = os.path.join(json_dir, 'train')
+    elif 'test' in filename:
+        image_dir = os.path.join(json_dir, 'test')
+    else:
+        image_dir = json_dir
+
+    # Index images by id once instead of rescanning the image list for every
+    # annotation; reversed() keeps the first entry if an id is repeated.
+    file_names = {item['id']: item['file_name'] for item in reversed(coco_data['images'])}
+
+    # Extract annotations
+    for annotation in coco_data['annotations']:
+        image_path = os.path.join(image_dir, file_names[annotation['image_id']])
+        # Boxes whose image is not on disk are dropped
+        if not os.path.exists(image_path):
+            continue
+
+        # bbox is read as [x, y, width, height] and converted to corner coordinates
+        xmin, ymin, width, height = annotation['bbox'][:4]
+        data.append([image_path, xmin, xmin + width, ymin, ymin + height])
+
+# Create a DataFrame and pass it through DeepForest's read_file
+df = pd.DataFrame(data, columns=['image_path', 'xmin', 'xmax', 'ymin', 'ymax'])
+df = read_file(df)
+df["label"] = "Tree"
+df["source"] = "Sun et al. 2022"
+
+# Save the DataFrame to a CSV file
+df.to_csv('/orange/ewhite/DeepForest/Guangzhou2022/annotations.csv', index=False)
diff --git a/docs/datasets.md b/docs/datasets.md
@@ -13,6 +13,12 @@ ISPRS Journal of Photogrammetry and Remote Sensing, Volume 206, 2023
 
 **Location:** Suwon, South Korea
 
+### Sun et al. 2022
+
+![sample_image](public/Sun_et_al._2022.png)
+
+**Link:** [https://www.sciencedirect.com/science/article/pii/S030324342100369X](https://www.sciencedirect.com/science/article/pii/S030324342100369X)
+
 ### Ragadoshi_Sweden
 
 ![sample_image](public/Radogoshi_et_al._2021.png)
@@ -37,6 +43,14 @@ ISPRS Journal of Photogrammetry and Remote Sensing, Volume 206, 2023
 
 **Location:** Mato Grosso do Sul, Brazil
 
+### Weinstein et al. 2021
+
+![sample_image](public/NEON_benchmark.png)
+
+**Link:** [https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009180](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1009180)
+
+**Location:** [NEON sites](https://www.neonscience.org/field-sites/explore-field-sites) within the United States
+
 ### World Resources Institute 
 
 NAIP Imagery from across the United States

diff --git a/docs/public/NEON_benchmark.png b/docs/public/NEON_benchmark.png
diff --git a/docs/public/Sun_et_al._2022.png b/docs/public/Sun_et_al._2022.png