Added image processing for files with metadata

- Image processing runs on upload and whenever the metadata is updated
TACC · Jan 16, 2025 · 7ce9c51 · 7ce9c51
1 parent ae6dffa
commit 7ce9c51
Show file tree

Hide file tree

Showing 7 changed files with 757 additions and 5 deletions.
diff --git a/server/poetry.lock b/server/poetry.lock
diff --git a/server/portal/apps/_custom/drp/models.py b/server/portal/apps/_custom/drp/models.py
@@ -41,6 +41,7 @@ class DrpFileMetadata(DrpMetadataModel):
     offset_to_first_image: Optional[int] = None
     gap_between_images: Optional[int] = None
     byte_order: Optional[Literal['big_endian', 'little_endian']] = None
+    use_binary_correction: Optional[bool] = None
 
 class FileObj(DrpMetadataModel):
     """Model for associated files"""

diff --git a/server/portal/apps/projects/tasks.py b/server/portal/apps/projects/tasks.py
@@ -1,3 +1,5 @@
+import os
+import base64
 import logging
 from pathlib import Path
 from django.conf import settings
@@ -10,10 +12,11 @@
 from django.core.exceptions import ObjectDoesNotExist
 from portal.apps.projects.models.project_metadata import ProjectMetadata
 from portal.apps._custom.drp import constants
-from portal.apps.projects.workspace_operations.project_meta_operations import add_file_associations, create_file_obj, get_ordered_value
+from portal.apps.projects.workspace_operations.project_meta_operations import add_file_associations, create_file_obj, get_file_obj, get_ordered_value
 from portal.apps.projects.workspace_operations.graph_operations import get_path_uuid_mapping
 import networkx as nx
-import uuid
+from portal.apps._custom.drp.models import FileObj
+from portal.libs.files.file_processing import binary_correction, conf_raw, conf_tiff, create_animation, create_histogram, create_thumbnail
 
 # TODO: Cleanup this file
 
@@ -231,4 +234,76 @@ def sync_files_without_metadata(self, user_access_token, project_id: str):
     for entity_uuid, file_objs in files_to_add_dict.items():
         logger.info(f'Adding {len(file_objs)} files to entity {entity_uuid} in project {project_id}')
         add_file_associations(entity_uuid, file_objs)
-
+
+@shared_task(bind=True, queue='default')
+def process_file(self, project_id: str, path: str, user_access_token, encoded_file=None):
+    """Generate and upload derived images (thumbnail, histogram, animation) for a file.
+
+    Args:
+        project_id: Used both as the Tapis ``systemId`` and as the project key
+            for the metadata lookup.
+        path: Path of the source file within the system.
+        user_access_token: Token used to build the Tapis client.
+        encoded_file: Optional base64-encoded file content. When omitted, the
+            content is fetched with ``client.files.getContents``.
+
+    Each derived artifact is produced on a best-effort basis: a failure in one
+    step is logged (with traceback) and does not prevent the remaining steps.
+    """
+    client = user_account(user_access_token)
+
+    logger.info(f'Processing file {path} in project {project_id}')
+
+    if encoded_file:
+        logger.info('Decoding file')
+        file = base64.b64decode(encoded_file)
+    else:
+        logger.info('Retrieving file using Tapis')
+        file = client.files.getContents(systemId=project_id, path=path)
+
+    logger.info('File retrieved')
+
+    file_obj: FileObj = get_file_obj(project_id, path)
+
+    if not file or not file_obj:
+        # Use the module logger rather than print so this shows up in worker logs.
+        logger.warning(f"File {path} does not exist in project {project_id}")
+        return
+
+    parent_path = str(Path(path).parent)
+    value = get_ordered_value(constants.FILE, file_obj.get('value'))
+    file_name = file_obj.get('name')
+
+    # Compare the extension case-insensitively so '.TIF' / '.TIFF' files are
+    # decoded as TIFFs instead of falling through to the RAW decoder.
+    file_ext = os.path.splitext(file_name)[1].lower()
+
+    if file_ext in ('.tif', '.tiff'):
+        adv_image = conf_tiff(file)
+    else:
+        adv_image = conf_raw(value, file)
+
+    try:
+        if value.get('use_binary_correction'):
+            adv_image = binary_correction(adv_image)
+    except Exception as e:
+        # Best effort: continue with the uncorrected image.
+        logger.exception(f'Error applying binary correction: {e}')
+
+    try:
+        thumbnail = create_thumbnail(adv_image)
+
+        thumbnail_path = f'{parent_path}/{file_name}.thumb.jpg'
+
+        logger.info('Uploading generated thumbnail')
+        client.files.insert(systemId=project_id, path=thumbnail_path, file=thumbnail)
+    except Exception as e:
+        logger.exception(f'Error generating thumbnail: {e}')
+
+    try:
+        histogram_img, histogram_csv = create_histogram(adv_image)
+
+        histogram_img_path = f'{parent_path}/{file_name}.histogram.jpg'
+        histogram_csv_path = f'{parent_path}/{file_name}.histogram.csv'
+
+        logger.info('Uploading generated histogram')
+        client.files.insert(systemId=project_id, path=histogram_img_path, file=histogram_img)
+        client.files.insert(systemId=project_id, path=histogram_csv_path, file=histogram_csv)
+    except Exception as e:
+        logger.exception(f'Error generating histogram: {e}')
+
+    try:
+        animation = create_animation(adv_image)
+
+        # create_animation returns None when the image is not a 3D stack;
+        # there is nothing to upload in that case.
+        if animation is not None:
+            animation_path = f'{parent_path}/{file_name}.gif'
+
+            logger.info('Uploading generated animation')
+            client.files.insert(systemId=project_id, path=animation_path, file=animation)
+    except Exception as e:
+        logger.exception(f'Error generating animation: {e}')
diff --git a/server/portal/apps/projects/views.py b/server/portal/apps/projects/views.py
@@ -33,7 +33,7 @@
 from pathlib import Path
 from portal.apps._custom.drp import constants
 from portal.apps.projects.workspace_operations.graph_operations import add_node_to_project, initialize_project_graph, get_node_from_path
-from portal.apps.projects.tasks import sync_files_without_metadata
+from portal.apps.projects.tasks import process_file, sync_files_without_metadata
 
 LOGGER = logging.getLogger(__name__)
 
@@ -398,6 +398,7 @@ def patch(self, request: HttpRequest, project_id: str):
         if value['data_type'] == 'file':
             try: 
                 patch_file_obj_entity(client, project_id, value, path)
+                process_file.delay(project_id, path.lstrip("/"), client.access_token.access_token)
             except Exception as exc:
                 raise ApiException("Error updating file metadata", status=500) from exc
         else:

diff --git a/server/portal/libs/agave/operations.py b/server/portal/libs/agave/operations.py
@@ -1,3 +1,4 @@
+import base64
 import os
 import io
 from django.conf import settings
@@ -11,6 +12,7 @@
 from portal.libs.agave.filter_mapping import filter_mapping
 from pathlib import Path
 from portal.apps._custom.drp.models import FileObj
+from portal.apps.projects.tasks import process_file
 from tapipy.errors import BaseTapyException
 from portal.apps.projects.models.metadata import ProjectsMetadata
 from portal.apps.datafiles.models import DataFilesMetadata
@@ -558,6 +560,11 @@ def upload(client, system, path, uploaded_file, metadata=None):
             root_node = get_root_node(system)
             add_file_associations(root_node['uuid'], [file_obj])
 
+        # additional processing for files
+        encoded_file = base64.b64encode(uploaded_file.read()).decode('utf-8')
+        uploaded_file.seek(0)
+        transaction.on_commit(lambda: process_file.delay(file_obj.system, file_obj.path, client.access_token.access_token, encoded_file))
+
     response_json = client.files.insert(systemId=system, path=dest_path, file=uploaded_file)
     tapis_indexer.apply_async(kwargs={'access_token': client.access_token.access_token,
                                       'systemId': system,

diff --git a/server/portal/libs/files/file_processing.py b/server/portal/libs/files/file_processing.py
@@ -0,0 +1,184 @@
+import numpy as np
+import io
+import logging
+from matplotlib import pyplot as plt
+import csv
+import matplotlib.animation as anim
+import tempfile
+from portal.apps._custom.drp.models import DrpFileMetadata, FileObj
+import tifffile as tiff
+
+logger = logging.getLogger(__name__)
+
+def conf_raw(img, file):
+    """Decode a headerless RAW byte buffer into a 3D NumPy array.
+
+    Args:
+        img: Mapping of file metadata. Reads 'byte_order', 'image_type',
+            'number_of_images', 'height' and 'width'.
+        file: Bytes-like object holding the pixel data.
+
+    Returns:
+        ndarray reshaped to (number_of_images, height, width).
+
+    Raises:
+        ValueError: If 'image_type' is not one of the supported types, or
+            (from NumPy) if the buffer size does not match the dtype/shape.
+    """
+    # NOTE: The datatype should eventually be an auto generated field in the
+    # adv image upload form for RAW files. The image parameters for slices,
+    # width and height are converted to ints below.
+    # NOTE: An 8-bit raw is treated as unsigned ('8_bit' -> 'u1').
+    # NOTE(review): the file metadata model also declares
+    # offset_to_first_image and gap_between_images; they are not applied
+    # here -- confirm whether RAW uploads can carry header/gap bytes.
+
+    logger.info(f'img: {img}')
+
+    prefix_map = {'little_endian': '<', 'big_endian': '>'}
+    suffix_map = {
+        '8_bit': 'u1', '16_bit_unsigned': 'u2', '32_bit_unsigned': 'u4', '64_bit_unsigned': 'u8',
+        '8_bit_signed': 'i1', '16_bit_signed': 'i2', '32_bit_signed': 'i4', '64_bit_signed': 'i8',
+        '32_bit_real': 'f4', '64_bit_real': 'f8'
+    }
+
+    # Fall back to '|' when the byte order is missing or unrecognised.
+    prefix = prefix_map.get(img.get('byte_order'), '|')
+
+    image_type = img.get('image_type')
+    suffix = suffix_map.get(image_type)
+    if suffix is None:
+        # Fail with a clear message instead of a TypeError from `prefix + None`.
+        raise ValueError(
+            f'Unsupported image_type {image_type!r}; expected one of {sorted(suffix_map)}'
+        )
+    datatype = prefix + suffix
+
+    file_data = np.frombuffer(file, dtype=datatype)
+    return file_data.reshape([
+        int(img['number_of_images']),
+        int(img['height']),
+        int(img['width'])
+    ])
+
+def conf_tiff(file):
+    """Decode in-memory TIFF bytes and return the result of ``tiff.imread``."""
+    # Wrap the raw bytes in a file-like object for the reader and make sure
+    # the wrapper is released once decoding has finished.
+    stream = io.BytesIO(file)
+    try:
+        return tiff.imread(stream)
+    finally:
+        stream.close()
+
+def binary_correction(img):
+    """Linearly rescale pixel values so min -> 0 and max -> 255, as uint8.
+
+    Args:
+        img: Array-like of numeric pixel values.
+
+    Returns:
+        ndarray of dtype uint8 with the same shape as ``img``. A constant
+        image (max == min) maps to all zeros.
+    """
+    logger.debug('Correcting for Binary values...')
+
+    # Work on a float64 copy: computing max - min in the source dtype can wrap
+    # for narrow signed integers (e.g. int8: 127 - (-128)), and the copy lets
+    # the arithmetic below run in place without touching the caller's array.
+    scaled = np.array(img, dtype=np.float64)
+    min_value = scaled.min()
+    span = scaled.max() - min_value
+
+    if span == 0:
+        # Constant image: avoid dividing by zero (inf/NaN cast to uint8).
+        return np.zeros(scaled.shape, dtype='uint8')
+
+    # Multiply before dividing so the maximum lands exactly on 255 instead of
+    # a value just below it that would floor to 254.
+    scaled -= min_value
+    scaled *= 255.0
+    scaled /= span
+    np.floor(scaled, out=scaled)
+    return scaled.astype('uint8')
+
+def create_thumbnail(img):
+    """Render a JPEG thumbnail of an image array and return its bytes.
+
+    For a 3D array with no dimension equal to 3, the middle slice along the
+    first axis is rendered; otherwise the array is passed to ``imshow`` as is.
+
+    Args:
+        img: ndarray with at least two dimensions.
+
+    Returns:
+        bytes: JPEG-encoded thumbnail whose longer side is ``dim_max`` inches
+        at ``dpi`` dots per inch.
+    """
+    dpi = 100
+    dim_max = 5
+    width = img.shape[1]
+    height = img.shape[0]
+    depth_slice = None
+    if len(img.shape) == 3 and 3 not in img.shape:
+        width = img.shape[2]
+        height = img.shape[1]
+        # Floor division gives a valid middle index for every stack size,
+        # including a single slice (ceil(n / 2) overshoots for odd n).
+        depth_slice = img.shape[0] // 2
+    elif len(img.shape) == 3 and 3 in img.shape:
+        # TODO: Handle RGB files shape ==> (h, w, 3)
+        # NOTE(review): a stack with exactly 3 slices also lands here.
+        logger.debug('handle RGB')
+
+    # preserve aspect ratio and resize to fit.
+    modifier = dim_max/width if width > height else dim_max/height
+    resized_width = width*modifier
+    resized_height = height*modifier
+
+    fig = plt.figure()
+    try:
+        # The third positional argument of set_size_inches is `forward`, not
+        # a dpi; the dpi is applied when saving.
+        fig.set_size_inches(resized_width, resized_height)
+        ax = plt.Axes(fig, [0., 0., 1., 1.])
+        ax.set_axis_off()
+        fig.add_axes(ax)
+        # TODO: Swap color mapping if image is bitmap/8bit
+        # The colormap is passed per call so the process-wide default is
+        # left untouched.
+        if depth_slice is not None:
+            logger.debug('Creating Thumbnail from 3D tif')
+            ax.imshow(img[depth_slice, :, :], cmap='Greys', aspect='equal', vmin=0, vmax=255)
+        else:
+            logger.debug('Creating Thumbnail from FLAT tif')
+            ax.imshow(img, cmap='Greys', aspect='equal')
+
+        buffer = io.BytesIO()
+        fig.savefig(buffer, format='jpeg', dpi=dpi)
+    finally:
+        # Always release the figure, even when rendering fails.
+        plt.close(fig)
+
+    return buffer.getvalue()
+
+def create_histogram(img):
+    """Build a 256-bin density histogram of all pixel values.
+
+    Returns:
+        tuple: (JPEG bytes of the plotted histogram, CSV text with a
+        'Value,Probability' header and one row per bin).
+    """
+    logger.debug('Creating Histogram')
+    bin_count = 256
+    figure = plt.figure(figsize=(4, 2.4))
+    flat_pixels = img.reshape(-1)
+    freq, bins, _ = plt.hist(flat_pixels, bin_count, density=True)
+    plt.xlabel('Gray value')
+    plt.ylabel('Probability')
+    plt.tight_layout()
+
+    jpeg_out = io.BytesIO()
+    figure.savefig(jpeg_out, format='jpeg', dpi=200)
+    plt.close(figure)
+
+    csv_out = io.StringIO()
+    writer = csv.writer(csv_out, delimiter=',')
+    writer.writerow(('Value', 'Probability'))
+    # bins holds one more edge than freq has counts; zip stops at the last
+    # count, pairing each frequency with its bin's left edge.
+    writer.writerows(zip(bins, freq))
+
+    logger.debug('Histogram Created')
+
+    return jpeg_out.getvalue(), csv_out.getvalue()
+
+def create_animation(img):
+    """
+    Creates an animated GIF using matplotlib and returns its binary data.
+
+    The first slice is always included; the remaining slices are sampled so
+    that large stacks produce roughly 100 frames.
+
+    Args:
+        img (ndarray): The input 3D image as a NumPy array.
+
+    Returns:
+        bytes: Binary data of the animated GIF, or None when the input has
+        fewer than three dimensions or is 3D with a dimension equal to 3.
+    """
+    if len(img.shape) < 3 or (len(img.shape) == 3 and 3 in img.shape):
+        logger.debug('Image is not a 3D array')
+        return None  # Exit if the image is not a 3D array
+
+    logger.debug('Creating Animated Gif')
+
+    # Resize the image while preserving aspect ratio
+    dim_max = 5
+    width, height = img.shape[2], img.shape[1]
+    modifier = dim_max / width if width > height else dim_max / height
+    resized_width, resized_height = width * modifier, height * modifier
+
+    if img.shape[0] < 100:
+        slicesave = 1
+    else:
+        slicesave = round((img.shape[0] / 100) * 1)
+
+    frame_indices = [0, *range(1, img.shape[0], slicesave)]
+
+    fig = plt.figure()
+    try:
+        # The third positional argument of set_size_inches is `forward`, not
+        # a dpi, so only the size is passed.
+        fig.set_size_inches(resized_width, resized_height)
+        ax = plt.Axes(fig, [0., 0., 1., 1.])
+        ax.set_axis_off()
+        fig.add_axes(ax)
+
+        # One artist per frame on a single axes; the colormap is passed per
+        # call so the process-wide default is left untouched.
+        frames = [
+            [ax.imshow(img[index, :, :], cmap='Greys', aspect='equal', vmin=0, vmax=255)]
+            for index in frame_indices
+        ]
+
+        animation = anim.ArtistAnimation(fig, frames)
+        with tempfile.NamedTemporaryFile(suffix=".gif", delete=True) as temp_file:
+            animation.save(temp_file.name, writer='imagemagick', fps=6)
+            # Re-open by name: the writer targets the path, so this does not
+            # depend on it writing through the handle opened above.
+            with open(temp_file.name, 'rb') as gif_file:
+                gif_binary_data = gif_file.read()
+    finally:
+        # Figures stay registered with pyplot until closed; release this one
+        # so repeated calls in a long-running worker do not accumulate them.
+        plt.close(fig)
+
+    logger.debug('Animated Gif Created')
+    return gif_binary_data
diff --git a/server/pyproject.toml b/server/pyproject.toml
@@ -41,6 +41,9 @@ gevent = "^23.9.1"
 pymemcache = "^4.0.0"
 pydantic = "^2.5.0"
 networkx = "^3.2.1"
+numpy = "^2.2.1"
+matplotlib = "^3.10.0"
+tifffile = "^2025.1.10"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.3.1"