Skip to content

Commit

Permalink
Added image processing for files with metadata
Browse files Browse the repository at this point in the history
- Image processing runs on upload and whenever the metadata is updated
  • Loading branch information
shayanaijaz committed Jan 16, 2025
1 parent ae6dffa commit 7ce9c51
Show file tree
Hide file tree
Showing 7 changed files with 757 additions and 5 deletions.
483 changes: 482 additions & 1 deletion server/poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions server/portal/apps/_custom/drp/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class DrpFileMetadata(DrpMetadataModel):
offset_to_first_image: Optional[int] = None
gap_between_images: Optional[int] = None
byte_order: Optional[Literal['big_endian', 'little_endian']] = None
use_binary_correction: Optional[bool] = None

class FileObj(DrpMetadataModel):
"""Model for associated files"""
Expand Down
81 changes: 78 additions & 3 deletions server/portal/apps/projects/tasks.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os
import base64
import logging
from pathlib import Path
from django.conf import settings
Expand All @@ -10,10 +12,11 @@
from django.core.exceptions import ObjectDoesNotExist
from portal.apps.projects.models.project_metadata import ProjectMetadata
from portal.apps._custom.drp import constants
from portal.apps.projects.workspace_operations.project_meta_operations import add_file_associations, create_file_obj, get_ordered_value
from portal.apps.projects.workspace_operations.project_meta_operations import add_file_associations, create_file_obj, get_file_obj, get_ordered_value
from portal.apps.projects.workspace_operations.graph_operations import get_path_uuid_mapping
import networkx as nx
import uuid
from portal.apps._custom.drp.models import FileObj
from portal.libs.files.file_processing import binary_correction, conf_raw, conf_tiff, create_animation, create_histogram, create_thumbnail

# TODO: Cleanup this file

Expand Down Expand Up @@ -231,4 +234,76 @@ def sync_files_without_metadata(self, user_access_token, project_id: str):
for entity_uuid, file_objs in files_to_add_dict.items():
logger.info(f'Adding {len(file_objs)} files to entity {entity_uuid} in project {project_id}')
add_file_associations(entity_uuid, file_objs)


@shared_task(bind=True, queue='default')
def process_file(self, project_id: str, path: str, user_access_token, encoded_file=None):
    """Generate and upload derived artifacts for a project file.

    The file content is decoded from ``encoded_file`` when it is provided,
    otherwise it is fetched through the Tapis client. A thumbnail, a
    histogram (image + CSV) and an animation are then generated and uploaded
    next to the source file. Each artifact is handled in its own ``try``
    block so a failure in one does not prevent the others.

    Args:
        project_id: Passed as ``systemId`` to the Tapis client and used to
            look up the file's metadata entry.
        path: Path of the file inside the system.
        user_access_token: Token handed to ``user_account`` to build the client.
        encoded_file: Optional base64-encoded file content; when falsy the
            content is downloaded instead.
    """
    client = user_account(user_access_token)

    logger.info(f'Processing file {path} in project {project_id}')

    if encoded_file:
        logger.info('Decoding file')
        file = base64.b64decode(encoded_file)
    else:
        logger.info('Retrieving file using Tapis')
        file = client.files.getContents(systemId=project_id, path=path)

    logger.info('File retrieved')

    file_obj: FileObj = get_file_obj(project_id, path)

    # Guard clause: nothing to do without both the content and its metadata.
    if not (file and file_obj):
        logger.warning(f'File {path} does not exist in project {project_id}')
        return

    parent_path = str(Path(path).parent)
    value = get_ordered_value(constants.FILE, file_obj.get('value'))
    file_name = file_obj.get('name')
    _, file_ext = os.path.splitext(file_name)

    # Compare case-insensitively so that e.g. ``scan.TIF`` is decoded as TIFF
    # rather than being handed to the RAW decoder.
    if file_ext.lower() in ('.tif', '.tiff'):
        adv_image = conf_tiff(file)
    else:
        adv_image = conf_raw(value, file)

    # Best effort: if the correction fails, continue with the uncorrected image.
    try:
        if value.get('use_binary_correction'):
            adv_image = binary_correction(adv_image)
    except Exception as e:
        logger.exception(f'Error applying binary correction: {e}')

    try:
        thumbnail = create_thumbnail(adv_image)

        thumbnail_path = f'{parent_path}/{file_name}.thumb.jpg'

        logger.info('Uploading generated thumbnail')
        client.files.insert(systemId=project_id, path=thumbnail_path, file=thumbnail)
    except Exception as e:
        logger.exception(f'Error generating thumbnail: {e}')

    try:
        histogram_img, histogram_csv = create_histogram(adv_image)

        histogram_img_path = f'{parent_path}/{file_name}.histogram.jpg'
        histogram_csv_path = f'{parent_path}/{file_name}.histogram.csv'

        logger.info('Uploading generated histogram')
        client.files.insert(systemId=project_id, path=histogram_img_path, file=histogram_img)
        client.files.insert(systemId=project_id, path=histogram_csv_path, file=histogram_csv)
    except Exception as e:
        logger.exception(f'Error generating histogram: {e}')

    try:
        animation = create_animation(adv_image)

        # create_animation returns None for images that are not 3-D stacks;
        # uploading None would only produce a misleading error.
        if animation is None:
            logger.info('No animation generated for this image; skipping upload')
        else:
            animation_path = f'{parent_path}/{file_name}.gif'

            logger.info('Uploading generated animation')
            client.files.insert(systemId=project_id, path=animation_path, file=animation)
    except Exception as e:
        logger.exception(f'Error generating animation: {e}')
3 changes: 2 additions & 1 deletion server/portal/apps/projects/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from pathlib import Path
from portal.apps._custom.drp import constants
from portal.apps.projects.workspace_operations.graph_operations import add_node_to_project, initialize_project_graph, get_node_from_path
from portal.apps.projects.tasks import sync_files_without_metadata
from portal.apps.projects.tasks import process_file, sync_files_without_metadata

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -398,6 +398,7 @@ def patch(self, request: HttpRequest, project_id: str):
if value['data_type'] == 'file':
try:
patch_file_obj_entity(client, project_id, value, path)
process_file.delay(project_id, path.lstrip("/"), client.access_token.access_token)
except Exception as exc:
raise ApiException("Error updating file metadata", status=500) from exc
else:
Expand Down
7 changes: 7 additions & 0 deletions server/portal/libs/agave/operations.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import os
import io
from django.conf import settings
Expand All @@ -11,6 +12,7 @@
from portal.libs.agave.filter_mapping import filter_mapping
from pathlib import Path
from portal.apps._custom.drp.models import FileObj
from portal.apps.projects.tasks import process_file
from tapipy.errors import BaseTapyException
from portal.apps.projects.models.metadata import ProjectsMetadata
from portal.apps.datafiles.models import DataFilesMetadata
Expand Down Expand Up @@ -558,6 +560,11 @@ def upload(client, system, path, uploaded_file, metadata=None):
root_node = get_root_node(system)
add_file_associations(root_node['uuid'], [file_obj])

# additional processing for files
encoded_file = base64.b64encode(uploaded_file.read()).decode('utf-8')
uploaded_file.seek(0)
transaction.on_commit(lambda: process_file.delay(file_obj.system, file_obj.path, client.access_token.access_token, encoded_file))

response_json = client.files.insert(systemId=system, path=dest_path, file=uploaded_file)
tapis_indexer.apply_async(kwargs={'access_token': client.access_token.access_token,
'systemId': system,
Expand Down
184 changes: 184 additions & 0 deletions server/portal/libs/files/file_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import numpy as np
import io
import logging
from matplotlib import pyplot as plt
import csv
import matplotlib.animation as anim
import tempfile
from portal.apps._custom.drp.models import DrpFileMetadata, FileObj
import tifffile as tiff

logger = logging.getLogger(__name__)

def conf_raw(img, file):
    """Decode a RAW image buffer into a ``(number_of_images, height, width)`` array.

    Args:
        img: Mapping with the file metadata. Reads ``byte_order``,
            ``image_type``, ``number_of_images``, ``height`` and ``width``
            (the three dimensions are converted with ``int``), and optionally
            ``offset_to_first_image`` and ``gap_between_images``.
        file: Bytes-like object holding the raw file content.

    Returns:
        numpy.ndarray: Array of shape ``(number_of_images, height, width)``.

    Raises:
        ValueError: If ``image_type`` is not supported, if the offset or gap
            is negative, or if the buffer does not match the described layout.

    NOTE(review): ``offset_to_first_image`` and ``gap_between_images`` are
    interpreted as byte counts (header size, and padding after each image),
    as in ImageJ's raw import dialog -- confirm against the upload form.
    """
    logger.info(f'img: {img}')

    prefix_map = {'little_endian': '<', 'big_endian': '>'}
    suffix_map = {
        '8_bit': 'u1', '16_bit_unsigned': 'u2', '32_bit_unsigned': 'u4', '64_bit_unsigned': 'u8',
        '8_bit_signed': 'i1', '16_bit_signed': 'i2', '32_bit_signed': 'i4', '64_bit_signed': 'i8',
        '32_bit_real': 'f4', '64_bit_real': 'f8'
    }

    image_type = img['image_type']
    suffix = suffix_map.get(image_type)
    if suffix is None:
        # Fail with a clear message instead of "str + NoneType" further down.
        raise ValueError(f'Unsupported RAW image type: {image_type!r}')

    # '|' leaves the byte order unspecified when the metadata does not name one.
    prefix = prefix_map.get(img['byte_order'], '|')
    datatype = np.dtype(prefix + suffix)

    slices = int(img['number_of_images'])
    height = int(img['height'])
    width = int(img['width'])

    # Missing, None or empty values mean "no header" / "no padding".
    offset = int(img.get('offset_to_first_image') or 0)
    gap = int(img.get('gap_between_images') or 0)
    if offset < 0 or gap < 0:
        raise ValueError('offset_to_first_image and gap_between_images must be non-negative')

    if not offset and not gap:
        # Plain contiguous file: the whole buffer must match the dimensions.
        return np.frombuffer(file, dtype=datatype).reshape([slices, height, width])

    pixels_per_image = height * width

    if not gap:
        # Header followed by contiguous images.
        stack = np.frombuffer(
            file, dtype=datatype, count=slices * pixels_per_image, offset=offset
        )
        return stack.reshape([slices, height, width])

    # Images separated by padding: read each one at its own byte offset.
    stride = pixels_per_image * datatype.itemsize + gap
    frames = [
        np.frombuffer(
            file, dtype=datatype, count=pixels_per_image, offset=offset + index * stride
        ).reshape([height, width])
        for index in range(slices)
    ]
    return np.stack(frames)

def conf_tiff(file):
    """Read in-memory TIFF bytes with ``tifffile.imread`` and return the result."""
    stream = io.BytesIO(file)
    try:
        return tiff.imread(stream)
    finally:
        # Release the in-memory buffer whether or not decoding succeeded.
        stream.close()

def binary_correction(img):
    """Linearly rescale ``img`` to the 0-255 range and return it as ``uint8``.

    The minimum value maps to 0 and the maximum to 255; intermediate values
    are floored. An image whose values are all equal has no range to stretch
    and is returned as all zeros.

    Args:
        img: Numeric array-like image data.

    Returns:
        numpy.ndarray: ``uint8`` array with the same shape as ``img``.
    """
    logger.debug('Correcting for Binary values...')

    # Convert the extrema to Python floats first: subtracting them in the
    # image's own integer dtype can overflow (e.g. int8: 127 - (-128)).
    min_value = float(np.min(img))
    max_value = float(np.max(img))
    span = max_value - min_value

    if span == 0:
        # Constant image: avoid dividing by zero.
        return np.zeros(np.shape(img), dtype='uint8')

    scaled = np.floor((np.asarray(img, dtype=np.float64) - min_value) * (255 / span))
    # Clip guards against floating-point rounding pushing a value outside the
    # uint8 range, which would wrap around on conversion.
    return np.clip(scaled, 0, 255).astype('uint8')

def create_thumbnail(img):
    """Render a JPEG thumbnail of ``img`` and return it as bytes.

    A 3-D array with no axis of length 3 is treated as a stack of slices and
    its middle slice (along the first axis) is drawn with a fixed 0-255 value
    range. Any other array is drawn as-is. The figure is sized so its longer
    side is 5 inches and saved at 100 dpi, preserving the aspect ratio.

    Args:
        img: Image array with at least two dimensions.

    Returns:
        bytes: JPEG-encoded thumbnail.
    """
    dpi = 100
    dim_max = 5
    depth_slice = None

    if len(img.shape) == 3 and 3 not in img.shape:
        width = img.shape[2]
        height = img.shape[1]
        # Floor division keeps the index in range even for a single-slice
        # stack (ceil(1 / 2) == 1 would index past the end).
        depth_slice = img.shape[0] // 2
    else:
        width = img.shape[1]
        height = img.shape[0]
        if len(img.shape) == 3:
            # TODO: Handle RGB files shape ==> (h, w, 3)
            logger.debug('handle RGB')

    # preserve aspect ratio and resize to fit.
    modifier = dim_max / max(width, height)
    resized_width = width * modifier
    resized_height = height * modifier

    fig = plt.figure(figsize=(resized_width, resized_height))
    try:
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)
        # TODO: Swap color mapping if image is bitmap/8bit
        # The colormap is passed per call rather than through plt.set_cmap so
        # no process-wide pyplot state is modified.
        # Compare against None: slice index 0 is a valid (falsy) value.
        if depth_slice is not None:
            logger.debug('Creating Thumbnail from 3D tif')
            ax.imshow(img[depth_slice, :, :], aspect='equal', cmap='Greys', vmin=0, vmax=255)
        else:
            logger.debug('Creating Thumbnail from FLAT tif')
            ax.imshow(img, aspect='equal', cmap='Greys')

        buffer = io.BytesIO()
        fig.savefig(buffer, format='jpeg', dpi=dpi)
        return buffer.getvalue()
    finally:
        # Always release the figure; pyplot keeps it alive otherwise.
        plt.close(fig)

def create_histogram(img):
    """Build a 256-bin density histogram of every value in ``img``.

    Args:
        img: Image array; it is flattened before binning.

    Returns:
        tuple: ``(jpeg_bytes, csv_text)`` where ``jpeg_bytes`` is the plotted
        histogram saved as JPEG at 200 dpi and ``csv_text`` is a CSV string
        with a ``Value,Probability`` header followed by one row per bin
        (the bin's left edge and its density).
    """
    logger.debug('Creating Histogram')
    nbins = 256
    fig_hist = plt.figure(figsize=(4, 2.4))
    try:
        # Draw on this figure's own axes instead of pyplot's "current" one.
        ax = fig_hist.add_subplot(111)
        freq, bins, _ = ax.hist(img.reshape([np.size(img), ]), nbins, density=True)
        ax.set_xlabel('Gray value')
        ax.set_ylabel('Probability')
        fig_hist.tight_layout()

        image_buffer = io.BytesIO()
        fig_hist.savefig(image_buffer, format='jpeg', dpi=200)
    finally:
        # Always release the figure; pyplot keeps it alive otherwise.
        plt.close(fig_hist)

    csv_buffer = io.StringIO()
    histwriter = csv.writer(csv_buffer, delimiter=',')
    histwriter.writerow(('Value', 'Probability'))
    # ``bins`` has one more entry than ``freq``; zip stops at the shorter one,
    # pairing every frequency with its bin's left edge.
    histwriter.writerows(zip(bins, freq))

    logger.debug('Histogram Created')

    return image_buffer.getvalue(), csv_buffer.getvalue()

def create_animation(img):
    """
    Creates an animated GIF in memory using matplotlib and returns its binary data.

    The first slice is always included; after it, every slice is used for
    stacks of fewer than 100 slices, otherwise every ``round(n / 100)``-th
    slice starting from index 1. Frames are drawn with a fixed 0-255 value
    range at 6 frames per second.

    Args:
        img (ndarray): The input 3D image as a NumPy array, indexed as
            (slice, row, column).
    Returns:
        bytes: Binary data of the animated GIF, or ``None`` when ``img`` has
        fewer than three dimensions or is 3-D with an axis of length 3.
    """
    if len(img.shape) < 3 or (len(img.shape) == 3 and 3 in img.shape):
        logger.debug('Image is not a 3D array')
        return None  # Exit if the image is not a 3D array

    logger.debug('Creating Animated Gif')

    # Resize the image while preserving aspect ratio
    dim_max = 5
    width, height = img.shape[2], img.shape[1]
    modifier = dim_max / max(width, height)
    resized_width, resized_height = width * modifier, height * modifier

    slice_count = img.shape[0]
    step = 1 if slice_count < 100 else round(slice_count / 100)
    frame_indices = [0, *range(1, slice_count, step)]

    fig = plt.figure(figsize=(resized_width, resized_height))
    try:
        # One borderless Axes shared by all frames; ArtistAnimation shows one
        # image artist at a time, so a new Axes per frame is unnecessary.
        ax = plt.Axes(fig, [0., 0., 1., 1.])
        ax.set_axis_off()
        fig.add_axes(ax)

        frames = [
            [ax.imshow(img[index, :, :], aspect='equal', cmap='Greys', vmin=0, vmax=255)]
            for index in frame_indices
        ]

        animation = anim.ArtistAnimation(fig, frames)
        with tempfile.NamedTemporaryFile(suffix=".gif", delete=True) as temp_file:
            animation.save(temp_file.name, writer='imagemagick', fps=6)
            # Re-open by path: the writer produces the file through its own
            # handle, so the handle opened above may not reflect the output.
            with open(temp_file.name, 'rb') as saved:
                gif_binary_data = saved.read()
    finally:
        # Always release the figure; pyplot keeps it alive otherwise.
        plt.close(fig)

    logger.debug('Animated Gif Created')
    return gif_binary_data
3 changes: 3 additions & 0 deletions server/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ gevent = "^23.9.1"
pymemcache = "^4.0.0"
pydantic = "^2.5.0"
networkx = "^3.2.1"
numpy = "^2.2.1"
matplotlib = "^3.10.0"
tifffile = "^2025.1.10"

[tool.poetry.group.dev.dependencies]
pytest = "^7.3.1"
Expand Down

0 comments on commit 7ce9c51

Please sign in to comment.