'''Load Images allows you to specify which images or movies are to be loaded and in which order.
Disclaimer: Please note that the Input modules (i.e., Images, Metadata, NamesAndTypes and Groups) largely supersede this module. However, old pipelines loaded into CellProfiler that contain this module will provide the option of preserving them; these pipelines will operate exactly as before.
When used in combination with a SaveImages module, you can load images in one file format and save them in another, using CellProfiler as a file format converter.
If you would like to use the metadata-specific settings, please see Help > General help > Using metadata in CellProfiler for more details on metadata usage and syntax. Briefly, LoadImages can extract metadata from the image filename using pattern-matching strings, for grouping similar images together for the analysis run and for metadata-specific options in other modules; see the settings help for Where to extract metadata, and if an option for that setting is selected, Regular expression that finds metadata in the file name for the necessary syntax.
Please note that if you are loading a movie file (e.g., TIFs, FLEX, STKs, AVIs, ZVIs), each movie
is already treated as a group of images, so there is no need to enable here.""" % globals())
self.metadata_fields = cps.MultiChoice(
'Specify metadata fields to group by', [], doc="""
(Used only if grouping images by metadata)
Select the fields by which you want group the image files. You can select multiple tags. For
example, if a set of images had metadata for "Run", "Plate", "Well", and
"Site", selecting Run and Plate will create groups containing
images that share the same [Run,Plate] pair of fields.""")
# Add the first image to the images list
self.images = []
self.add_imagecb(False)
self.image_count = cps.HiddenCount(self.images,
text="Image count")
# Add another image
self.add_image = cps.DoSomething("", "Add another image", self.add_imagecb)
def add_imagecb(self, can_remove=True):
    '''Add another image group to the settings.

    can_remove - False for the first, mandatory image (it cannot be
                 deleted); True for images added later via the
                 "Add another image" button.
    '''
    group = cps.SettingsGroup()

    def example_file_fn(path=None):
        '''Get an example file for use in the file metadata regexp editor'''
        if path is None:
            path = self.image_directory()
            default = "plateA-2008-08-06_A12_s1_w1_[89A882DE-E675-4C12-9F8E-46C9976C4ABE].tif"
        else:
            default = None
        #
        # Find out the index we expect from filter_filename
        #
        for i, test_group in enumerate(self.images):
            if id(test_group) == id(group):
                break
        # Candidate files: anything carrying a recognized image extension.
        filenames = [x for x in os.listdir(path)
                     if x.find('.') != -1 and
                     os.path.splitext(x)[1].upper() in
                     ('.TIF', '.JPG', '.PNG', '.BMP')]
        # Prefer a file that this image's filter would actually accept.
        filtered_filenames = [x for x in filenames
                              if self.filter_filename(x) == i]
        if len(filtered_filenames) > 0:
            return filtered_filenames[0]
        if len(filenames) > 0:
            return filenames[0]
        if self.analyze_sub_dirs():
            # Recurse into non-hidden subdirectories looking for an example.
            d = [x for x in [os.path.abspath(os.path.join(path, x))
                             for x in os.listdir(path)
                             if not x.startswith('.')]
                 if os.path.isdir(x)]
            for subdir in d:
                result = example_file_fn(subdir)
                if result is not None:
                    return result
        # Only the top-level call has a non-None default; recursive calls
        # return None so the caller keeps searching.
        return default

    def example_path_fn():
        '''Get an example path for use in the path metadata regexp editor'''
        root = self.image_directory()
        d = [x for x in [os.path.abspath(os.path.join(root, x))
                         for x in os.listdir(root)
                         if not x.startswith('.')]
             if os.path.isdir(x)]
        if len(d) > 0:
            return d[0]
        return root

    img_index = len(self.images)
    self.images.append(group)
    group.append("divider", cps.Divider(line=True))
    group.append("common_text", cps.Text(
        'Text that these images have in common (case-sensitive)', '', doc="""
(Used only for the image-loading Text options)
For Text-Exact match, type the text string that all the
images have in common. For example, if all the images for the given
channel end with the text "D.TIF", type D.TIF here.
For Text-Regular expression, type the regular expression
that would capture all the images for this channel. See the module
help for more information on regular expressions."""))
    # NOTE(review): the doc text below contains material that was merged in
    # from the metadata-regexp help during an earlier corruption of this
    # file; it is preserved verbatim because it is a runtime string.
    group.append("order_position", cps.Integer(
        'Position of this image in each group', img_index + 1,
        minval=1,
        doc="""
(Used only for the image-loading Order option) Clicking the magnifying glass icon to the right will bring up a
tool for checking the accuracy of your regular expression. The
regular expression syntax can be used to name different parts of
your expression. The syntax (?P<fieldname>expr) will
extract whatever matches expr and assign it to the
measurement,fieldname for the image.
For instance, a researcher uses plate names composed of a string
of letters and numbers, followed by an underscore, then the well,
followed by another underscore, followed by an "s" and a digit
representing the site taken within the well (e.g., TE12345_A05_s1.tif).
The following regular expression will capture the plate, well, and
site in the fields "Plate", "Well", and "Site": The regular expression can be typed in the upper text box, with
a sample file name given in the lower text box. Provided the syntax
is correct, the corresponding fields will be highlighted in the same
color in the two boxes. Press Submit to enter the typed
regular expression. You can create metadata tags for any portion of the filename or path, but if you are
specifying metadata for multiple images in a single LoadImages module, an image cycle can
only have one set of values for each metadata tag. This means that you can only
specify the metadata tags which have the same value across all images listed in the module. For example,
in the example above, you might load two wavelengths of data, one named TE12345_A05_s1_w1.tif
and the other TE12345_A05_s1_w2.tif, where the number following the w is the wavelength.
In this case, a "Wavelength" tag should not be included in the regular expression
because while the "Plate", "Well" and "Site" metadata is identical for both images, the wavelength metadata is not. Note that if you use the special fieldnames <WellColumn> and
<WellRow> together, LoadImages will automatically create a <Well>
metadata field by joining the two fieldname values together. For example,
if <WellRow> is "A" and <WellColumn> is "01", a field
<Well> will be "A01". This is useful if your well row and column names are
separated from each other in the filename, but you want to retain the standard
well nomenclature. Clicking the magnifying glass icon to the right will bring up a
tool that will allow you to check the accuracy of your regular
expression. The regular expression syntax can be used to
name different parts of your expression. The syntax
(?<fieldname>expr) will extract whatever matches
expr and assign it to the image's fieldname measurement.
For instance, a researcher uses folder names with the date and
subfolders containing the images with the run ID
(e.g., ./2009_10_02/1234/) The following regular expression
will capture the plate, well, and site in the fields
Date and Run:
Enter the number in the image order that this image channel
occupies. For example, if the order is "DAPI, FITC, Red;
DAPI, FITC, Red" and so on, the DAPI channel would occupy
position 1."""))
    group.append("metadata_choice", cps.Choice(
        'Extract metadata from where?',
        [M_NONE, M_FILE_NAME, M_PATH, M_BOTH], doc="""
Metadata fields can be specified from
the image filename, the image path (including subfolders), or both.
The metadata entered here can be used for image grouping (see the
Group images by metadata? setting) or simply used as
additional columns in the exported
measurements (see the ExportToSpreadsheet module)."""))
    # NOTE(review): the file_metadata regex literal was truncated to
    # "'^(?P" in the corrupted source; the full default value is restored
    # from the example shown verbatim in this setting's own help text.
    group.append("file_metadata", cps.RegexpText(
        'Regular expression that finds metadata in the file name',
        '^(?P<Plate>.*)_(?P<Well>[A-P][0-9]{1,2})_s(?P<Site>[0-9])',
        get_example_fn=example_file_fn, doc="""
The regular expression to extract the metadata from the file name
is entered here. Note that this field is available whether you have
selected Text-Regular expressions to load the files or not.
Please see the general module help for more information on
construction of a regular expression.
^(?P<Plate>.*)_(?P<Well>[A-P][0-9]{1,2})_s(?P<Site>[0-9]) ^ Start only at beginning of the file name (?P<Plate> Name the captured field Plate .* Capture as many characters as follow _ Discard the underbar separating plate from well (?P<Well> Name the captured field Well [A-P] Capture exactly one letter between A and P [0-9]{1,2} Capture one or two digits that follow _s Discard the underbar followed by s separating well from site (?P<Site> Name the captured field Site [0-9] Capture one digit following"""))
    # NOTE(review): the path_metadata setting was lost in the corruption but
    # is referenced by settings(), help_settings() and visible_settings();
    # it is restored here with its doc text (which had been merged into the
    # blob above).  The label text is reconstructed — confirm against the
    # original file.
    group.append("path_metadata", cps.RegexpText(
        'Regular expression that finds metadata in the subfolder path',
        '.*[\\\\/](?P<Date>.*)[\\\\/](?P<Run>.*)$',
        get_example_fn=example_path_fn, doc="""
Enter the regular expression for extracting the metadata from the
path. Note that this field is available whether you have selected
Text-Regular expressions to load the files or not.
.*[\\\/](?P<Date>.*)[\\\\/](?P<Run>.*)$ .*[\\\\/] Skip characters at the beginning of the pathname until either a slash (/) or
backslash (\\) is encountered (depending on the operating system) (?P<Date> Name the captured field Date .* Capture as many characters that follow [\\\\/] Discard the slash/backslash character (?P<Run> Name the captured field Run .* Capture as many characters as follow $ The Run field must be at the end of the path string, i.e., the
last folder on the path. This also means that the Date field contains the parent
folder of the Date folder."""))
    # NOTE(review): wants_movie_frame_grouping is likewise referenced by
    # later code but its append() was missing; restored with its doc text.
    # The label text is reconstructed — confirm against the original file.
    group.append("wants_movie_frame_grouping", cps.Binary(
        "Group the movie frames?", False, doc="""
LoadImages refers to the individual images in a group
as channels. Channels are numbered consecutively, starting
at channel 1. To set up grouping, first specify how the channels
are grouped (interleaving and number of channels per group), then
assign image names to each of the channels individually.
""" % globals()))
    group.append("interleaving", cps.Choice(
        "Grouping method", [I_INTERLEAVED, I_SEPARATED],
        doc="""
(Used only if a movie image format is selected as file type and movie frame grouping are selected)
Channels in a movie can be interleaved or separated.
In an interleaved movie, the first frame is channel 1, the second is channel 2 and so on up to the number of channels per group for a given image cycle. In a separated movie, all of the frames for channel 1 are processed as the first image cycle, then the frames for channel 2 for the second image cycle, and so on.
For example, a movie may consist of 6 frames and we would like to process the movie as two channels per group. An interleaved movie would be processed like this:
Frame # | Channel # | Image cycle # |
---|---|---|
1 | 1 | 1 |
2 | 2 | 1 |
3 | 1 | 2 |
4 | 2 | 2 |
5 | 1 | 3 |
6 | 2 | 3 |
For a separated movie, the channels would be processed like this:
Frame # | Channel # | Image cycle # |
---|---|---|
1 | 1 | 1 |
2 | 1 | 2 |
3 | 1 | 3 |
4 | 2 | 1 |
5 | 2 | 2 |
6 | 2 | 3 |
Note the difference in which frames are processed in which image cycle
between the two methods."""))
    group.append("channels_per_group", cps.Integer(
        "Number of channels per group", 3, minval=2,
        reset_view=True, doc="""
(Used only if a movie image format is selected as file type and movie frame grouping is selected)
This setting controls the number of frames to be
grouped together. As an example, for an interleaved movie with
12 frames and three channels per group, the first, fourth,
seventh and tenth frame will be assigned to channel 1, the
2nd, 5,th 8th and 11th frame will be assigned to
channel 2 and the 3rd, 6th, 9th, and 12th will be
assigned to channel 3. For a separated movie, frames 1 through 4
will be assigned to channel 1, 5 through 8 to channel 2 and
9 through 12 to channel 3."""))
    #
    # Flex files (and arguably others like color images and multichannel
    # TIF files) can have more than one channel. So, within each image,
    # we have a list of channels.
    #
    group.channels = []
    group.append("channel_count", cps.HiddenCount(group.channels,
                                                  "Channel count"))

    def add_channel(can_remove=True):
        # Closure so the "Add channel" button adds to *this* image group.
        self.add_channel(group, can_remove)

    # Every image starts with one mandatory channel.
    add_channel(False)
    group.append("add_channel_button", cps.DoSomething(
        "Add another channel", "Add channel", add_channel))
    group.can_remove = can_remove
    if can_remove:
        group.append("remover", cps.RemoveSettingButton(
            '', 'Remove this image', self.images, group))
def add_channel(self, image_settings, can_remove=True):
'''Add another channel to an image
image_settings - the image's settings group
can_remove - true if we are allowed to remove this channel
'''
group = cps.SettingsGroup()
image_settings.channels.append(group)
img_index = 0
for ii in self.images:
for jj in ii.channels:
if id(jj) == id(group):
break
img_index += 1
group.append("image_object_choice", cps.Choice(
'Load the input as images or objects?', IO_ALL, doc="""
This setting determines whether you load an image as image data
or as segmentation results (i.e., objects):
Special note on saving images: You can use the settings in this module
to pass object outlines along to the module OverlayOutlines, and then save
them with the SaveImages module.'''))
group.get_image_name = lambda: (
group.image_name.value if self.channel_wants_images(group)
else IMAGE_FOR_OBJECTS_F % group.object_name.value)
channels = [
str(x) for x in range(1, max(10, len(image_settings.channels) + 2))]
group.append("channel_number", cps.Choice(
"Channel number", channels, channels[len(image_settings.channels) - 1], doc="""
(Used only if a movie image format is selected as file type and movie frame grouping is selected)
The channels of a multichannel image are numbered starting from 1.
Each channel is a greyscale image, acquired using different
illumination sources and/or optics. Use this setting to pick
the channel to associate with the above image name."""))
group.append("rescale", cps.Binary(
"Rescale intensities?", True, doc="""
This option determines whether image metadata should be
used to rescale the image's intensities. Some image formats
save the maximum possible intensity value along with the pixel data.
For instance, a microscope might acquire images using a 12-bit
A/D converter which outputs intensity values between zero and 4095,
but stores the values in a field that can take values up to 65535.
Select %(YES)s to rescale the image intensity so that saturated values are rescaled to 1.0 by dividing all pixels in the image by the maximum possible intensity value.
Select %(NO)s to ignore the image metadata and rescale the image to 0 – 1.0 by dividing by 255 or 65535, depending on the number of bits used to store the image.
""" % globals())) group.can_remove = can_remove if can_remove: group.append("remover", cps.RemoveSettingButton( "Remove this channel", "Remove channel", image_settings.channels, group)) def channel_wants_images(self, channel): '''True if the channel produces images, false if it produces objects''' return channel.image_object_choice == IO_IMAGES def help_settings(self): result = [self.file_types, self.match_method, self.order_group_size, self.exclude, self.match_exclude, self.descend_subdirectories, self.subdirectory_filter, self.check_images, self.group_by_metadata, self.metadata_fields] image_group = self.images[0] result += [ image_group.common_text, image_group.order_position, image_group.channels[0].image_object_choice, image_group.channels[0].image_name, image_group.channels[0].object_name, image_group.channels[0].wants_outlines, image_group.channels[0].outlines_name, image_group.channels[0].channel_number, image_group.channels[0].rescale, image_group.metadata_choice, image_group.file_metadata, image_group.path_metadata, image_group.wants_movie_frame_grouping, image_group.interleaving, image_group.channels_per_group ] result += [self.location] return result def visible_settings(self): varlist = [self.file_types, self.match_method] if self.match_method == MS_EXACT_MATCH: varlist += [self.exclude] if self.exclude.value: varlist += [self.match_exclude] elif self.match_method == MS_ORDER: varlist += [self.order_group_size] varlist += [self.descend_subdirectories] if self.descend_subdirectories == SUB_SOME: varlist += [self.subdirectory_filter] if self.has_metadata and not self.match_method == MS_ORDER: varlist += [self.check_images] if self.has_metadata: varlist += [self.group_by_metadata] if self.do_group_by_metadata: varlist += [self.metadata_fields] choices = set() for fd in self.images: for setting, tag in ( (fd.file_metadata, M_FILE_NAME), (fd.path_metadata, M_PATH)): if fd.metadata_choice in (tag, M_BOTH): choices.update( 
cpmeas.find_metadata_tokens(setting.value)) if (any([cpmeas.is_well_column_token(x) for x in choices]) and any([cpmeas.is_well_row_token(x) for x in choices]) and not any([x.lower() == cpmeas.FTR_WELL.lower() for x in choices])): choices.add(cpmeas.FTR_WELL) if self.file_types == FF_OTHER_MOVIES: choices.update([M_Z, M_T, C_SERIES]) elif self.file_types in (FF_AVI_MOVIES, FF_STK_MOVIES): choices.add(M_T) choices = list(choices) choices.sort() self.metadata_fields.choices = choices # per image settings for i, fd in enumerate(self.images): is_multichannel = (self.is_multichannel or fd.wants_movie_frame_grouping) varlist += [fd.divider] if self.match_method != MS_ORDER: varlist += [fd.common_text] else: varlist += [fd.order_position] if not is_multichannel: varlist += [fd.channels[0].image_object_choice] if self.channel_wants_images(fd.channels[0]): varlist += [fd.channels[0].image_name, fd.channels[0].rescale] else: varlist += [fd.channels[0].object_name, fd.channels[0].wants_outlines] if fd.channels[0].wants_outlines.value: varlist += [fd.channels[0].outlines_name] varlist += [fd.metadata_choice] if self.has_file_metadata(fd): varlist += [fd.file_metadata] if self.has_path_metadata(fd): varlist += [fd.path_metadata] max_channels = 9 if self.file_types in (FF_AVI_MOVIES, FF_STK_MOVIES, FF_OTHER_MOVIES): varlist += [fd.wants_movie_frame_grouping] if fd.wants_movie_frame_grouping: varlist += [fd.interleaving, fd.channels_per_group] is_multichannel = True max_channels = fd.channels_per_group.value if is_multichannel: for channel in fd.channels: varlist += [channel.image_object_choice] if self.channel_wants_images(channel): varlist += [channel.image_name] else: varlist += [channel.object_name, channel.wants_outlines] if channel.wants_outlines.value: varlist += [channel.outlines_name] varlist += [channel.channel_number] if self.channel_wants_images(channel): varlist += [channel.rescale] choices = channel.channel_number.choices del choices[:] choices += [str(x + 1) for x 
in range(max_channels)] if channel.can_remove: varlist += [channel.remover] varlist += [fd.add_channel_button] if fd.can_remove: varlist += [fd.remover] varlist += [self.add_image] varlist += [self.location] return varlist def validate_module(self, pipeline): '''Validate a module's settings LoadImages marks the common_text as invalid if it's blank. ''' if self.match_method == MS_EXACT_MATCH: for image_group in self.images: if len(image_group.common_text.value) == 0: raise cps.ValidationError( "The matching text is blank. This would match all images.\n" "Use regular expressions to match with a matching\n" 'expression of ".*" if this is the desired behavior.', image_group.common_text) def validate_module_warnings(self, pipeline): '''Check for potentially dangerous settings''' # Check that user has selected fields for grouping if grouping is turned on if self.group_by_metadata.value and (len(self.metadata_fields.selections) == 0): raise cps.ValidationError("Group images by metadata is True, but no metadata " "fields have been chosen for grouping.", self.metadata_fields) # Check that user-specified names don't have bad characters invalid_chars_pattern = "^[A-Za-z][A-Za-z0-9_]+$" warning_text = "The image name has questionable characters. The pipeline can use this name " \ "and produce results, but downstream programs that use this data (e.g, MATLAB, MySQL) may error." for i, fd in enumerate(self.images): is_multichannel = (self.is_multichannel or fd.wants_movie_frame_grouping) if not is_multichannel: if self.channel_wants_images(fd.channels[0]): if not re.match(invalid_chars_pattern, fd.channels[0].image_name.value): raise cps.ValidationError(warning_text, fd.channels[0].image_name) else: for channel in fd.channels: if self.channel_wants_images(channel): if not re.match(invalid_chars_pattern, fd.channels[0].image_name.value): raise cps.ValidationError(warning_text, channel.image_name) # The best practice is to have a single LoadImages or LoadData module. 
def validate_module_warnings(self, pipeline):
    '''Check for potentially dangerous settings

    Raises cps.ValidationError (warning severity) for configurations
    that will run but are likely to cause trouble downstream:
    * grouping by metadata with no grouping fields chosen,
    * image names that are not simple identifiers,
    * more than one Load module in the pipeline.
    '''
    # Check that user has selected fields for grouping if grouping is turned on
    if self.group_by_metadata.value and (len(self.metadata_fields.selections) == 0):
        raise cps.ValidationError(
            "Group images by metadata is True, but no metadata "
            "fields have been chosen for grouping.",
            self.metadata_fields)
    # Check that user-specified names don't have bad characters.
    # NOTE(review): the "+" requires at least two characters, so a
    # one-letter image name is also flagged - confirm that is intended.
    valid_name_pattern = "^[A-Za-z][A-Za-z0-9_]+$"
    warning_text = "The image name has questionable characters. The pipeline can use this name " \
                   "and produce results, but downstream programs that use this data (e.g, MATLAB, MySQL) may error."
    for i, fd in enumerate(self.images):
        is_multichannel = (self.is_multichannel or fd.wants_movie_frame_grouping)
        if not is_multichannel:
            if self.channel_wants_images(fd.channels[0]):
                if not re.match(valid_name_pattern, fd.channels[0].image_name.value):
                    raise cps.ValidationError(warning_text, fd.channels[0].image_name)
        else:
            for channel in fd.channels:
                if self.channel_wants_images(channel):
                    # Bug fix: validate each channel's own name. The
                    # original re-checked fd.channels[0] here, so names
                    # of channels past the first were never validated.
                    if not re.match(valid_name_pattern, channel.image_name.value):
                        raise cps.ValidationError(warning_text, channel.image_name)
    # The best practice is to have a single LoadImages or LoadData module.
    from cellprofiler.modules.loaddata import LoadData
    for module in pipeline.modules():
        if id(module) == id(self):
            # Only warn about Load modules that run before this one
            return
        if isinstance(module, LoadData):
            raise cps.ValidationError(
                "Your pipeline has a LoadImages and LoadData module.\n"
                "The best practice is to have only a single LoadImages\n"
                "or LoadData module. This LoadImages module will match its\n"
                "metadata against that of the previous LoadData module\n"
                "in an attempt to reconcile the two modules' image\n"
                "set lists and this can result in image sets with\n"
                "missing images or metadata.",
                self.add_image)
        if isinstance(module, LoadImages):
            raise cps.ValidationError(
                "Your pipeline has two or more LoadImages modules.\n"
                "The best practice is to have only one LoadImages module.\n"
                "Consider loading all of your images using a single\n"
                "LoadImages module. You can add additional images using\n"
                "the Add button",
                self.add_image)
#
# Slots for storing settings in the array
#
# Fixed positions of the module-level settings in the settings list.
SLOT_FILE_TYPE = 0
SLOT_MATCH_METHOD = 1
SLOT_ORDER_GROUP_SIZE = 2
SLOT_MATCH_EXCLUDE = 3
SLOT_DESCEND_SUBDIRECTORIES = 4
SLOT_LOCATION = 5
SLOT_CHECK_IMAGES = 6
SLOT_GROUP_BY_METADATA = 7
SLOT_EXCLUDE = 8
SLOT_GROUP_FIELDS = 9
# Index of the first per-image setting, by settings-format revision.
# The unsuffixed name is the current format.
SLOT_FIRST_IMAGE_V1 = 8
SLOT_FIRST_IMAGE_V2 = 9
SLOT_FIRST_IMAGE_V3 = 10
SLOT_FIRST_IMAGE_V4 = 11
SLOT_FIRST_IMAGE_V5 = 10
SLOT_FIRST_IMAGE_V6 = 11
SLOT_FIRST_IMAGE_V7 = 11
SLOT_FIRST_IMAGE_V8 = 11
SLOT_FIRST_IMAGE_V9 = 11
SLOT_FIRST_IMAGE_V10 = 11
SLOT_FIRST_IMAGE = 12
# Index of the hidden image-count setting, by revision.
SLOT_IMAGE_COUNT_V6 = 10
SLOT_IMAGE_COUNT_V7 = 10
SLOT_IMAGE_COUNT_V8 = 10
SLOT_IMAGE_COUNT_V9 = 10
SLOT_IMAGE_COUNT_V10 = 10
SLOT_IMAGE_COUNT = 11
# Offsets of per-image settings within one image group, by revision.
SLOT_OFFSET_COMMON_TEXT = 0
SLOT_OFFSET_IMAGE_NAME_V5 = 1
SLOT_OFFSET_ORDER_POSITION_V5 = 2
SLOT_OFFSET_METADATA_CHOICE_V5 = 3
SLOT_OFFSET_FILE_METADATA_V5 = 4
SLOT_OFFSET_PATH_METADATA_V5 = 5
# Number of settings in one image group, by revision.
SLOT_IMAGE_FIELD_COUNT_V1 = 3
SLOT_IMAGE_FIELD_COUNT_V5 = 6
SLOT_IMAGE_FIELD_COUNT_V7 = 9
SLOT_IMAGE_FIELD_COUNT_V8 = 9
SLOT_IMAGE_FIELD_COUNT_V9 = 9
SLOT_IMAGE_FIELD_COUNT = 9
# Current per-image offsets (relative to the start of the group).
SLOT_OFFSET_ORDER_POSITION = 1
SLOT_OFFSET_METADATA_CHOICE = 2
SLOT_OFFSET_FILE_METADATA = 3
SLOT_OFFSET_PATH_METADATA = 4
SLOT_OFFSET_CHANNEL_COUNT = 5
SLOT_OFFSET_CHANNEL_COUNT_V6 = 5
SLOT_OFFSET_CHANNEL_COUNT_V7 = 5
SLOT_OFFSET_CHANNEL_COUNT_V8 = 5
SLOT_OFFSET_CHANNEL_COUNT_V9 = 5
SLOT_OFFSET_WANTS_MOVIE_FRAME_GROUPING = 6
SLOT_OFFSET_INTERLEAVING = 7
SLOT_OFFSET_CHANNELS_PER_GROUP = 8
# Offsets of per-channel settings within one channel group, by revision.
SLOT_OFFSET_IO_CHOICE = 0
SLOT_OFFSET_IMAGE_NAME_V8 = 0
SLOT_OFFSET_IMAGE_NAME_V9 = 1
SLOT_OFFSET_IMAGE_NAME = 1
SLOT_OFFSET_OBJECT_NAME_V9 = 3
SLOT_OFFSET_OBJECT_NAME = 2
SLOT_OFFSET_CHANNEL_NUMBER_V8 = 1
SLOT_OFFSET_CHANNEL_NUMBER_V9 = 3
SLOT_OFFSET_CHANNEL_NUMBER = 3
SLOT_OFFSET_RESCALE_V8 = 2
SLOT_OFFSET_RESCALE_V9 = 4
SLOT_OFFSET_RESCALE = 4
# Number of settings in one channel group, by revision.
SLOT_CHANNEL_FIELD_COUNT = 7
SLOT_CHANNEL_FIELD_COUNT_V6 = 2
SLOT_CHANNEL_FIELD_COUNT_V7 = 2
SLOT_CHANNEL_FIELD_COUNT_V8 = 3
SLOT_CHANNEL_FIELD_COUNT_V9 = 5

def settings(self):
    """Return the settings array in a consistent order

    The fixed module-level settings come first (indices match the
    SLOT_* constants above, ending with the hidden image count at
    SLOT_IMAGE_COUNT), followed by SLOT_IMAGE_FIELD_COUNT settings per
    image group (channel count at SLOT_OFFSET_CHANNEL_COUNT) and
    SLOT_CHANNEL_FIELD_COUNT settings per channel.
    """
    setting_values = [
        self.file_types, self.match_method, self.order_group_size,
        self.match_exclude, self.descend_subdirectories, self.location,
        self.check_images, self.group_by_metadata, self.exclude,
        self.metadata_fields, self.subdirectory_filter, self.image_count]
    for image_group in self.images:
        # Per-image settings (9 of them - see SLOT_IMAGE_FIELD_COUNT)
        setting_values += [
            image_group.common_text, image_group.order_position,
            image_group.metadata_choice, image_group.file_metadata,
            image_group.path_metadata, image_group.channel_count,
            image_group.wants_movie_frame_grouping,
            image_group.interleaving, image_group.channels_per_group]
        for channel in image_group.channels:
            # Per-channel settings (7 - see SLOT_CHANNEL_FIELD_COUNT)
            setting_values += [
                channel.image_object_choice, channel.image_name,
                channel.object_name, channel.wants_outlines,
                channel.outlines_name, channel.channel_number,
                channel.rescale]
    return setting_values
def prepare_settings(self, setting_values):
    """Resize self.images and their channel lists to fit saved settings.

    The hidden count at SLOT_IMAGE_COUNT says how many image groups
    were saved; each group's hidden channel count (at
    SLOT_OFFSET_CHANNEL_COUNT within the group) says how many channels
    it carries. Rebuild the variable-length structures to match before
    the values are assigned.
    """
    image_count = int(setting_values[self.SLOT_IMAGE_COUNT])
    remaining = setting_values[self.SLOT_FIRST_IMAGE:]
    del self.images[:]
    for image_idx in range(image_count):
        # The first group is created without a "remove" button
        self.add_imagecb(image_idx > 0)
        group = self.images[-1]
        n_channels = int(remaining[self.SLOT_OFFSET_CHANNEL_COUNT])
        remaining = remaining[self.SLOT_IMAGE_FIELD_COUNT:]
        for channel_idx in range(n_channels):
            if channel_idx > 0:
                # Each group starts with one channel; add the rest
                self.add_channel(group)
            remaining = remaining[self.SLOT_CHANNEL_FIELD_COUNT:]

@property
def is_multichannel(self):
    '''True if the image is one of the multichannel types and needs to be split'''
    # Currently, only Flex files are handled this way
    return self.file_types == FF_OTHER_MOVIES

@property
def has_metadata(self):
    """True when some metadata is available for grouping/matching.

    Movies always carry frame metadata; otherwise look for file- or
    path-extracted metadata on any image group.
    """
    if self.file_types in (FF_AVI_MOVIES, FF_STK_MOVIES, FF_OTHER_MOVIES):
        return True
    return any([self.has_file_metadata(group) or self.has_path_metadata(group)
                for group in self.images])

@property
def do_group_by_metadata(self):
    '''Return true if we should group by metadata

    The group-by-metadata checkbox won't show unless there are metadata
    groupings to group by - so go by the checkbox and the presence of
    metadata to group by
    '''
    return self.has_metadata if self.group_by_metadata else False

def get_channel_for_image_name(self, image_name):
    '''Given an image name, return the channel that holds its settings'''
    matches = (channel
               for image_settings in self.images
               for channel in image_settings.channels
               if channel.get_image_name() == image_name)
    return next(matches, None)
def upgrade_settings(self, setting_values, variable_revision_number,
                     module_name, from_matlab):
    """Rewrite a saved settings list from an older revision to the current one.

    setting_values - settings as saved in the pipeline file
    variable_revision_number - revision they were saved under
    module_name - name under which the settings were saved
    from_matlab - True if saved by MATLAB CellProfiler 1.0

    Upgrades are applied stepwise, one revision at a time, and the
    final assert checks that the chain reached the current revision.
    Note: this is Python 2 code - the "/" divisions below are integer
    divisions.
    """
    #
    # historic rewrites from CP1.0
    #
    def upgrade_1_to_2(setting_values):
        """Upgrade rev 1 LoadImages to rev 2

        Handle movie formats new to rev 2
        """
        new_values = list(setting_values[:10])
        image_or_movie = setting_values[10]
        if image_or_movie == 'Image':
            new_values.append('individual images')
        elif setting_values[11] == 'avi':
            new_values.append('avi movies')
        elif setting_values[11] == 'stk':
            new_values.append('stk movies')
        else:
            raise ValueError('Unhandled movie type: %s' %
                             (setting_values[11]))
        new_values.extend(setting_values[12:])
        return new_values, 2

    def upgrade_2_to_3(setting_values):
        """Added binary/grayscale question"""
        new_values = list(setting_values)
        new_values.append('grayscale')
        new_values.append('')
        return new_values, 3

    def upgrade_3_to_4(setting_values):
        """Added text exclusion at slot # 10"""
        new_values = list(setting_values)
        new_values.insert(10, cps.DO_NOT_USE)
        return new_values, 4

    def upgrade_4_to_5(setting_values):
        # Added a trailing yes/no setting
        new_values = list(setting_values)
        new_values.append(cps.NO)
        return new_values, 5

    def upgrade_5_to_new_1(setting_values):
        """Take the old LoadImages values and put them in the correct slots"""
        # Slot 13 held a folder code: "." = default input folder,
        # "&" = default output folder, leading "."/"&" = subfolder of
        # those, anything else = absolute path.
        loc = setting_values[13]
        custom_path = loc
        if loc == '.':
            dir_choice = DEFAULT_INPUT_FOLDER_NAME
        elif loc == '&':
            dir_choice = DEFAULT_OUTPUT_FOLDER_NAME
        elif loc.startswith('.'):
            dir_choice = DEFAULT_INPUT_SUBFOLDER_NAME
        elif loc.startswith('&'):
            dir_choice = DEFAULT_OUTPUT_SUBFOLDER_NAME
            custom_path = '.' + loc[1:]
        else:
            dir_choice = ABSOLUTE_FOLDER_NAME
        new_values = [
            setting_values[11],  # file_types
            setting_values[0],  # match_method
            setting_values[9],  # order_group_size
            setting_values[10],  # match_exclude
            setting_values[12],  # descend_subdirectories
            dir_choice,  # was location
            custom_path,  # was location_other
            setting_values[16]]  # check_images
        # Up to four (text, image name) pairs; a DO_NOT_USE or slash
        # placeholder marks the end of the used slots.
        for i in range(0, 4):
            text_to_find = setting_values[i * 2 + 1]
            image_name = setting_values[i * 2 + 2]
            if text_to_find == cps.DO_NOT_USE or \
                    image_name == cps.DO_NOT_USE or \
                    text_to_find == '/' or \
                    image_name == '/' or \
                    text_to_find == '\\' or \
                    image_name == '\\':
                break
            new_values.extend([text_to_find, image_name, text_to_find])
        return new_values, 1

    #
    # New revisions in CP2.0
    #
    def upgrade_new_1_to_2(setting_values):
        """Add the metadata slots to the images"""
        new_values = list(setting_values[:self.SLOT_FIRST_IMAGE_V1])
        new_values.append(cps.NO)  # Group by metadata is off
        # Integer division (Python 2): number of complete image groups
        for i in range((len(setting_values) - self.SLOT_FIRST_IMAGE_V1) /
                       self.SLOT_IMAGE_FIELD_COUNT_V1):
            off = self.SLOT_FIRST_IMAGE_V1 + i * self.SLOT_IMAGE_FIELD_COUNT_V1
            new_values.extend([setting_values[off],
                               setting_values[off + 1],
                               setting_values[off + 2],
                               M_NONE, cps.NONE, cps.NONE])
        return new_values, 2

    def upgrade_new_2_to_3(setting_values):
        """Add the checkbox for excluding certain files"""
        new_values = list(setting_values[:self.SLOT_FIRST_IMAGE_V2])
        if setting_values[self.SLOT_MATCH_EXCLUDE] == cps.DO_NOT_USE:
            new_values += [cps.NO]
        else:
            new_values += [cps.YES]
        new_values += setting_values[self.SLOT_FIRST_IMAGE_V2:]
        return new_values, 3

    def upgrade_new_3_to_4(setting_values):
        """Add the metadata_fields setting"""
        new_values = list(setting_values[:self.SLOT_FIRST_IMAGE_V3])
        new_values.append('')
        new_values += setting_values[self.SLOT_FIRST_IMAGE_V3:]
        return new_values, 4

    def upgrade_new_4_to_5(setting_values):
        """Combine the location and custom location values"""
        setting_values = cps.standardize_default_folder_names(
            setting_values, self.SLOT_LOCATION)
        custom_location = setting_values[self.SLOT_LOCATION + 1]
        location = setting_values[self.SLOT_LOCATION]
        if location == ABSOLUTE_FOLDER_NAME:
            # A leading "." or "&" in the custom path really meant a
            # subfolder of the default input/output folder
            if custom_location.startswith('.'):
                location = cps.DEFAULT_INPUT_SUBFOLDER_NAME
            elif custom_location.startswith('&'):
                location = cps.DEFAULT_OUTPUT_SUBFOLDER_NAME
                custom_location = "." + custom_location[1:]
        location = cps.DirectoryPath.static_join_string(
            location, custom_location)
        setting_values = (setting_values[:self.SLOT_LOCATION] +
                          [location] +
                          setting_values[self.SLOT_LOCATION + 2:])
        return setting_values, 5

    def upgrade_new_5_to_6(setting_values):
        '''Added separate channels for flex images'''
        new_values = list(setting_values[:self.SLOT_FIRST_IMAGE_V5])
        setting_values = setting_values[self.SLOT_FIRST_IMAGE_V5:]
        # Integer division (Python 2)
        image_count = (len(setting_values) /
                       self.SLOT_IMAGE_FIELD_COUNT_V5)
        #
        # Add the image count to the settings
        #
        new_values += [str(image_count)]
        for i in range(image_count):
            # The image name moves from the image group into the new
            # channel group
            new_values += setting_values[:self.SLOT_OFFSET_IMAGE_NAME_V5]
            new_values += setting_values[(self.SLOT_OFFSET_IMAGE_NAME_V5 + 1):
                                         self.SLOT_IMAGE_FIELD_COUNT_V5]
            #
            # Add a channel count of 1, the image name and a channel
            # number of 1
            #
            new_values += [
                "1", setting_values[self.SLOT_OFFSET_IMAGE_NAME_V5], "1"]
            setting_values = setting_values[self.SLOT_IMAGE_FIELD_COUNT_V5:]
        return new_values, 6

    def upgrade_new_6_to_7(setting_values):
        '''Added movie frame grouping'''
        new_values = list(setting_values[:self.SLOT_FIRST_IMAGE_V6])
        image_count = int(setting_values[self.SLOT_IMAGE_COUNT_V6])
        setting_values = setting_values[self.SLOT_FIRST_IMAGE_V6:]
        for i in range(image_count):
            new_values += setting_values[:self.SLOT_IMAGE_FIELD_COUNT_V5]
            # Defaults: no frame grouping, interleaved, 2 channels/group
            new_values += [cps.NO, I_INTERLEAVED, "2"]
            channel_count = int(setting_values[self.SLOT_OFFSET_CHANNEL_COUNT_V6])
            setting_values = setting_values[self.SLOT_IMAGE_FIELD_COUNT_V5:]
            channel_field_count = self.SLOT_CHANNEL_FIELD_COUNT_V6 * channel_count
            new_values += setting_values[:channel_field_count]
            setting_values = setting_values[channel_field_count:]
        return new_values, 7

    def upgrade_new_7_to_8(setting_values):
        '''Added rescale control'''
        new_values = list(setting_values[:self.SLOT_FIRST_IMAGE_V7])
        image_count = int(setting_values[self.SLOT_IMAGE_COUNT_V7])
        setting_values = setting_values[self.SLOT_FIRST_IMAGE_V7:]
        for i in range(image_count):
            new_values += setting_values[:self.SLOT_IMAGE_FIELD_COUNT_V7]
            channel_count = int(setting_values[self.SLOT_OFFSET_CHANNEL_COUNT_V7])
            setting_values = setting_values[self.SLOT_IMAGE_FIELD_COUNT_V7:]
            for j in range(channel_count):
                # Rescale defaults to YES for each channel
                new_values += setting_values[:self.SLOT_CHANNEL_FIELD_COUNT_V7] + [cps.YES]
                setting_values = setting_values[self.SLOT_CHANNEL_FIELD_COUNT_V7:]
        return new_values, 8

    def upgrade_new_8_to_9(setting_values):
        '''Added object loading'''
        new_values = list(setting_values[:self.SLOT_FIRST_IMAGE_V8])
        image_count = int(setting_values[self.SLOT_IMAGE_COUNT_V8])
        setting_values = setting_values[self.SLOT_FIRST_IMAGE_V8:]
        for i in range(image_count):
            new_values += setting_values[:self.SLOT_IMAGE_FIELD_COUNT_V8]
            channel_count = int(setting_values[self.SLOT_OFFSET_CHANNEL_COUNT_V8])
            setting_values = setting_values[self.SLOT_IMAGE_FIELD_COUNT_V8:]
            for j in range(channel_count):
                # New image/object choice (defaults to images) and a
                # default object name
                new_values += [
                    IO_IMAGES, setting_values[self.SLOT_OFFSET_IMAGE_NAME_V8],
                    "Nuclei"]
                new_values += setting_values[(self.SLOT_OFFSET_IMAGE_NAME_V8 + 1):
                                             self.SLOT_CHANNEL_FIELD_COUNT_V8]
                setting_values = setting_values[self.SLOT_CHANNEL_FIELD_COUNT_V8:]
        return new_values, 9

    def upgrade_new_9_to_10(setting_values):
        '''Added outlines to object loading'''
        new_values = list(setting_values[:self.SLOT_FIRST_IMAGE_V9])
        image_count = int(setting_values[self.SLOT_IMAGE_COUNT_V9])
        setting_values = setting_values[self.SLOT_FIRST_IMAGE_V9:]
        for i in range(image_count):
            new_values += setting_values[:self.SLOT_IMAGE_FIELD_COUNT_V9]
            channel_count = int(setting_values[self.SLOT_OFFSET_CHANNEL_COUNT_V9])
            setting_values = setting_values[self.SLOT_IMAGE_FIELD_COUNT_V9:]
            for j in range(channel_count):
                # Insert wants_outlines (off) and a default outline name
                new_values += setting_values[:self.SLOT_OFFSET_OBJECT_NAME_V9] + \
                    [cps.NO, "NucleiOutlines"] + \
                    setting_values[self.SLOT_OFFSET_OBJECT_NAME_V9:self.SLOT_CHANNEL_FIELD_COUNT_V9]
                setting_values = setting_values[self.SLOT_CHANNEL_FIELD_COUNT_V9:]
        return new_values, 10

    def upgrade_new_10_to_11(setting_values):
        '''Added subdirectory filter'''
        new_values = (setting_values[:self.SLOT_IMAGE_COUNT_V10] + [""] +
                      setting_values[self.SLOT_IMAGE_COUNT_V10:])
        # The yes/no descend-subdirectories choice becomes a 3-way one
        if new_values[self.SLOT_DESCEND_SUBDIRECTORIES] == cps.YES:
            new_values[self.SLOT_DESCEND_SUBDIRECTORIES] = SUB_ALL
        else:
            new_values[self.SLOT_DESCEND_SUBDIRECTORIES] = SUB_NONE
        return new_values, 11

    # Chain the MATLAB-era upgrades first, ending in the CP2.0 rev 1
    # format
    if from_matlab:
        if variable_revision_number == 1:
            setting_values, variable_revision_number = upgrade_1_to_2(setting_values)
        if variable_revision_number == 2:
            setting_values, variable_revision_number = upgrade_2_to_3(setting_values)
        if variable_revision_number == 3:
            setting_values, variable_revision_number = upgrade_3_to_4(setting_values)
        if variable_revision_number == 4:
            setting_values, variable_revision_number = upgrade_4_to_5(setting_values)
        if variable_revision_number == 5:
            setting_values, variable_revision_number = upgrade_5_to_new_1(setting_values)
        from_matlab = False
    assert not from_matlab
    # Chain the CP2.0 upgrades
    if variable_revision_number == 1:
        setting_values, variable_revision_number = upgrade_new_1_to_2(setting_values)
    if variable_revision_number == 2:
        setting_values, variable_revision_number = upgrade_new_2_to_3(setting_values)
    if variable_revision_number == 3:
        setting_values, variable_revision_number = upgrade_new_3_to_4(setting_values)
    if variable_revision_number == 4:
        setting_values, variable_revision_number = upgrade_new_4_to_5(setting_values)
    if variable_revision_number == 5:
        setting_values, variable_revision_number = upgrade_new_5_to_6(setting_values)
    if variable_revision_number == 6:
        setting_values, variable_revision_number = upgrade_new_6_to_7(setting_values)
    if variable_revision_number == 7:
        setting_values, variable_revision_number = upgrade_new_7_to_8(setting_values)
    if variable_revision_number == 8:
        setting_values, variable_revision_number = upgrade_new_8_to_9(setting_values)
    if variable_revision_number == 9:
        setting_values, variable_revision_number = upgrade_new_9_to_10(setting_values)
    if variable_revision_number == 10:
        setting_values, variable_revision_number = upgrade_new_10_to_11(setting_values)
    # Standardize input/output directory name references
    setting_values[self.SLOT_LOCATION] = \
        cps.DirectoryPath.upgrade_setting(setting_values[self.SLOT_LOCATION])
    # Upgrade the file type slot
    if setting_values[self.SLOT_FILE_TYPE] in FF_OTHER_MOVIES_OLD:
        setting_values[self.SLOT_FILE_TYPE] = FF_OTHER_MOVIES
    if setting_values[self.SLOT_FILE_TYPE] in FF_AVI_MOVIES_OLD:
        setting_values[self.SLOT_FILE_TYPE] = FF_AVI_MOVIES
    assert variable_revision_number == self.variable_revision_number, \
        "Cannot read version %d of %s" % (
            variable_revision_number, self.module_name)
    return setting_values, variable_revision_number, from_matlab
def is_load_module(self):
    """Identify this module as an image-set-creating (load) module."""
    return True

def prepare_run(self, workspace):
    """Set up all of the image providers inside the image_set_list

    Dispatches to the flex, movie or still-image preparation routine
    according to the configured file type. In batch mode the image set
    list is restored from the saved state, so nothing is built here.
    """
    if workspace.pipeline.in_batch_mode():
        # Don't set up if we're going to retrieve the image set list
        # from batch mode
        return True
    if self.file_types == FF_OTHER_MOVIES:
        return self.prepare_run_of_flex(workspace)
    if self.load_movies():
        return self.prepare_run_of_movies(workspace)
    return self.prepare_run_of_images(workspace)

def prepare_run_of_images(self, workspace):
    """Set up image providers for image files"""
    pipeline = workspace.pipeline
    # OK to use workspace.frame, since we're in prepare_run
    frame = workspace.frame
    files = self.collect_files(workspace)
    if not files:
        return False
    # Group by metadata only when there are tags to group on
    use_metadata = self.do_group_by_metadata and len(self.get_metadata_tags())
    organize = (self.organize_by_metadata if use_metadata
                else self.organize_by_order)
    result = organize(workspace, files)
    if result:
        return True
    return result
def organize_by_order(self, workspace, files):
    """Organize each kind of file by their lexical order

    files - list of (pathname, image_index) pairs from collect_files;
            image_index selects which image definition the file
            belongs to.

    Pairs the i-th file of each image name into image set i and writes
    the per-image-set measurements. Returns False (after reporting)
    when the per-image file counts disagree, True on success.
    """
    pipeline = workspace.pipeline
    # OK to use workspace.frame, since we're in prepare_run
    frame = workspace.frame
    m = workspace.measurements
    assert isinstance(m, cpmeas.Measurements)
    image_names = self.image_name_vars()
    # One list of pathnames per image name, in collection order
    list_of_lists = [[] for x in image_names]
    for pathname, image_index in files:
        list_of_lists[image_index].append(pathname)
    image_set_count = len(list_of_lists[0])
    # Every image name must supply the same number of files as the
    # first one
    for x, name in zip(list_of_lists[1:], image_names[1:]):
        if len(x) != image_set_count:
            message = "Image %s has %d files, but image %s has %d files" % \
                      (image_names[0], image_set_count, name, len(x))
            pipeline.report_prepare_run_error(self, message)
            if frame is not None:
                # Build a "missing images" table for the error dialog:
                # one row per image set, None where a list ran short
                images = [(tuple(),
                           [os.path.split(list_of_lists[i][j])
                            if len(list_of_lists[i]) > j else None
                            for i in range(len(list_of_lists))])
                          for j in range(image_set_count)]
                self.report_errors([], images, frame)
            return False
    # All lists have equal length here, so this yields a 2-d array of
    # pathname strings indexed as [image_name_index, image_set_index]
    list_of_lists = np.array(list_of_lists)
    root = self.image_directory()
    for i in range(image_set_count):
        for j, image_name in enumerate(image_names):
            image_number = i + 1  # image numbers are 1-based
            full_path = os.path.join(root, list_of_lists[j, i])
            self.write_measurements(
                m, image_number, self.images[j], full_path)
    return True
def organize_by_metadata(self, workspace, files):
    """Organize each kind of file by metadata

    Builds image sets by matching files from the different image names
    on their extracted metadata values, resolving duplicate-metadata
    conflicts, and writes the per-image-set measurements. Returns
    False (after reporting) on unrecoverable conflicts or missing
    images, True on success.
    """
    pipeline = workspace.pipeline
    # OK to use workspace.frame, since we're in prepare_run
    frame = workspace.frame
    #
    # Distribute files according to metadata tags. Each image_name
    # potentially has a subset of the total list of tags. For images
    # without the complete list of tags, we give the image a wildcard
    # for the metadata item that matches everything.
    #
    tags = self.get_metadata_tags()
    #
    # files_by_image_name holds one list of files for each image name index
    #
    files_by_image_name = [[] for i in range(len(self.images))]
    # NOTE: "file" shadows the Python 2 builtin of the same name
    for file, i in files:
        files_by_image_name[i].append(os.path.split(file))
    #
    # Create a monster dictionary tree for each image describing the
    # metadata for each tag. Give a file a metadata value of None
    # if it has no value for a given metadata tag
    #
    d = [{} for i in range(len(self.images))]
    conflicts = []
    for i in range(len(self.images)):
        fd = self.images[i]
        for path, filename in files_by_image_name[i]:
            metadata = self.get_filename_metadata(fd, filename, path)
            parent = d[i]
            # Walk/extend the tree along all tags but the last
            for tag in tags[:-1]:
                value = metadata.get(tag)
                if parent.has_key(value):
                    child = parent[value]
                else:
                    child = {}
                    parent[value] = child
                parent = child
                last_value = value  # (unused)
            # The leaf level holds the (path, filename) tuple
            tag = tags[-1]
            value = metadata.get(tag)
            if parent.has_key(value):
                # There's already a match to this metadata
                conflict = [fd, parent[value], (path, filename)]
                conflicts.append(conflict)
                if not self.check_images:
                    #
                    # Disambiguate conflicts by picking the most recently
                    # modified file.
                    #
                    old_path, old_filename = parent[value]
                    old_stat = os.stat(os.path.join(self.image_directory(),
                                                    old_path, old_filename))
                    old_time = old_stat.st_mtime
                    new_stat = os.stat(os.path.join(self.image_directory(),
                                                    path, filename))
                    new_time = new_stat.st_mtime
                    if new_time > old_time:
                        parent[value] = (path, filename)
            else:
                parent[value] = (path, filename)
    image_sets = self.get_image_sets(d)
    # Separate complete image sets from those missing a file
    missing_images = [image_set for image_set in image_sets
                      if None in image_set[1]]
    image_sets = [image_set for image_set in image_sets
                  if not None in image_set[1]]
    #
    # Handle errors, raising an exception if the user wants to check images
    #
    # NOTE(review): the nesting of the two check_images tests below was
    # reconstructed from whitespace-mangled source - confirm against
    # upstream. As written: with a GUI frame, show the report window and
    # abort if checking is on; headless, build a text report instead.
    if len(conflicts) or len(missing_images):
        if frame:
            self.report_errors(conflicts, missing_images, frame)
            if self.check_images:
                return False
        if self.check_images.value:
            message = ""
            if len(conflicts):
                message += "Conflicts found:\n"
                for conflict in conflicts:
                    metadata = self.get_filename_metadata(
                        conflict[0], conflict[1][1], conflict[1][0])
                    message += (
                        "Metadata: %s, First path: %s, First filename: %s, Second path: %s, Second filename: %s\n" %
                        (str(metadata), conflict[1][0], conflict[1][1],
                         conflict[2][0], conflict[2][1]))
            if len(missing_images):
                message += "Missing images:\n"
                for mi in missing_images:
                    for i in range(len(self.images)):
                        fd = self.images[i]
                        if mi[1][i] is None:
                            message += ("%s: missing " %
                                        fd.channels[0].image_name.value)
                        else:
                            message += ("%s: path=%s, file=%s" %
                                        (fd.channels[0].image_name.value,
                                         mi[1][i][0], mi[1][i][1]))
            pipeline.report_prepare_run_error(self, message)
            return False
    root = self.image_directory()
    features = {}
    #
    # Two cases - the measurements already have image sets or the
    # measurements have no image sets.
    # 1: measurements must have an image measurement corresponding to each
    # tag. Create a dictionary of tag values to a list of image numbers
    # with those tag values. Populate our images to that list.
    # 2: write fresh image set records to measurements.
    #
    measurements = workspace.measurements
    assert isinstance(measurements, cpmeas.Measurements)
    if measurements.image_set_count > 0:
        match_metadata = True
        md_dict = self.get_image_numbers_by_tags(workspace, tags)
        if md_dict is None:
            return False
        n_image_sets = measurements.image_set_count
    else:
        match_metadata = False
        n_image_sets = len(image_sets)
    for idx, image_set in enumerate(image_sets):
        keys = {}
        for tag, value in zip(tags, image_set[0]):
            keys[tag] = value
        if match_metadata:
            # NOTE(review): "keys" is a dict, which is unhashable, while
            # get_image_numbers_by_tags keys its dictionary on tuples -
            # this lookup looks like it would raise TypeError; confirm
            # against upstream before relying on the matching path.
            image_numbers = md_dict[keys]
        else:
            image_numbers = [idx + 1]
        for i in range(len(self.images)):
            path = os.path.join(image_set[1][i][0], image_set[1][i][1])
            full_path = os.path.join(root, path)
            for image_number in image_numbers:
                d = self.write_measurements(
                    None, image_number, self.images[i], full_path)
                # Accumulate per-feature columns, one slot per image set
                for k, v in d.iteritems():
                    if not features.has_key(k):
                        features[k] = [None] * n_image_sets
                    features[k][image_number - 1] = v
    for k, v in features.iteritems():
        workspace.measurements.add_all_measurements(cpmeas.IMAGE, k, v)
    return True
def get_image_sets(self, d):
    """Get image sets from a dictionary tree

    d - one dictionary tree per image.
    returns a list of tuples. The first element contains a tuple of
    metadata values and the second element contains a list of
    (path,filename) tuples for each image (or are None for errors
    where there is no metadata match).

    Recursively descends the parallel trees one metadata level at a
    time; a None key in a tree acts as a wildcard matching any value
    at that level.
    """
    if not any([isinstance(dd, dict) for dd in d]):
        # This is a leaf if no elements are dictionaries
        return [(tuple(), d)]
    #
    # Get each represented value for this slot
    #
    values = set()
    for dd in d:
        if not dd is None:
            values.update([x for x in dd.keys() if x is not None])
    result = []
    for value in sorted(values):
        # For each image's subtree pick, in order of preference:
        subgroup = tuple((dd and dd.has_key(None) and dd[None]) or  # wildcard metadata
                         (dd and dd.get(value)) or  # fetch subvalue or None if missing
                         None  # metadata is missing
                         for dd in d)
        subsets = self.get_image_sets(subgroup)
        for subset in subsets:
            # prepend the current key to the tuple of keys and
            # duplicate the filename tuple
            subset = ((value,) + subset[0], subset[1])
            result.append(subset)
    return result
def get_image_numbers_by_tags(self, workspace, tags):
    '''Make a dictionary of tag values to image numbers

    Look up the metadata values for each of the tags in "tags" and
    create a dictionary of tag values to the list of image numbers
    that have those tag values. This dictionary can then be used to
    populate all image sets with a particular tag value.

    workspace.measurements - the measurements populated with metadata
                             values for each tag
    tags - the metadata tags to be used for matching

    Returns the dictionary (tuple of tag values -> list of image
    numbers), or None after reporting an error when some tag has no
    corresponding metadata measurement.
    '''
    measurements = workspace.measurements
    metadata_features = [
        x for x in measurements.get_feature_names(cpmeas.IMAGE)
        if x.startswith(cpmeas.C_METADATA)]
    # Every requested tag must have been written by a prior Load module
    for tag in tags:
        if "_".join((cpmeas.C_METADATA, tag)) not in metadata_features:
            message = (
                "LoadImages needs the prior Load modules to define "
                'the metadata tag, "%s", in order to match metadata' % tag)
            workspace.pipeline.report_prepare_run_error(self, message)
            return None
    md_dict = {}
    for i in measurements.get_image_numbers():
        keys = tuple([measurements.get_measurement(
            cpmeas.IMAGE, "_".join((cpmeas.C_METADATA, tag)), i)
            for tag in tags])
        if keys in md_dict:
            # Bug fix: append the image number to the list for this key.
            # The original called md_dict.append(i), which raised
            # AttributeError as soon as two image sets shared the same
            # tag values.
            md_dict[keys].append(i)
        else:
            md_dict[keys] = [i]
    return md_dict
def report_errors(self, conflicts, missing_images, frame):
    """Create an error report window

    conflicts: 3-tuple of file dictionary, first path/file and second
               path/file for two images with the same metadata
    missing_images: two-tuple of metadata keys and the file tuples
                    where at least one of the file tuples is None
                    indicating a missing image
    frame: the parent for the error report

    Pure GUI side effect: builds and shows a wx.Frame with one
    wx.ListCtrl per error category.
    """
    import wx
    import wx.html
    # Place the report window directly below the parent frame
    my_frame = wx.Frame(frame, title="Load images: Error report",
                        size=(600, 800),
                        pos=(frame.Position[0],
                             frame.Position[1] + frame.Size[1]))
    my_frame.Icon = frame.Icon
    panel = wx.Panel(my_frame)
    uber_sizer = wx.BoxSizer()
    my_frame.SetSizer(uber_sizer)
    uber_sizer.Add(panel, 1, wx.EXPAND)
    sizer = wx.BoxSizer(wx.VERTICAL)
    panel.SetSizer(sizer)
    font = wx.Font(16, wx.FONTFAMILY_SWISS, wx.FONTSTYLE_NORMAL,
                   wx.FONTWEIGHT_BOLD)
    tags = self.get_metadata_tags()
    tag_ct = len(tags)
    tables = []
    if len(conflicts):
        title = wx.StaticText(panel,
                              label="Conflicts (two files with same metadata)")
        title.Font = font
        sizer.Add(title, 0, wx.ALIGN_CENTER_HORIZONTAL |
                  wx.ALIGN_CENTER_VERTICAL | wx.ALL, 3)
        table = wx.ListCtrl(panel, style=wx.LC_REPORT)
        tables.append(table)
        sizer.Add(table, 1, wx.EXPAND | wx.ALL, 3)
        # One column per metadata tag...
        for tag, index in zip(tags, range(tag_ct)):
            table.InsertColumn(index, tag)
        # ...then the two conflicting path/file pairs.
        # NOTE(review): relies on "index" leaking out of the loop above
        # (last tag index); if tags were empty this would raise
        # NameError - presumably conflicts imply at least one tag.
        table.InsertColumn(index + 1, "First path")
        table.InsertColumn(index + 2, "First file name")
        table.InsertColumn(index + 3, "Second path")
        table.InsertColumn(index + 4, "Second file name")
        for conflict in conflicts:
            metadata = self.get_filename_metadata(
                conflict[0], conflict[1][1], conflict[1][0])
            row = [metadata.get(tag) or "-" for tag in tags]
            row.extend([conflict[1][0], conflict[1][1],
                        conflict[2][0], conflict[2][1]])
            table.Append(row)
    if len(missing_images):
        title = wx.StaticText(panel, label="Missing images")
        title.Font = font
        sizer.Add(title, 0, wx.ALIGN_CENTER_HORIZONTAL |
                  wx.ALIGN_CENTER_VERTICAL | wx.ALL, 3)
        table = wx.ListCtrl(panel, style=wx.LC_REPORT)
        tables.append(table)
        sizer.Add(table, 1, wx.EXPAND | wx.ALL, 3)
        # NOTE(review): nesting reconstructed from mangled source - the
        # tag columns are only added when grouping by metadata, while
        # the per-image path/filename columns are always added at
        # offsets assuming tag_ct tag columns; confirm against upstream.
        if self.do_group_by_metadata:
            for tag, index in zip(tags, range(tag_ct)):
                table.InsertColumn(index, tag)
        for fd, index in zip(self.images, range(len(self.images))):
            table.InsertColumn(index * 2 + tag_ct,
                               "%s path" % fd.channels[0].image_name.value)
            table.InsertColumn(index * 2 + 1 + tag_ct,
                               "%s filename" % fd.channels[0].image_name.value)
        for metadata, files_and_paths in missing_images:
            row = list(metadata)
            for file_and_path in files_and_paths:
                if file_and_path is None:
                    row.extend(["missing", "missing"])
                else:
                    row.extend(file_and_path)
            table.Append(row)
    # Size each column to its widest header or cell text
    best_total_width = 0
    for table in tables:
        total_width = 0
        dc = wx.ClientDC(table)
        dc.Font = table.Font
        try:
            for col in range(table.ColumnCount):
                text = table.GetColumn(col).Text
                width = dc.GetTextExtent(text)[0]
                for row in range(table.ItemCount):
                    text = table.GetItem(row, col).Text
                    width = max(width, dc.GetTextExtent(text)[0])
                width += 16  # padding for sort arrows / margins
                table.SetColumnWidth(col, width)
                total_width += width + 4
        finally:
            dc.Destroy()
        best_total_width = max(best_total_width, total_width)
        table.SetMinSize((best_total_width, table.GetMinHeight()))
    my_frame.Fit()
    my_frame.Show()
def prepare_run_of_flex(self, workspace):
    '''Set up image providers for flex files

    Reads the OME-XML metadata of each flex/multi-series file, expands
    every series / Z / T plane (or movie-frame channel group) into an
    image set and writes the file, path, URL, series, frame and
    metadata measurements. Returns False (after reporting) on any
    inconsistency, True on success.
    '''
    import bioformats.omexml
    from bioformats.formatreader import get_omexml_metadata
    pipeline = workspace.pipeline
    # OK to use workspace.frame, since we're in prepare_run
    frame = workspace.frame
    m = workspace.measurements
    assert isinstance(m, cpmeas.Measurements)
    if m.image_set_count > 0 and self.do_group_by_metadata:
        # A prior Load module already made image sets: match ours to
        # theirs by metadata plus Z / T / series
        match_metadata = True
        tags = list(self.get_metadata_tags()) + [M_Z, M_T, C_SERIES]
        md_dict = self.get_image_numbers_by_tags(workspace, tags)
        if md_dict is None:
            return False
    else:
        match_metadata = False
    files = self.collect_files(workspace)
    if len(files) == 0:
        return False
    #
    # Organize the files into one column per image index.
    #
    index_count = np.bincount([x[1] for x in files])
    if np.any(index_count != index_count[0]):
        bad = np.argwhere(index_count != index_count[0]).flatten()
        # Bug fix: report the first mismatched image. The original
        # indexed self.images and index_count with the whole "bad"
        # array, which raised TypeError instead of this message.
        raise RuntimeError(
            "Image %s has %d files, but image %s has %d files" %
            (self.images[0].channels[0].image_name.value,
             index_count[0],
             self.images[bad[0]].channels[0].image_name.value,
             index_count[bad[0]]))
    file_list = [[] for _ in range(len(index_count))]
    for file_pathname, image_index in files:
        file_list[image_index].append(file_pathname)
    #
    # Rearrange to a row-oriented list
    #
    file_list = [[column[i] for column in file_list]
                 for i in range(len(file_list[0]))]
    root = self.image_directory()
    image_set_count = 0
    for image_set_files in file_list:
        # Each file of this row walks the same series, so restart the
        # image-set counter per file and let them advance in lockstep
        starting_image_index = image_set_count
        d = []  # per-series dict of expected "Z"/"T" sizes
        for image_settings, file_pathname in \
                zip(self.images, image_set_files):
            pathname = os.path.join(self.image_directory(), file_pathname)
            path, filename = os.path.split(pathname)
            url = pathname2url(os.path.abspath(pathname))
            metadata = self.get_filename_metadata(image_settings,
                                                  filename, path)
            image_set_count = starting_image_index
            xml = get_omexml_metadata(pathname)
            omemetadata = bioformats.omexml.OMEXML(xml)
            image_count = omemetadata.image_count
            if len(d) == 0:
                d = [{} for _ in range(image_count)]
            elif len(d) != image_count:
                message = (
                    "File %s has %d series, but file %s has %d series." %
                    (image_set_files[0], len(d), file_pathname,
                     image_count))
                pipeline.report_prepare_run_error(self, message)
                return False
            for i in range(image_count):
                pixels = omemetadata.image(i).Pixels
                channel_count = pixels.SizeC
                stack_count = pixels.SizeZ
                # Z / T sizes must agree across the files of a set
                if not d[i].has_key("Z"):
                    d[i]["Z"] = stack_count
                elif stack_count != d[i]["Z"]:
                    message = (("File %s, series %d has %d Z stacks, "
                                "but file %s has %d") %
                               (image_set_files[0], i, d[i]["Z"],
                                file_pathname, stack_count))
                    pipeline.report_prepare_run_error(self, message)
                    return False
                timepoint_count = pixels.SizeT
                if not d[i].has_key("T"):
                    d[i]["T"] = timepoint_count
                elif timepoint_count != d[i]["T"]:
                    message = (("File %s, series %d has %d "
                                "timepoints, but file %s has %d") %
                               (image_set_files[0], i, d[i]["T"],
                                file_pathname, timepoint_count))
                    pipeline.report_prepare_run_error(self, message)
                    return False
                if image_settings.wants_movie_frame_grouping:
                    #
                    # For movie frame grouping, assume that all of
                    # the images are to be processed in a consecutive
                    # series taking T as the outside, then Z, then C
                    # and divvied up among the channels.
                    #
                    # Metadata Z = 1
                    # Metadata T is the cycle #
                    #
                    # Really, whoever set up the microscope should
                    # have done it right so that the file saved
                    # the channels into the TIF correctly.
                    #
                    nframes = timepoint_count * stack_count * channel_count
                    nchannels = image_settings.channels_per_group.value
                    if nframes % nchannels != 0:
                        logger.warning(
                            ("Warning: the movie, %s, has %d frames divided into "
                             "%d channels per group.\n"
                             "%d frames will be discarded.\n") %
                            (pathname, nframes, nchannels,
                             nframes % nchannels))
                        nframes -= nframes % nchannels
                    nsets = int(nframes / nchannels)
                    for idx in range(nsets):
                        frame_metadata = metadata.copy()
                        frame_metadata[M_Z] = 0  # so sorry, real Z obliterated
                        frame_metadata[M_T] = idx
                        frame_metadata[C_SERIES] = i
                        image_numbers = [image_set_count + 1]
                        if match_metadata:
                            # Bug fix: the original built an unhashable
                            # dict as the lookup key (TypeError). Keys
                            # in md_dict are tuples in "tags" order.
                            # NOTE(review): values are str()-ified on
                            # the assumption the matched metadata
                            # measurements are strings - confirm
                            # against the upstream Load module.
                            key = tuple([str(frame_metadata[tag])
                                         for tag in tags])
                            if key not in md_dict:
                                # Bug fix: the original message had no
                                # "%s" placeholder and a dangling "%",
                                # so formatting raised instead of
                                # reporting.
                                message = (
                                    "Could not find a matching image set for %s" %
                                    ", ".join(["%s=%s" % kv
                                               for kv in frame_metadata.items()]))
                                pipeline.report_prepare_run_error(
                                    self, message)
                                return False
                            image_numbers = md_dict[key]
                        for image_number in image_numbers:
                            for channel_settings in image_settings.channels:
                                image_name = channel_settings.get_image_name()
                                channel = int(
                                    channel_settings.channel_number.value) - 1
                                # Frame index within the movie for this
                                # channel of this group
                                if image_settings.interleaving == I_INTERLEAVED:
                                    cidx = idx * nchannels + channel
                                else:
                                    cidx = channel * nsets + idx
                                c = cidx % channel_count
                                z = int(cidx / channel_count) % stack_count
                                t = int(cidx / channel_count /
                                        stack_count) % timepoint_count
                                m.add_measurement(
                                    cpmeas.IMAGE,
                                    "_".join((C_FILE_NAME, image_name)),
                                    filename, image_set_number=image_number)
                                m.add_measurement(
                                    cpmeas.IMAGE,
                                    "_".join((C_PATH_NAME, image_name)),
                                    path, image_set_number=image_number)
                                m.add_measurement(
                                    cpmeas.IMAGE,
                                    "_".join((C_URL, image_name)),
                                    url, image_set_number=image_number)
                                m.add_measurement(
                                    cpmeas.IMAGE,
                                    "_".join((C_SERIES, image_name)),
                                    i, image_set_number=image_number)
                                m.add_measurement(
                                    cpmeas.IMAGE,
                                    "_".join((C_FRAME, image_name)),
                                    cidx, image_set_number=image_number)
                                for k in frame_metadata.keys():
                                    m.add_measurement(
                                        cpmeas.IMAGE,
                                        "_".join((cpmeas.C_METADATA, k)),
                                        frame_metadata[k],
                                        image_set_number=image_number)
                        image_set_count += 1
                else:
                    # Compute the strides of the C / Z / T dimensions
                    # within the plane sequence from the file's
                    # dimension order
                    distance = 1
                    for dimension in pixels.DimensionOrder[2:]:
                        if dimension == "C":
                            strideC = distance
                            distance *= pixels.SizeC
                        elif dimension == "Z":
                            strideZ = distance
                            distance *= pixels.SizeZ
                        elif dimension == "T":
                            strideT = distance
                            distance *= pixels.SizeT
                    for z in range(pixels.SizeZ):
                        for t in range(pixels.SizeT):
                            frame_metadata = metadata.copy()
                            frame_metadata[M_Z] = z
                            frame_metadata[M_T] = t
                            frame_metadata[C_SERIES] = i
                            image_numbers = [image_set_count + 1]
                            if match_metadata:
                                # Same key/message fixes as in the
                                # movie-grouping branch above
                                key = tuple([str(frame_metadata[tag])
                                             for tag in tags])
                                if key not in md_dict:
                                    message = (
                                        "Could not find a matching image set for %s" %
                                        ", ".join(["%s=%s" % kv
                                                   for kv in frame_metadata.items()]))
                                    pipeline.report_prepare_run_error(
                                        self, message)
                                    return False
                                image_numbers = md_dict[key]
                            for image_number in image_numbers:
                                for channel_settings in image_settings.channels:
                                    c = int(
                                        channel_settings.channel_number.value) - 1
                                    image_name = channel_settings.get_image_name()
                                    if c >= channel_count:
                                        message = (
                                            "The flex file, %s, series # %d, has only %d channels. "
                                            "%s is assigned to channel % d") % (
                                            file_pathname, i, channel_count,
                                            image_name, c + 1)
                                        pipeline.report_prepare_run_error(
                                            self, message)
                                        return False
                                    # Plane index of (c, z, t) in the
                                    # file's storage order
                                    index = (c * strideC + t * strideT +
                                             z * strideZ)
                                    m.add_measurement(
                                        cpmeas.IMAGE,
                                        "_".join((C_FILE_NAME, image_name)),
                                        filename,
                                        image_set_number=image_number)
                                    m.add_measurement(
                                        cpmeas.IMAGE,
                                        "_".join((C_PATH_NAME, image_name)),
                                        path, image_set_number=image_number)
                                    m.add_measurement(
                                        cpmeas.IMAGE,
                                        "_".join((C_URL, image_name)),
                                        url, image_set_number=image_number)
                                    m.add_measurement(
                                        cpmeas.IMAGE,
                                        "_".join((C_SERIES, image_name)),
                                        i, image_set_number=image_number)
                                    m.add_measurement(
                                        cpmeas.IMAGE,
                                        "_".join((C_FRAME, image_name)),
                                        index,
                                        image_set_number=image_number)
                                    for k in frame_metadata.keys():
                                        m.add_measurement(
                                            cpmeas.IMAGE,
                                            "_".join((cpmeas.C_METADATA, k)),
                                            frame_metadata[k],
                                            image_set_number=image_number)
                            image_set_count += 1
    return True
Each per-image type # list is composed of tuples of pathname and frame # # list_of_lists = [[] for x in image_names] image_index = 0 for pathname, image_group_index in files: pathname = os.path.join(self.image_directory(), pathname) frame_count = self.get_frame_count(pathname) if frame_count == 0: print "Warning - no frame count detected" frame_count = 256 # # 3 choices here: # # No grouping by movie frames: one channel # Interleaved grouping # Sequential grouping # image = self.images[image_group_index] if image.wants_movie_frame_grouping: group_size = image.channels_per_group.value remainder = frame_count % group_size if remainder > 0: logger.warning( ("Warning: the movie, %s, has %d frames divided into " "%d channels per group.\n" "%d frames will be discarded.\n") % (pathname, frame_count, group_size, remainder)) group_count = int(frame_count / group_size) for group_number in range(group_count): for i, channel in enumerate(image.channels): channel_idx = int(channel.channel_number.value) - 1 if image.interleaving == I_INTERLEAVED: frame_number = \ group_number * group_size + channel_idx else: frame_number = \ group_count * channel_idx + group_number list_of_lists[image_index + i] += [ (pathname, frame_number, group_number, image_group_index)] image_index += len(image.channels) else: for i in range(frame_count): list_of_lists[image_group_index].append( (pathname, i, i, image_group_index)) image_set_count = len(list_of_lists[0]) for x, name in zip(list_of_lists[1:], image_names): if len(x) != image_set_count: message = ( "Image %s has %d frames, but image %s has %d frames" % (image_names[0], image_set_count, name, len(x))) pipeline.report_prepare_run_error(self, message) return False list_of_lists = np.array(list_of_lists, dtype=object) for i in range(0, image_set_count): for name, (file, frame, t, image_group_index) \ in zip(image_names, list_of_lists[:, i]): image_group = self.images[image_group_index] self.write_measurements(m, i + 1, image_group, file, 
frame=frame, channel_name=name) m.add_measurement(cpmeas.IMAGE, "_".join((cpmeas.C_METADATA, M_T)), t, image_set_number=i + 1) return True def prepare_to_create_batch(self, workspace, fn_alter_path): '''Prepare to create a batch file This function is called when CellProfiler is about to create a file for batch processing. The function adjusts any paths in settings or paths in measurements using the function "fn_alter_path" to map between file systems. pipeline - the pipeline to be saved image_set_list - the image set list to be saved fn_alter_path - this is a function that takes a pathname on the local host and returns a pathname on the remote host. It handles issues such as replacing backslashes and mapping mountpoints. It should be called for every pathname stored in the settings or legacy fields. ''' image_set_list = workspace.image_set_list m = workspace.measurements for image_group in self.images: for channel in image_group.channels: m.alter_path_for_create_batch( channel.get_image_name(), self.channel_wants_images(channel), fn_alter_path) self.location.alter_for_create_batch_files(fn_alter_path) return True def run(self, workspace): """Run the module - add the measurements """ header = ["Image name", "Path", "Filename"] ratio = [1.0, 2.5, 2.0] tags = self.get_metadata_tags() ratio += [1.0 for tag in tags] ratio = [x / sum(ratio) for x in ratio] header += tags statistics = [] m = workspace.measurements image_set = workspace.image_set image_set_metadata = {} image_size = None first_image_filename = None for fd in self.images: for channel in fd.channels: wants_images = self.channel_wants_images(channel) image_name = channel.get_image_name() if wants_images: feature = C_URL + "_" + image_name path_feature = C_PATH_NAME + "_" + image_name file_feature = C_FILE_NAME + "_" + image_name else: feature = C_OBJECTS_URL + "_" + image_name path_feature = C_OBJECTS_PATH_NAME + "_" + image_name file_feature = C_OBJECTS_FILE_NAME + "_" + image_name row = [image_name, 
m.get_current_image_measurement(path_feature),
                       m.get_current_image_measurement(file_feature)]
                url = m.get_measurement(cpmeas.IMAGE, feature)
                full_name = url2pathname(url.encode('utf-8'))
                path, filename = os.path.split(full_name)
                rescale = channel.rescale.value
                metadata = self.get_filename_metadata(fd, filename, path)
                # Pick the provider matching the file format; movie formats
                # need the frame (and, for flex, the series) recorded during
                # prepare_run.
                if self.file_types == FF_STK_MOVIES:
                    index = m.get_measurement(
                        cpmeas.IMAGE, "_".join((C_FRAME, image_name)))
                    provider = LoadImagesSTKFrameProvider(
                        image_name, path, filename, index, rescale)
                elif self.file_types == FF_OTHER_MOVIES:
                    series = m.get_measurement(
                        cpmeas.IMAGE, "_".join((C_SERIES, image_name)))
                    index = m.get_measurement(
                        cpmeas.IMAGE, "_".join((C_FRAME, image_name)))
                    metadata[C_SERIES] = series
                    for f in (M_Z, M_T):
                        feature = cpmeas.C_METADATA + "_" + f
                        metadata[f] = m.get_measurement(cpmeas.IMAGE, feature)
                    provider = LoadImagesFlexFrameProvider(
                        image_name, path, filename, series, index, rescale)
                elif self.file_types == FF_AVI_MOVIES:
                    index = m.get_measurement(
                        cpmeas.IMAGE, "_".join((C_FRAME, image_name)))
                    metadata[M_T] = m.get_measurement(
                        cpmeas.IMAGE, cpmeas.C_METADATA + "_" + M_T)
                    provider = LoadImagesMovieFrameProvider(
                        image_name, path, filename, index, rescale)
                else:
                    provider = LoadImagesImageProvider(
                        image_name, path, filename, rescale)
                if wants_images:
                    image = provider.provide_image(workspace.image_set)
                    pixel_data = image.pixel_data
                    image_set.providers.append(provider)
                    m.add_image_measurement(
                        "_".join((C_MD5_DIGEST, image_name)),
                        provider.get_md5_hash(m))
                    m.add_image_measurement(
                        "_".join((C_SCALING, image_name)), image.scale)
                    m.add_image_measurement(
                        "_".join((C_HEIGHT, image_name)),
                        int(pixel_data.shape[0]))
                    m.add_image_measurement(
                        "_".join((C_WIDTH, image_name)),
                        int(pixel_data.shape[1]))
                    # Warn when images in the set have different dimensions;
                    # the first image's size is the reference.
                    if image_size is None:
                        image_size = tuple(pixel_data.shape[:2])
                        first_image_filename = filename
                    elif image_size != tuple(pixel_data.shape[:2]):
                        warning = bad_sizes_warning(image_size,
                                                    first_image_filename,
                                                    pixel_data.shape[:2],
                                                    filename)
                        if get_headless():
                            print warning
                        elif self.show_window:
                            workspace.display_data.warning = warning
                else:
                    ################
                    #
                    # Interpret file as objects
                    #
                    # Get the frame count from the OME-XML metadata
                    #
                    from bioformats.formatreader import get_omexml_metadata
                    import bioformats.omexml
                    md = get_omexml_metadata(provider.get_full_name())
                    md = bioformats.omexml.OMEXML(md)
                    mdpixels = md.image().Pixels
                    if (mdpixels.channel_count == 1 and
                            mdpixels.Channel().SamplesPerPixel == 3):
                        #
                        # Single interleaved color image
                        #
                        n_frames = 1
                    else:
                        n_frames = \
                            mdpixels.SizeZ * mdpixels.SizeC * mdpixels.SizeT
                    series = provider.series
                    ijv = np.zeros((0, 3), int)
                    offset = 0
                    # Accumulate labels from every frame into a single
                    # ijv (row, column, label) table, offsetting label
                    # numbers so frames don't collide.
                    for index in range(n_frames):
                        provider.index = index
                        provider.rescale = False
                        labels = provider.provide_image(None).pixel_data
                        shape = labels.shape[:2]
                        labels = convert_image_to_objects(labels)
                        i, j = np.mgrid[0:labels.shape[0], 0:labels.shape[1]]
                        ijv = np.vstack((
                            ijv,
                            np.column_stack((i[labels != 0],
                                             j[labels != 0],
                                             labels[labels != 0] + offset))))
                        if ijv.shape[0] > 0:
                            offset = np.max(ijv[:, 2])
                    o = cpo.Objects()
                    o.set_ijv(ijv, shape)
                    object_set = workspace.object_set
                    assert isinstance(object_set, cpo.ObjectSet)
                    object_name = channel.object_name.value
                    object_set.add_objects(o, object_name)
                    provider.release_memory()
                    row[0] = object_name
                    I.add_object_count_measurements(m, object_name, o.count)
                    I.add_object_location_measurements_ijv(
                        m, object_name, ijv)
                    if channel.wants_outlines:
                        # Build a binary outline image over all label planes
                        outlines = np.zeros(shape, bool)
                        for l, c in o.get_labels():
                            outlines |= centrosome.outline.outline(l).astype(
                                outlines.dtype)
                        outline_image = cpimage.Image(outlines,
                                                      path_name=path,
                                                      file_name=filename)
                        workspace.image_set.add(channel.outlines_name.value,
                                                outline_image)
                # Fill in one table cell per metadata tag (blank if absent)
                for tag in tags:
                    if metadata.has_key(tag):
                        row.append(metadata[tag])
                    elif image_set_metadata.has_key(tag):
                        row.append(image_set_metadata[tag])
                    else:
                        row.append("")
                statistics.append(row)
        workspace.display_data.statistics = statistics
        workspace.display_data.col_labels = header

    def
display(self, workspace, figure): if self.show_window: if hasattr(workspace.display_data, "warning"): show_warning("Images have different sizes", workspace.display_data.warning, get_show_report_bad_sizes_dlg, set_show_report_bad_sizes_dlg) figure.set_subplots((1, 1)) figure.subplot_table(0, 0, workspace.display_data.statistics, col_labels=workspace.display_data.col_labels) def get_filename_metadata(self, fd, filename, path): """Get the filename and path metadata for a given image fd - file/image dictionary filename - filename to be parsed path - path to be parsed """ metadata = {} if self.has_file_metadata(fd): metadata.update(cpmeas.extract_metadata(fd.file_metadata.value, filename)) if self.has_path_metadata(fd): path = os.path.abspath(os.path.join(self.image_directory(), path)) metadata.update(cpmeas.extract_metadata(fd.path_metadata.value, path)) if needs_well_metadata(metadata.keys()): well_row_token, well_column_token = well_metadata_tokens(metadata.keys()) metadata[cpmeas.FTR_WELL] = (metadata[well_row_token] + metadata[well_column_token]) return metadata def write_measurements(self, measurements, image_number, image_settings, full_path, series=None, frame=None, channel_name=None): '''Write the image filename, path, url and metadata meas. for a file measurements - write measurements into this measurements structure. if None, return the measurements as a dictionary image_number - image number for this image image_settings - the image settings group for the channel full_path - the full file path to the image series - For formats like TIFF which can have multiple image stacks, this is the index to the image stack. frame - this is the frame in a movie or the index within an image stack. channel - write measurements for all channels if None, else write only for this channel. 
''' if measurements is not None: assert isinstance(measurements, cpmeas.Measurements) def add_fn(feature, value): measurements.add_measurement( cpmeas.IMAGE, feature, value, image_set_number=image_number) else: d = {} def add_fn(feature, value): d[feature] = value path, filename = os.path.split(full_path) url = pathname2url(full_path) metadata = self.get_filename_metadata(image_settings, filename, path) for channel in image_settings.channels: if (channel_name is not None and channel_name != channel.get_image_name()): continue if self.channel_wants_images(channel): path_name_category = C_PATH_NAME file_name_category = C_FILE_NAME url_category = C_URL else: path_name_category = C_OBJECTS_PATH_NAME file_name_category = C_OBJECTS_FILE_NAME url_category = C_OBJECTS_URL image_data = [(path_name_category, path), (file_name_category, filename), (url_category, url)] if series is not None: image_data.append((C_SERIES, series)) if frame is not None: image_data.append((C_FRAME, frame)) for category, value in image_data: add_fn("_".join((category, channel.get_image_name())), value) for key in metadata.keys(): add_fn("_".join((cpmeas.C_METADATA, key)), metadata[key]) if measurements is None: return d def get_frame_count(self, pathname): """Return the # of frames in a movie""" import bioformats.omexml from bioformats.formatreader import get_omexml_metadata if self.file_types in (FF_AVI_MOVIES, FF_OTHER_MOVIES, FF_STK_MOVIES): url = pathname2url(pathname) xml = get_omexml_metadata(pathname) omexml = bioformats.omexml.OMEXML(xml) frame_count = omexml.image(0).Pixels.SizeT if frame_count == 1: frame_count = omexml.image(0).Pixels.SizeZ return frame_count raise NotImplementedError("get_frame_count not implemented for %s" % self.file_types) @staticmethod def has_file_metadata(fd): '''True if the metadata choice is either M_FILE_NAME or M_BOTH fd - one of the image file descriptors from self.images ''' return fd.metadata_choice in (M_FILE_NAME, M_BOTH) @staticmethod def 
has_path_metadata(fd):
        '''True if the metadata choice is either M_PATH or M_BOTH

        fd - one of the image file descriptors from self.images
        '''
        return fd.metadata_choice in (M_PATH, M_BOTH)

    def get_metadata_tags(self, fd=None):
        """Find the metadata tags for the indexed image

        fd - an image file directory from self.images
        """
        if not fd:
            # No descriptor given: return the sorted union of the tags
            # over all images
            s = set()
            for fd in self.images:
                s.update(self.get_metadata_tags(fd))
            tags = list(s)
            tags.sort()
            return tags
        tags = []
        if self.has_file_metadata(fd):
            tags += cpmeas.find_metadata_tokens(fd.file_metadata.value)
        if self.has_path_metadata(fd):
            tags += cpmeas.find_metadata_tokens(fd.path_metadata.value)
        # Movie formats implicitly supply frame-position metadata
        if self.file_types == FF_OTHER_MOVIES:
            tags += [M_Z, M_T, C_SERIES]
        elif self.file_types in (FF_AVI_MOVIES, FF_STK_MOVIES):
            tags += [M_T]
        if needs_well_metadata(tags):
            tags += [cpmeas.FTR_WELL]
        return tags

    def get_groupings(self, workspace):
        '''Return the groupings as indicated by the metadata_fields setting

        returns a tuple of key_names and group_list:
        key_names - the names of the keys that identify the groupings
        group_list - a sequence composed of two-tuples.
                     the first element of the tuple is a dictionary giving
                     the metadata values for the metadata keys
                     the second element of the tuple is a sequence of
                     image numbers comprising the image sets of the group
        For instance, an experiment might have key_names of 'Metadata_Row'
        and 'Metadata_Column' and a group_list of:
        [ ({'Metadata_Row':'A','Metadata_Column':'01'}, [1,97,193]),
          ({'Metadata_Row':'A','Metadata_Column':'02'), [2,98,194]),... ]

        Returns None to indicate that the module does not contribute any
        groupings.
        '''
        image_name = self.images[0].channels[0].get_image_name()
        url_feature = '_'.join((C_URL, image_name))
        if self.do_group_by_metadata:
            keys = ['_'.join((cpmeas.C_METADATA, s))
                    for s in self.metadata_fields.selections]
            mapping = dict([(key, s) for key, s in zip(
                keys, self.metadata_fields.selections)])
            if len(keys) == 0:
                return None
        elif self.load_movies() and self.file_types == FF_OTHER_MOVIES:
            #
            # Pick the first channel for grouping movie frames
            #
            series_feature = '_'.join((C_SERIES, image_name))
            # Default for Flex is to group by file name and series
            keys = (url_feature, series_feature)
            mapping = dict(((url_feature, C_URL),
                            (series_feature, C_SERIES)))
        elif self.load_movies():
            keys = (url_feature,)
            mapping = dict(((url_feature, C_URL),))
        else:
            return None
        groupings = workspace.measurements.get_groupings(keys)
        #
        # Rewrite the grouping dictionaries using the preferred key names
        #
        groupings = [(dict([(mapping[k], g[k]) for k in g]), image_numbers)
                     for g, image_numbers in groupings]
        return tuple([mapping[k] for k in keys]), groupings

    def load_images(self):
        """Return true if we're loading images
        """
        return self.file_types == FF_INDIVIDUAL_IMAGES

    def load_movies(self):
        """Return true if we're loading movies
        """
        return self.file_types != FF_INDIVIDUAL_IMAGES

    def load_choice(self):
        """Return the way to match against files: MS_EXACT_MATCH,
        MS_REGULAR_EXPRESSIONS or MS_ORDER
        """
        return self.match_method.value

    def analyze_sub_dirs(self):
        """Return True if we should analyze subdirectories in addition
        to the root image directory
        """
        return self.descend_subdirectories != SUB_NONE

    def collect_files(self, workspace):
        """Collect the files that match the filter criteria

        Collect the files that match the filter criteria, starting at the
        image directory and descending downward if AnalyzeSubDirs allows it.
Returns a list of two-tuples where the first element of the tuple is
        the path from the root directory, including the file name, the
        second element is the index within the image settings
        (e.g. ImageNameVars).
        """
        global cached_file_lists
        # Only offer a cached directory listing in test mode with a GUI
        can_cache = workspace.pipeline.test_mode
        frame = workspace.frame
        root = self.image_directory()
        use_cached = False
        if can_cache and frame is not None and \
                cached_file_lists.has_key(root):
            how_long, files = cached_file_lists[root]
            # Ask the user only when the last scan was slow (> 3 seconds)
            if how_long > 3:
                import wx
                if wx.MessageBox(
                        ("The last time you started test mode it took %f seconds\n"
                         "to find all of the image sets. Do you want to find the\n"
                         'files again? Choose "No" if you are in a hurry.') %
                        how_long,
                        "Do you want to wait %f seconds again?" % how_long,
                        wx.YES_NO | wx.NO_DEFAULT | wx.ICON_QUESTION,
                        frame) == wx.NO:
                    use_cached = True
        if not use_cached:
            import time
            start_time = time.clock()
            listdir = lambda path: [
                x for x in os.listdir(path)
                if os.path.isfile(os.path.join(path, x))]
            # Hoist the os.path helpers into locals for the walk loop
            my_relpath = os.path.relpath
            realpath = os.path.realpath
            join = os.path.join
            if root.lower().startswith("http:") or \
                    root.lower().startswith("https:"):
                w = None
            elif sys.version_info[0] == 2 and sys.version_info[1] < 6:
                # os.walk grew the followlinks argument in Python 2.6
                w = os.walk(root, topdown=True)
            else:
                w = os.walk(root, topdown=True, followlinks=True)
            if self.analyze_sub_dirs():
                if self.descend_subdirectories == SUB_SOME:
                    prohibited = self.subdirectory_filter.get_selections()
                else:
                    prohibited = []
                files = []
                seen_dirs = set()
                for dirpath, dirnames, filenames in w:
                    path = my_relpath(dirpath, root)
                    if (path in prohibited) or \
                            (os.path.realpath(dirpath) in seen_dirs):
                        # Don't descend into prohibited directories, and
                        # avoid infinite loops from links
                        del dirnames[:]
                        continue
                    # update list of visited paths
                    seen_dirs.add(realpath(dirpath))
                    dirnames.sort()  # try to ensure consistent behavior. Is this needed?
                    if path == os.path.curdir:
                        files += [(file_name, file_name)
                                  for file_name in filenames]
                    else:
                        files += [(join(path, file_name), file_name)
                                  for file_name in filenames]
            else:
                files = [(file_name, file_name)
                         for file_name in sorted(listdir(root))]
            how_long = time.clock() - start_time
            cached_file_lists[self.image_directory()] = (how_long, files)
        # Assign every file to an image group index (None = no match)
        if self.load_choice() == MS_EXACT_MATCH:
            files = [(path, self.assign_filename_by_exact_match(file_name))
                     for path, file_name in files]
        elif self.load_choice() == MS_REGEXP:
            files = [(path, self.assign_filename_by_regexp(file_name))
                     for path, file_name in files]
        else:
            # Load by order.
            files = [path for path, file_name in files
                     if self.filter_filename(file_name)]
            files = [(path, self.assign_filename_by_order(idx))
                     for idx, path in enumerate(files)]
        files = [(path, idx) for path, idx in files if idx is not None]
        files.sort()
        if len(files) == 0:
            message = (
                "CellProfiler did not find any image files that "
                'matched your matching pattern: "%s"' %
                self.images[0].common_text.value)
            workspace.pipeline.report_prepare_run_error(self, message)
        return files

    def image_directory(self):
        """Return the image directory
        """
        return self.location.get_absolute_path()

    def image_name_vars(self):
        """Return the list of values in the image name field (the name
        that later modules see)
        """
        result = []
        for image in self.images:
            # Multichannel images and grouped movies contribute one name
            # per channel; otherwise only the first channel's name counts
            if (self.is_multichannel or
                    (self.load_movies() and
                     image.wants_movie_frame_grouping)):
                result += [channel.get_image_name()
                           for channel in image.channels]
            else:
                result += [image.channels[0].get_image_name()]
        return result

    def text_to_find_vars(self):
        """Return the list of values in the image name field (the name
        that later modules see)
        """
        return [fd.common_text for fd in self.images]

    def text_to_exclude(self):
        """Return the text to match against the file name to exclude it
        from the set
        """
        return self.match_exclude.value

    def filter_filename(self, filename):
        """Returns True if the file extension is correct

        Returns true if in image
mode and an image extension or if in movie mode and extension is a
        movie extension.
        """
        if self.file_types in (FF_AVI_MOVIES, FF_STK_MOVIES,
                               FF_OTHER_MOVIES):
            if not is_movie(filename):
                return False
        elif not is_image(filename):
            return False
        # Honor the "text to exclude" setting if it is in use
        if ((self.text_to_exclude() != cps.DO_NOT_USE) and
                self.exclude and
                (filename.find(self.text_to_exclude()) >= 0)):
            return False
        return True

    def assign_filename_by_exact_match(self, filename):
        '''Assign the file name to an image by matching a portion exactly

        filename - filename in question

        Returns either the index of the image or None if no match
        '''
        if not self.filter_filename(filename):
            return None
        ttfs = self.text_to_find_vars()
        # First image whose "text to find" appears in the filename wins
        for i, ttf in enumerate(ttfs):
            if filename.find(ttf.value) >= 0:
                return i
        return None

    def assign_filename_by_regexp(self, filename):
        '''Assign the file name to an image by regular expression matching

        filename - filename in question

        Returns either the index of the image or None if no match
        '''
        # NOTE(review): unlike assign_filename_by_exact_match, this does
        # not call filter_filename first - confirm whether that is intended.
        ttfs = self.text_to_find_vars()
        for i, ttf in enumerate(ttfs):
            if re.search(ttf.value, filename):
                return i
        return None

    def assign_filename_by_order(self, index):
        '''Assign the file name to an image by alphabetical order

        index - the order in which it appears in the list

        Returns either the image index or None if the index, modulo the
        # of files in a group is greater than the number of images.
        '''
        # Position within the group is 1-based to match order_position
        result = (index % self.order_group_size.value) + 1
        for i, fd in enumerate(self.images):
            if result == fd.order_position:
                return i
        return None

    def get_categories(self, pipeline, object_name):
        '''Return the categories of measurements that this module produces

        object_name - return measurements made on this object
                      (or 'Image' for image measurements)
        '''
        res = []
        # Names of channels loaded as objects, and whether any channel
        # is loaded as an image
        object_names = sum(
            [[channel.object_name.value for channel in image.channels
              if channel.image_object_choice == IO_OBJECTS]
             for image in self.images], [])
        has_image_name = any([any(
            [True for channel in image.channels
             if channel.image_object_choice == IO_IMAGES])
            for image in self.images])
        if object_name == cpmeas.IMAGE:
            if has_image_name:
                res += [C_FILE_NAME, C_PATH_NAME, C_URL, C_MD5_DIGEST,
                        C_SCALING, C_HEIGHT, C_WIDTH]
            # Movie formats always contribute metadata (frame positions)
            has_metadata = (self.file_types in
                            (FF_AVI_MOVIES, FF_STK_MOVIES, FF_OTHER_MOVIES))
            if self.file_types == FF_OTHER_MOVIES:
                res += [C_SERIES, C_FRAME]
            elif self.load_movies():
                res += [C_FRAME]
            for fd in self.images:
                if fd.metadata_choice != M_NONE:
                    has_metadata = True
            if has_metadata:
                res += [cpmeas.C_METADATA]
            if len(object_names) > 0:
                res += [C_OBJECTS_FILE_NAME, C_OBJECTS_PATH_NAME,
                        C_OBJECTS_URL, I.C_COUNT]
        elif object_name in object_names:
            res += [I.C_LOCATION, I.C_NUMBER]
        return res

    def get_measurements(self, pipeline, object_name, category):
        '''Return the measurements that this module produces

        object_name - return measurements made on this object
                      (or 'Image' for image measurements)
        category - return measurements made in this category
        '''
        result = []
        object_names = sum(
            [[channel.object_name.value for channel in image.channels
              if channel.image_object_choice == IO_OBJECTS]
             for image in self.images], [])
        if object_name == cpmeas.IMAGE:
            if category == I.C_COUNT:
                result += object_names
            else:
                # Derive the feature names from the declared columns
                result += [c[1].split('_', 1)[1]
                           for c in self.get_measurement_columns(pipeline)
                           if c[1].split('_')[0] == category]
        elif object_name in object_names:
            if category == I.C_NUMBER:
                result +=
[I.FTR_OBJECT_NUMBER] elif category == I.C_LOCATION: result += [I.FTR_CENTER_X, I.FTR_CENTER_Y] return result def get_measurement_columns(self, pipeline): '''Return a sequence describing the measurement columns needed by this module ''' cols = [] all_tokens = [] for fd in self.images: for channel in fd.channels: if not self.channel_wants_images(channel): name = channel.object_name.value cols += I.get_object_measurement_columns(name) path_name_category = C_OBJECTS_PATH_NAME file_name_category = C_OBJECTS_FILE_NAME url_category = C_URL else: name = channel.get_image_name() path_name_category = C_PATH_NAME file_name_category = C_FILE_NAME url_category = C_URL cols += [(cpmeas.IMAGE, "_".join((C_MD5_DIGEST, name)), cpmeas.COLTYPE_VARCHAR_FORMAT % 32)] cols += [(cpmeas.IMAGE, "_".join((C_SCALING, name)), cpmeas.COLTYPE_FLOAT)] cols += [(cpmeas.IMAGE, "_".join((feature, name)), cpmeas.COLTYPE_INTEGER) for feature in (C_HEIGHT, C_WIDTH)] cols += [(cpmeas.IMAGE, "_".join((file_name_category, name)), cpmeas.COLTYPE_VARCHAR_FILE_NAME)] cols += [(cpmeas.IMAGE, "_".join((path_name_category, name)), cpmeas.COLTYPE_VARCHAR_PATH_NAME)] cols += [(cpmeas.IMAGE, "_".join((url_category, name)), cpmeas.COLTYPE_VARCHAR_PATH_NAME)] if self.file_types == FF_OTHER_MOVIES: cols += [(cpmeas.IMAGE, "_".join((C_SERIES, name)), cpmeas.COLTYPE_INTEGER), (cpmeas.IMAGE, "_".join((C_FRAME, name)), cpmeas.COLTYPE_INTEGER)] elif self.load_movies(): cols += [(cpmeas.IMAGE, "_".join((C_FRAME, name)), cpmeas.COLTYPE_INTEGER)] if self.has_file_metadata(fd): tokens = cpmeas.find_metadata_tokens(fd.file_metadata.value) cols += [(cpmeas.IMAGE, '_'.join((cpmeas.C_METADATA, token)), cpmeas.COLTYPE_VARCHAR_FILE_NAME) for token in tokens if token not in all_tokens] all_tokens += tokens if self.has_path_metadata(fd): tokens = cpmeas.find_metadata_tokens(fd.path_metadata.value) cols += [(cpmeas.IMAGE, '_'.join((cpmeas.C_METADATA, token)), cpmeas.COLTYPE_VARCHAR_PATH_NAME) for token in tokens if token not in 
all_tokens] all_tokens += tokens # # Add a well feature if we have well row and well column # if needs_well_metadata(all_tokens): cols += [(cpmeas.IMAGE, '_'.join((cpmeas.C_METADATA, cpmeas.FTR_WELL)), cpmeas.COLTYPE_VARCHAR_FILE_NAME)] if self.file_types in (FF_AVI_MOVIES, FF_STK_MOVIES): cols += [(cpmeas.IMAGE, "_".join((cpmeas.C_METADATA, M_T)), cpmeas.COLTYPE_INTEGER)] elif self.file_types == FF_OTHER_MOVIES: cols += [(cpmeas.IMAGE, "_".join((cpmeas.C_METADATA, feature)), cpmeas.COLTYPE_INTEGER) for feature in (M_Z, M_T)] return cols def change_causes_prepare_run(self, setting): '''Check to see if changing the given setting means you have to restart Some settings, esp in modules like LoadImages, affect more than the current image set when changed. For instance, if you change the name specification for files, you have to reload your image_set_list. Override this and return True if changing the given setting means that you'll have to do "prepare_run". ''' # # It's safest to say that any change in loadimages requires a restart # return True def needs_conversion(self): if self.match_method not in (MS_EXACT_MATCH, MS_REGEXP): raise ValueError( "Can't convert a LoadImages module that matches images by %s" % self.match_method.value) return True def convert(self, pipeline, metadata, namesandtypes, groups): '''Convert to the modern format using the input modules This method converts any LoadImages modules in the pipeline to Images / Metadata / NamesAndTypes and Groups metadata - the metadata module namesandtypes - the namesandtypes module groups - the groups module first - True if no images have been added to namesandtypes yet. 
''' import cellprofiler.modules.metadata as cpmetadata import cellprofiler.modules.namesandtypes as cpnamesandtypes import cellprofiler.modules.groups as cpgroups assert isinstance(metadata, cpmetadata.Metadata) assert isinstance(namesandtypes, cpnamesandtypes.NamesAndTypes) assert isinstance(groups, cpgroups.Groups) if self.match_method not in (MS_EXACT_MATCH, MS_REGEXP): raise ValueError( "Can't convert a LoadImages module that matches images by %s" % self.match_method.value) if self.group_by_metadata: groups.notes.append( "WARNING: original pipeline used metadata grouping" " which was not converted during processing" " of current pipeline.") warn_metadata_match = False warn_gray_color = False edited_modules = set() for group in self.images: for channel in group.channels: if namesandtypes.assignment_method == cpnamesandtypes.ASSIGN_ALL: namesandtypes.assignment_method.value = \ cpnamesandtypes.ASSIGN_RULES else: namesandtypes.add_assignment() edited_modules.add(namesandtypes) assignment = namesandtypes.assignments[-1] if channel.image_object_choice == IO_IMAGES: name = assignment.image_name.value = \ channel.image_name.value assignment.load_as_choice.value = \ cpnamesandtypes.LOAD_AS_GRAYSCALE_IMAGE warn_gray_color = True else: name = assignment.object_name.value = \ channel.object_name.value assignment.load_as_choice.value = \ cpnamesandtypes.LOAD_AS_OBJECTS rfilter = assignment.rule_filter assert isinstance(rfilter, cps.Filter) structure = [cps.Filter.AND_PREDICATE] fp = cpnamesandtypes.FilePredicate() fp_does, fp_does_not = [ [d for d in fp.subpredicates if isinstance(d, c)][0] for c in (cps.Filter.DoesPredicate, cps.Filter.DoesNotPredicate)] if self.exclude: # Exclude with a predicate of # File doesnot contain