#!/usr/bin/env python
"""Export the events or albums of an iPhoto library into a directory tree.

The library's AlbumData.xml is parsed, and every image of every event (or
album) is copied to a folder below the destination directory.  Optionally
image metadata is written with pyexiv2, and destination folders unknown to
iPhoto are imported back into it.
"""

__version__ = "0.6"

import base64
import codecs
import io
import locale
import os
import re
import shutil
import stat
import subprocess
import sys
import time
from datetime import datetime, timedelta
from io import IOBase
from optparse import OptionParser
from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, parse
from xml.dom.minidom import Node

try:
    import pyexiv2
except ImportError:
    pyexiv2 = None

# Small compatibility shims so the module runs on Python 2.6+ and Python 3.
try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3

try:
    _intern = sys.intern  # Python 3
except AttributeError:
    _intern = intern  # Python 2 builtin

# To allow Unicode characters to be displayed
# (see http://wiki.python.org/moin/PrintFails).
# Only needed on Python 2; Python 3 text streams already encode, and wrapping
# them with a byte-producing codec writer would break every write.
if sys.version_info[0] < 3:
    sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout)
    sys.stderr = codecs.getwriter(locale.getpreferredencoding())(sys.stderr)


def _to_text(value, encoding="utf-8"):
    """Return value as text, decoding byte strings and replacing bad bytes."""
    if isinstance(value, bytes) and not isinstance(value, _text_type):
        return value.decode(encoding, "replace")
    return value


class iPhotoLibraryError(Exception):
    """Raised for any problem reading the library or writing the export."""
    pass


class RemoveNullsStream(IOBase):
    """Binary file wrapper whose read() strips null bytes.

    Some AlbumData.xml files contain null bytes.  Strip them so the SAX
    parser doesn't fail with an Invalid Token error.
    """

    def __init__(self, filename):
        # Binary mode: the XML parser accepts bytes, and bytes.translate can
        # delete characters on both Python 2 and Python 3.
        self.file = open(filename, 'rb')

    def read(self, bufsize=2**20):
        """Return up to bufsize bytes with nulls removed; b"" only at EOF."""
        while True:
            chunk = self.file.read(bufsize)
            cleaned = chunk.translate(None, b"\0")
            # A chunk consisting solely of nulls must not look like EOF to
            # the parser, so keep reading until there is data or a real EOF.
            if cleaned or not chunk:
                return cleaned

    def close(self):
        try:
            self.file.close()
        finally:
            IOBase.close(self)


class iPhotoLibrary(object):
    """In-memory view of an iPhoto library plus the export operations on it.

    Constructing an instance parses <albumDir>/AlbumData.xml.  Call walk()
    with a list of callables (typically [library.copyImage]) to export.
    """

    # Library format versions this parser was written for.
    major_version = 2
    minor_version = 0
    # Per-image dict keys kept while parsing; everything else is dropped.
    interesting_image_keys = [
        'OriginalPath', 'ImagePath', 'Rating', 'Keywords', 'Caption',
        'Comment', 'Faces', 'face key'
    ]
    # Seconds between the Unix epoch and 2001-01-01, the origin of the
    # library's timer-interval values.
    apple_epoch = 978307200

    def __init__(self, albumDir, destDir, use_album=False, use_date=False,
                 use_faces=False, use_metadata=False, deconflict=False,
                 quiet=False, year_dir=False, import_missing=False,
                 import_from_date=None, test=False, date_delimiter="-",
                 ignore_time_delta=False, originals=False):
        """Store the export options and parse the library.

        albumDir is the directory containing AlbumData.xml; destDir is the
        export root.  import_from_date, when given, is a "YYYY-MM-DD" string.

        Raises iPhotoLibraryError if the date is malformed, AlbumData.xml
        cannot be opened, or the library cannot be understood.
        """
        self.use_album = use_album
        self.use_date = use_date
        self.use_faces = use_faces
        self.use_metadata = use_metadata
        self.deconflict = deconflict
        self.dest_dir = destDir
        self.output_dirs = set()
        self.output_files = set()
        self.quiet = quiet
        self.albums = []
        self.keywords = {}
        self.faces = {}
        self.images = {}
        self.test = test
        self.year_dir = year_dir
        self.import_missing = import_missing
        self.ignore_time_delta = ignore_time_delta
        self.date_delimiter = date_delimiter
        self.originals = originals
        self.import_albums = []

        if import_from_date:
            try:
                self.import_from_date = datetime.strptime(
                    import_from_date, "%Y-%m-%d"
                )
            except ValueError:
                raise iPhotoLibraryError(
                    "Invalid import date '%s'; expected YYYY-MM-DD"
                    % import_from_date
                )
        else:
            self.import_from_date = None

        if self.import_missing:
            self.build_import_list()

        albumDataXml = os.path.join(albumDir, "AlbumData.xml")
        try:
            albumDataStream = RemoveNullsStream(albumDataXml)
        except IOError as why:
            raise iPhotoLibraryError(
                "Can't read %s: %s" % (albumDataXml, why.strerror)
            )
        self.status("* Parsing iPhoto Library data... ")
        try:
            self.parseAlbumData(albumDataStream)
        finally:
            # Release the file handle even when parsing fails.
            albumDataStream.close()
        self.status("Done.\n")

    def parseAlbumData(self, filename):
        """
        Parse an iPhoto AlbumData.xml file, keeping the interesting bits.

        filename is passed straight to xml.dom.pulldom.parse, so it may be a
        path or a readable stream.  Fills self.keywords, self.faces,
        self.albums and self.images.
        """
        doc = parse(filename)
        stack = []
        last_top_key = None
        last_image_key = None
        if self.use_album:
            album_list_key = "List of Albums"
        else:
            album_list_key = "List of Rolls"
        for event, node in doc:
            if event == START_ELEMENT:
                stack.append(node)
                level = len(stack)
                # expandNode() consumes the node's END_ELEMENT event, hence
                # the explicit stack.pop() after every expansion below.
                if level == 3:
                    if node.nodeName == 'key':
                        doc.expandNode(node)
                        last_top_key = self.getText(node)
                        stack.pop()
                    elif last_top_key == 'List of Keywords':
                        doc.expandNode(node)
                        self.keywords = self.dePlist(node)
                        stack.pop()
                    elif last_top_key == 'List of Faces':
                        doc.expandNode(node)
                        self.faces = dict(
                            (k, v['name'])
                            for k, v in self.dePlist(node, ['name']).items()
                        )
                        stack.pop()
                    elif last_top_key == 'Major Version':
                        doc.expandNode(node)
                        major_version = self.dePlist(node)
                        stack.pop()
                        if major_version != self.major_version:
                            raise iPhotoLibraryError(
                                "Sorry, I can't understand version %i iPhoto Libraries." % major_version
                            )
                    elif last_top_key == 'Minor Version':
                        doc.expandNode(node)
                        minor_version = self.dePlist(node)
                        stack.pop()
                        if minor_version > self.minor_version:
                            self.status(
                                "\nI don't recognise iPhoto libraries when the minor version is %i, but let's try anyway.\n" % minor_version,
                                force=True
                            )
                elif level == 4:
                    # process large items individually so we don't
                    # load them all into memory.
                    if last_top_key == album_list_key:
                        doc.expandNode(node)
                        self.albums.append(self.dePlist(node))
                        stack.pop()
                    elif last_top_key == 'Master Image List':
                        doc.expandNode(node)
                        if node.nodeName == 'key':
                            last_image_key = self.getText(node)
                        else:
                            self.images[last_image_key] = self.dePlist(
                                node, self.interesting_image_keys
                            )
                        stack.pop()
            elif event == END_ELEMENT:
                stack.pop()

    def dePlist(self, node, interesting_keys=None):
        """
        Given a DOM node, convert the plist (fragment) it refers to and
        return the corresponding Python data structure.

        If interesting_keys is a list, "dict" keys will be filtered so that
        only those nominated are returned (for ALL descendant dicts). Numeric
        keys aren't filtered.

        Raises iPhotoLibraryError for malformed integer, real or date values,
        and Exception for an unknown element type.
        """
        ik = interesting_keys
        dtype = node.nodeName
        if dtype == 'string':
            return self.getText(node)
        elif dtype == 'integer':
            try:
                return int(self.getText(node))
            except (ValueError, TypeError):  # TypeError: empty element
                raise iPhotoLibraryError(
                    "Corrupted Library; unexpected value '%s' for integer" %
                    self.getText(node)
                )
        elif dtype == 'real':
            try:
                return float(self.getText(node))
            except (ValueError, TypeError):
                raise iPhotoLibraryError(
                    "Corrupted Library; unexpected value '%s' for real" %
                    self.getText(node)
                )
        elif dtype == 'array':
            return [self.dePlist(c, ik) for c in node.childNodes
                    if c.nodeType == Node.ELEMENT_NODE]
        elif dtype == 'dict':
            d = {}
            last_key = None
            for c in node.childNodes:
                if c.nodeType != Node.ELEMENT_NODE:
                    continue
                # TODO: catch out-of-order keys/values
                if c.nodeName == 'key':
                    last_key = self.getText(c)
                else:  # value
                    if interesting_keys:  # check to see if we're interested
                        if last_key not in interesting_keys \
                                and not last_key.isdigit():
                            continue  # nope.
                    # Keys repeat across thousands of dicts; interning keeps
                    # a single copy of each.
                    d[_intern(str(last_key))] = self.dePlist(c, ik)
            return d
        elif dtype == 'true':
            return True
        elif dtype == 'false':
            return False
        elif dtype == 'data':
            # b64decode discards the whitespace plists embed in <data>.
            return base64.b64decode(self.getText(node) or "")
        elif dtype == 'date':
            text = self.getText(node)
            # plist <date> elements normally hold ISO 8601 text; fall back to
            # the numeric timer-interval form handled by appleDate().
            try:
                return datetime.strptime(text or "", "%Y-%m-%dT%H:%M:%SZ")
            except ValueError:
                return self.appleDate(text)
        else:
            raise Exception("Don't know what a %s is." % dtype)

    @staticmethod
    def getText(element, default=None):
        """Return the concatenated child node values of element.

        Returns default when element is None, and None when the element has
        no children.
        """
        if element is None:
            return default
        if len(element.childNodes) == 0:
            return None
        else:
            # Children without a value (nested elements) are skipped rather
            # than breaking the join.
            return "".join([n.nodeValue for n in element.childNodes
                            if n.nodeValue is not None])

    def walk(self, funcs):
        """
        Walk through the events or albums (depending on the value of
        use_album) in this library and apply each function in the list funcs
        to each image, calling it as:
           func(imageId, targetFileDir, folderDate)
        where:
         - imageId is the string identifier for the image,
         - targetFileDir is the destination directory for the folder, and
         - folderDate is the date of the folder (None for albums).

        When import_missing is set, destination folders that matched no
        event/album are imported into iPhoto afterwards.
        """
        if self.use_album:
            targetName = "AlbumName"
            albums = [a for a in self.albums
                      if a.get("Album Type", None) == "Regular"]
        else:
            targetName = "RollName"
            albums = self.albums

        for i, folder in enumerate(albums, 1):
            if self.use_album:
                folderDate = None
            else:
                folderDate = self.appleDate(folder["RollDateAsTimerInterval"])
            images = folder["KeyList"]
            folderName = folder[targetName]

            # As we process albums/events in the iPhoto library, remove that
            # album from the list of import_albums we'll be importing at the
            # end.  The list is rebuilt instead of calling remove() while
            # iterating, which skipped entries and could remove one twice.
            if self.import_albums:
                self.import_albums[:] = [
                    ia for ia in self.import_albums
                    if not any(_to_text(name) == folderName
                               for name in ia['album_names'])
                ]

            if folderDate and self.use_date:
                date = '%(year)d%(delim)s%(month)02d%(delim)s%(day)02d' % {
                    'year': folderDate.year,
                    'month': folderDate.month,
                    'day': folderDate.day,
                    'delim': self.date_delimiter
                }
                if re.match("[A-Z][a-z]{2} [0-9]{1,2}, [0-9]{4}", folderName):
                    # The name is just iPhoto's default date; use ours.
                    outputPath = date
                elif re.match("[0-9]{4}.[0-9]{2}.[0-9]{2} ?.*", folderName):
                    # The name already carries a date prefix.
                    outputPath = folderName
                else:
                    outputPath = date + " " + folderName
                if self.year_dir:
                    outputPath = os.path.join(str(folderDate.year), outputPath)
            else:
                outputPath = folderName

            # Deconflict output directories
            targetFileDir = os.path.join(self.dest_dir, outputPath)
            if self.deconflict:
                j = 1
                while targetFileDir in self.output_dirs:
                    targetFileDir = os.path.join(
                        self.dest_dir, outputPath + " %02d" % j
                    )
                    j += 1
                self.output_dirs.add(targetFileDir)

            self.status("* Processing %i of %i: %s (%i images)...\n" % (
                i, len(albums), folderName, len(images)
            ))
            for imageId in images:
                for func in funcs:
                    func(imageId, targetFileDir, folderDate)
            self.status("\n")

        if self.import_missing:
            self.status("importing folders:\n")
            for ia in self.import_albums:
                self.status(_to_text(ia["album_dir"]) + "\n")
                # Using the "Auto Import" dir in iPhoto was unpredictable
                # with respect to the resulting event name.  Using
                # AppleScript to import the event seems to always result in
                # the event being properly named.
                if not self.test:
                    self._import_into_iphoto(ia["album_dir"])

    def _import_into_iphoto(self, album_dir):
        """Ask iPhoto, via osascript, to import the folder album_dir."""
        # Escape for an AppleScript string literal.  The script is passed as
        # a single argv entry, so no shell quoting is involved at all.
        escaped_dir = album_dir.replace("\\", "\\\\").replace('"', '\\"')
        script = (
            'tell application "iPhoto"\n'
            'import from "%s"\n'
            'end tell' % escaped_dir
        )
        try:
            subprocess.call(["osascript", "-e", script])
        except OSError as why:
            # os.system never raised for a missing osascript; stay
            # best-effort but tell the user.
            self.status("Can't run osascript: %s\n" % why.strerror, force=True)

    def _source_path(self, image):
        """Return the library file to export for the parsed image dict.

        Unedited images only have ImagePath, edited images have both
        ImagePath and OriginalPath, except for some corrupted iPhoto
        libraries, where some images only have OriginalPath.  The preferred
        key depends on self.originals; the other one is the fallback.
        """
        if self.originals:
            candidates = ("OriginalPath", "ImagePath")
        else:
            candidates = ("ImagePath", "OriginalPath")
        for key in candidates:
            if key in image:
                return image[key]
        raise iPhotoLibraryError("Image has neither ImagePath nor OriginalPath")

    def copyImage(self, imageId, folderName, folderDate):
        """
        Copy an image from the library to the directory folderName, creating
        the directory if needed.  folderDate is accepted for the walk()
        callback signature and is not used here.

        If use_metadata is True, also write the image metadata from the
        library to the copy. If use_faces is True, faces will be saved as
        keywords.

        Raises iPhotoLibraryError if the image is unknown or the directory
        cannot be created.
        """
        try:
            image = self.images[imageId]
        except KeyError:
            raise iPhotoLibraryError("Can't find image #%s" % imageId)

        if not os.path.exists(folderName):
            try:
                if not self.test:
                    os.makedirs(folderName)
            except OSError as why:
                raise iPhotoLibraryError(
                    "Can't create %s: %s" % (folderName, why.strerror)
                )
            self.status(" Created %s\n" % folderName)

        mFilePath = self._source_path(image)
        basename = os.path.basename(mFilePath)

        # Deconflict output filenames
        tFilePath = os.path.join(folderName, basename)
        if self.deconflict:
            j = 1
            while tFilePath in self.output_files:
                tFilePath = os.path.join(folderName, "%02d_" % j + basename)
                j += 1
            self.output_files.add(tFilePath)

        source_exists = os.path.exists(mFilePath)

        # Skip unchanged files, unless we're writing metadata.  A missing
        # source cannot be compared, so it is left to the copy step below.
        if not self.use_metadata and source_exists \
                and os.path.exists(tFilePath):
            mStat = os.stat(mFilePath)
            tStat = os.stat(tFilePath)
            if not self.ignore_time_delta and \
                    abs(tStat[stat.ST_MTIME] - mStat[stat.ST_MTIME]) <= 10:
                self.status("-")
                return
            if tStat[stat.ST_SIZE] == mStat[stat.ST_SIZE]:
                self.status("-")
                return

        if not self.test and source_exists:
            shutil.copy2(mFilePath, tFilePath)
        md_written = False
        if self.use_metadata:
            md_written = self.writePhotoMD(imageId, tFilePath)
        if md_written:
            self.status("+")
        else:
            self.status(".")

    def writePhotoMD(self, imageId, filePath=None):
        """
        Write the metadata from the library for imageId to filePath.

        If filePath is None, write it to the photo in the library.

        If use_faces is True, iPhoto face names will be written to keywords.

        Returns True when metadata was written (or would have been, in test
        mode) and False otherwise.  Raises iPhotoLibraryError if the image is
        unknown or pyexiv2 is unavailable.
        """
        try:
            image = self.images[imageId]
        except KeyError:
            raise iPhotoLibraryError("Can't find image #%s" % imageId)

        if not filePath:
            filePath = self._source_path(image)

        caption = image.get("Caption", None)
        rating = image.get("Rating", None)
        comment = image.get("Comment", None)
        # Ids missing from the lookup tables are skipped.
        keywords = set(self.keywords[k] for k in image.get("Keywords", [])
                       if k in self.keywords)
        if self.use_faces:
            keywords.update(self.faces[f['face key']]
                            for f in image.get("Faces", [])
                            if f['face key'] in self.faces)

        if not (caption or comment or rating or keywords):
            return False
        if pyexiv2 is None:
            raise iPhotoLibraryError("pyexiv2 is required to write metadata")

        try:
            md = pyexiv2.ImageMetadata(filePath)
            md.read()
            if caption:
                md["Iptc.Application2.Headline"] = [caption]
            if rating:
                md["Xmp.xmp.Rating"] = rating
            if comment:
                md["Iptc.Application2.Caption"] = [comment]
            if keywords:
                md["Iptc.Application2.Keywords"] = list(keywords)
            if not self.test:
                md.write(preserve_timestamps=True)
            return True
        except IOError as why:
            self.status("\nProblem setting metadata (%s) on %s\n" % (
                _to_text(str(why)), filePath
            ))
        return False

    def appleDate(self, text):
        """Convert seconds since 2001-01-01 (number or numeric string) to a
        naive UTC datetime.

        Raises iPhotoLibraryError if text is not a usable number.
        """
        try:
            return datetime(1970, 1, 1) + timedelta(
                seconds=self.apple_epoch + float(text)
            )
        except (ValueError, TypeError, OverflowError):
            raise iPhotoLibraryError(
                "Corrupted Library; unexpected value '%s' for date" % text
            )

    def status(self, msg, force=False):
        """Write msg to stdout unless quiet is set; force overrides quiet."""
        if force or not self.quiet:
            sys.stdout.write(msg)
            sys.stdout.flush()

    def build_import_list(self):
        '''
        Collect the destination folders that are candidates for import into
        iPhoto, storing them in self.import_albums.

        We are going to make some assumptions here.
        1. The dest_dir is a directory of albums containing images,
           optionally the albums can be in year dirs.
        2. Album dirs are assumed to follow one of these naming patterns:
             [0-9]{4}.[0-9]{2}.[0-9]{2} ?.* - Dated folder, unnamed, iPhoto
                 album name could match or could be iPhoto date format
             .* - Named folder, iPhoto album name

        Walk the dest dir and find all folders and files. For each folder
        determine the possible iPhoto album names. When walking the xml
        eliminate any folder we find where one of the possible album names
        matches an existing album name.
        '''
        if not self.year_dir:
            self.build_import_album_dirs(self.dest_dir)
            return

        for year_dir in os.listdir(self.dest_dir):
            # if year_dir was specified, then only match on folders inside
            # year folders
            if not re.match("^[0-9]{4}$", year_dir):
                continue
            # if import_from_date was specified, then skip folders where the
            # year_dir is before the import_from_date.year
            if self.import_from_date and \
                    int(year_dir) < self.import_from_date.year:
                continue
            year_path = os.path.join(self.dest_dir, year_dir)
            if os.path.isdir(year_path):
                self.build_import_album_dirs(year_path)

    def build_import_album_dirs(self, base_dir):
        """Append an import candidate for every dated folder in base_dir.

        Each candidate is a dict with "album_names" (the iPhoto names the
        folder may correspond to) and "album_dir" (its absolute path).
        Folders dated before import_from_date are skipped.
        """
        delim = str(self.date_delimiter)
        # re.escape keeps any delimiter literal (a bare backslash prefix
        # turned e.g. "d" into the digit class).
        date_pattern = "[0-9]{4}%s[0-9]{2}%s[0-9]{2}" % (
            re.escape(delim), re.escape(delim)
        )
        dated_folder = re.compile("(" + date_pattern + ") ?(.*)")
        date_format = "%Y" + delim + "%m" + delim + "%d"

        for album_name in os.listdir(base_dir):
            album_dir = os.path.abspath(os.path.join(base_dir, album_name))
            if not os.path.isdir(album_dir):
                continue  # only folders can be albums

            # Folder pattern: "2011_01_01 New Years Party"
            m = dated_folder.match(album_name)
            if not m:
                continue
            try:
                folder_date = datetime.strptime(m.group(1), date_format)
            except ValueError:
                continue  # looks like a date but isn't one (e.g. month 13)

            album_names = [album_name]
            if m.group(2):
                album_names.append(m.group(2))
            else:
                # Folder pattern: "2011_01_01" -- iPhoto would have named the
                # event after its date, e.g. "Jan 1, 2011".
                album_names.append("%s %d, %d" % (
                    folder_date.strftime("%b"),
                    folder_date.day,
                    folder_date.year
                ))

            # Don't import folders that are prior to the specified date
            if self.import_from_date and folder_date < self.import_from_date:
                continue

            self.import_albums.append({
                "album_names": album_names,
                "album_dir": album_dir,
            })


def error(msg):
    """Print msg to stderr and exit with status 1."""
    sys.stderr.write("\n%s\n" % msg)
    sys.exit(1)


def main(argv=None):
    """Command-line entry point; argv defaults to sys.argv[1:]."""
    usage = "Usage: %prog [options] "
    version = "exportiphoto version %s" % __version__
    default_date_delimiter = "-"
    option_parser = OptionParser(usage=usage, version=version)
    option_parser.set_defaults(
        test=False,
        albums=False,
        metadata=False,
        faces=False,
        quiet=False,
        date=True,
        ignore_time_delta=False,
        originals=False
    )

    option_parser.add_option("-a", "--albums",
        action="store_true", dest="albums",
        help="use albums instead of events"
    )

    option_parser.add_option("-q", "--quiet",
        action="store_true", dest="quiet",
        help="use quiet mode"
    )

    option_parser.add_option("-d", "--date",
        action="store_false", dest="date",
        help="stop use date prefix in folder name"
    )

    option_parser.add_option("-o", "--originals",
        action="store_true", dest="originals",
        help="export original images instead of edited ones"
    )

    option_parser.add_option("-x", "--deconflict",
        action="store_true", dest="deconflict",
        help="deconflict export directories of same name"
    )

    option_parser.add_option("-t", "--test",
        action="store_true", dest="test",
        help="don't actually copy files or import folders"
    )

    option_parser.add_option("-y", "--yeardir",
        action="store_true", dest="year_dir",
        help="add year directory to output"
    )

    option_parser.add_option("-e", "--date_delimiter",
        action="store", type="string", dest="date_delimiter",
        help="date delimiter default=%s" % default_date_delimiter
    )

    option_parser.add_option("-i", "--import",
        action="store_true", dest="import_missing",
        help="import missing albums from dest directory"
    )

    option_parser.add_option("-j", "--ignore_time_delta",
        action="store_true", dest="ignore_time_delta",
        help="ignore time delta when determining whether or not to copy a file"
    )

    option_parser.add_option("-z", "--import_from_date",
        action="store", type="string", dest="import_from_date",
        help="only import missing folers if folder date occurs after (YYYY-MM-DD). Uses date in folder name."
    )

    # Metadata options are only offered when pyexiv2 is importable.
    if pyexiv2:
        option_parser.add_option("-m", "--metadata",
            action="store_true", dest="metadata",
            help="write metadata to images"
        )

        option_parser.add_option("-f", "--faces",
            action="store_true", dest="faces",
            help="store faces as keywords (requires -m)"
        )

    (options, args) = option_parser.parse_args(argv)

    if len(args) != 2:
        option_parser.error(
            "Please specify an iPhoto library and a destination."
        )

    def message(exc):
        # Exceptions are not indexable on Python 3; use args explicitly.
        return exc.args[0] if exc.args else exc

    try:
        if options.date_delimiter is None:
            options.date_delimiter = default_date_delimiter

        library = iPhotoLibrary(args[0],  # src
                                args[1],  # dest
                                use_album=options.albums,
                                use_date=options.date,
                                use_faces=options.faces,
                                use_metadata=options.metadata,
                                deconflict=options.deconflict,
                                quiet=options.quiet,
                                year_dir=options.year_dir,
                                import_missing=options.import_missing,
                                import_from_date=options.import_from_date,
                                test=options.test,
                                date_delimiter=options.date_delimiter,
                                ignore_time_delta=options.ignore_time_delta,
                                originals=options.originals
        )
    except iPhotoLibraryError as why:
        error(message(why))
    except KeyboardInterrupt:
        error("Interrupted.")

    try:
        library.walk([library.copyImage])
    except iPhotoLibraryError as why:
        error(message(why))
    except KeyboardInterrupt:
        error("Interrupted. Copy may be incomplete.")


if __name__ == '__main__':
    main()