From 6908e6fc48cce80f2ab171d43f60ee30cc396021 Mon Sep 17 00:00:00 2001 From: Jakob Kemi Date: Wed, 25 Feb 2009 21:21:59 +0100 Subject: [PATCH] Ticket #272: Extfs: added S3 backend by Jakob Kemi --- configure.ac | 1 + vfs/extfs/Makefile.am | 2 + vfs/extfs/extfs.ini | 4 + vfs/extfs/s3.in | 414 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 421 insertions(+) create mode 100644 vfs/extfs/s3.in diff --git a/configure.ac b/configure.ac index da04e4ea8..a025bb334 100644 --- a/configure.ac +++ b/configure.ac @@ -605,6 +605,7 @@ vfs/extfs/lslR vfs/extfs/mailfs vfs/extfs/patchfs vfs/extfs/rpms +vfs/extfs/s3 vfs/extfs/uace vfs/extfs/ualz vfs/extfs/uar diff --git a/vfs/extfs/Makefile.am b/vfs/extfs/Makefile.am index eacaebcbc..a33f624ad 100644 --- a/vfs/extfs/Makefile.am +++ b/vfs/extfs/Makefile.am @@ -23,6 +23,7 @@ EXTFS_IN = \ mailfs.in \ patchfs.in \ rpms.in \ + s3.in \ uace.in \ ualz.in \ uar.in \ @@ -50,6 +51,7 @@ EXTFS_OUT = \ mailfs \ patchfs \ rpms \ + s3 \ uace \ ualz \ uar \ diff --git a/vfs/extfs/extfs.ini b/vfs/extfs/extfs.ini index b6dab532b..f4d04a2cf 100644 --- a/vfs/extfs/extfs.ini +++ b/vfs/extfs/extfs.ini @@ -62,3 +62,7 @@ bpp # ISO image iso9660 + +# Amazon S3 +s3: + diff --git a/vfs/extfs/s3.in b/vfs/extfs/s3.in new file mode 100644 index 000000000..ece420755 --- /dev/null +++ b/vfs/extfs/s3.in @@ -0,0 +1,414 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# +# Midnight Commander compatible EXTFS for accessing Amazon Web Services S3. +# Written by Jakob Kemi 2009 +# +# Copyright (c) 2009 Free Software Foundation, Inc. +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+#
+#
+# Notes:
+# This EXTFS exposes buckets as directories and keys as files
+# Due to EXTFS limitations all buckets & keys have to be read initially which might
+# take quite some time.
+# Tested on Debian with Python 2.4-2.6 and boto 1.4c and 1.6b
+# (Python 2.6 might need -W ignore::DeprecationWarning due to boto using
+# deprecated module Popen2)
+#
+#
+# Installation:
+# Make sure that boto (python-boto in Debian) is installed.
+# Preferably pytz (package python-tz in Debian) should be installed as well.
+#
+# Save as executable file /share/mc/extfs/s3 (or wherever your mc expects to find extfs modules)
+# Add the following to your extfs.ini (might exist as /usr/share/mc/extfs/extfs.ini):
+# ----- begin extfs.ini -----
+# # Amazon S3
+# s3:
+# ----- end extfs.ini -----
+#
+#
+# Settings: (should be set via environment)
+# Required:
+# AWS_ACCESS_KEY_ID : Amazon AWS access key (required)
+# AWS_SECRET_ACCESS_KEY : Amazon AWS secret access key (required)
+# Optional:
+# MCVFS_EXTFS_S3_LOCATION : where to create new buckets, "EU"(default) or "US"
+# MCVFS_EXTFS_S3_DEBUGFILE : write debug info to this file (no info default)
+#
+#
+# Usage:
+# Open dialog "Quick cd" () and type: #s3 (or simply type ''cd #s3'' in shell line)
+#
+#
+# History:
+# 2009-02-07 Jakob Kemi 
+# - Updated instructions.
+# - Improved error reporting.
+#
+# 2009-02-06 Jakob Kemi 
+# - Threaded list command.
+# - Handle rm of empty "subdirectories" (as seen in mc). 
+# - List most recent datetime and total size of keys as directory properties. +# - List modification time in local time. +# +# 2009-02-05 Jakob Kemi +# - Initial version. +# + +import sys +import os +import time +import re +import datetime + + +import boto +from boto.s3.connection import S3Connection +from boto.s3.key import Key +from boto.exception import BotoServerError + + +# Get settings from environment +USER=os.getenv('USER','0') +AWS_ACCESS_KEY_ID=os.getenv('AWS_ACCESS_KEY_ID') +AWS_SECRET_ACCESS_KEY=os.getenv('AWS_SECRET_ACCESS_KEY') +LOCATION = os.getenv('MCVFS_EXTFS_S3_LOCATION', 'EU').lower() +DEBUGFILE = os.getenv('MCVFS_EXTFS_S3_DEBUGFILE') + +if not AWS_ACCESS_KEY_ID or not AWS_SECRET_ACCESS_KEY: + sys.stderr.write('Missing AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY environment variables.\n') + sys.exit(1) + +# Setup logging +if DEBUGFILE: + import logging + logging.basicConfig( + filename=DEBUGFILE, + level=logging.DEBUG, + format='%(asctime)s %(levelname)s %(message)s') + logging.getLogger('boto').setLevel(logging.WARNING) +else: + class Void(object): + def __getattr__(self, attr): + return self + def __call__(self, *args, **kw): + return self + logging = Void() + +logger=logging.getLogger('s3extfs') + + +def threadmap(fun, iterable, maxthreads=16): + """ + Quick and dirty threaded version of builtin method map. + Propagates exception safely. 
+ """ + from threading import Thread + import Queue + + items = list(iterable) + nitems = len(items) + if nitems < 2: + return map(fun, items) + + # Create and fill input queue + input = Queue.Queue() + output = Queue.Queue() + + for i,item in enumerate(items): + input.put( (i,item) ) + + class WorkThread(Thread): + """ + Takes one item from input queue (thread terminates when input queue is empty), + performs fun, puts result in output queue + """ + def run(self): + while True: + try: + (i,item) = input.get_nowait() + try: + result = fun(item) + output.put( (i,result) ) + except: + output.put( (None,sys.exc_info()) ) + except Queue.Empty: + return + + # Start threads + for i in range( min(len(items), maxthreads) ): + t = WorkThread() + t.setDaemon(True) + t.start() + + # Wait for all threads to finish & collate results + ret = [] + for i in range(nitems): + try: + i,res = output.get() + if i == None: + raise res[0],res[1],res[2] + except Queue.Empty: + break + ret.append(res) + + return ret + +logger.debug('started') + +# Global S3 connection +s3 = S3Connection(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) +if LOCATION == 'eu': + logger.debug('Using location EU for new buckets') + S3LOCATION = boto.s3.connection.Location.EU +else: + logger.debug('Using location US for new buckets') + S3LOCATION = boto.s3.connection.Location.US + +logger.debug('argv: ' + str(sys.argv)) + +try: + cmd = sys.argv[1] + args = sys.argv[2:] +except: + sys.stderr.write('This program should be called from within MC\n') + sys.exit(1) + +def handleServerError(msg): + e = sys.exc_info() + msg += ', reason: ' + e[1].reason + logger.error(msg, exc_info=e) + sys.stderr.write(msg+'\n') + sys.exit(1) + +# +# Lists all S3 contents +# +if cmd == 'list': + if len(args) > 0: + path = args[0] + else: + path = '' + + logger.info('list') + + rs = s3.get_all_buckets() + + # Import python timezones (pytz) + try: + import pytz + except: + logger.warning('Missing pytz module, timestamps will be off') + # A 
fallback UTC tz stub + class pytzutc(datetime.tzinfo): + def __init__(self): + datetime.tzinfo.__init__(self) + self.utc = self + self.zone = 'UTC' + def utcoffset(self, dt): + return datetime.timedelta(0) + def tzname(self, dt): + return "UTC" + def dst(self, dt): + return datetime.timedelta(0) + pytz = pytzutc() + + + # Find timezone + # (yes, timeZONE as in _geographic zone_ not EST/CEST or whatever crap we get from time.tzname) + # http://regebro.wordpress.com/2008/05/10/python-and-time-zones-part-2-the-beast-returns/ + def getGuessedTimezone(): + # 1. check TZ env. var + try: + tz = os.getenv('TZ', '') + return pytz.timezone(tz) + except: + pass + # 2. check if /etc/timezone exists (Debian at least) + try: + if os.path.isfile('/etc/timezone'): + tz = open('/etc/timezone', 'r').readline().strip() + return pytz.timezone(tz) + except: + pass + # 3. check if /etc/localtime is a _link_ to something useful + try: + if os.path.islink('/etc/localtime'): + link = os.readlink('/etc/localtime') + tz = '/'.join(p.split(os.path.sep)[-2:]) + return pytz.timezone(tz) + except: + pass + # 4. use time.tzname which will probably be wrong by an hour 50% of the time. + try: + return pytz.timezone(time.tzname[0]) + except: + pass + # 5. use plain UTC ... 
+ return pytz.utc + + tz=getGuessedTimezone() + logger.debug('Using timezone: ' + tz.zone) + + # AWS time is on format: 2009-01-07T16:43:39.000Z + # we "want" MM-DD-YYYY hh:mm (in localtime) + expr = re.compile(r'^(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})\.\d{3}Z$') + def convDate(awsdatetime): + m = expr.match(awsdatetime) + ye,mo,da,ho,mi,se = map(int,m.groups()) + + dt = datetime.datetime(ye,mo,da,ho,mi,se, tzinfo=pytz.utc) + return dt.astimezone(tz).strftime('%m-%d-%Y %H:%M') + + + def bucketList(b): + totsz = 0 + mostrecent = '1970-01-01T00:00:00.000Z' + ret = [] + for k in b.list(): + mostrecent = max(mostrecent, k.last_modified) + datetime = convDate(k.last_modified) + ret.append('%10s %3d %-8s %-8s %d %s %s\n' % ( + '-rw-r--r--', 1, USER, USER, k.size, datetime, b.name+'/'+k.name) + ) + totsz += k.size + + datetime=convDate(mostrecent) + sys.stdout.write('%10s %3d %-8s %-8s %d %s %s\n' % ( + 'drwxr-xr-x', 1, USER, USER, totsz, datetime, b.name) + ) + for line in ret: + sys.stdout.write(line) + + threadmap(bucketList, rs) + +# +# Fetch file from S3 +# +elif cmd == 'copyout': + archivename = args[0] + storedfilename = args[1] + extractto = args[2] + + bucket,key = storedfilename.split('/', 1) + logger.info('copyout bucket: %s, key: %s'%(bucket, key)) + + try: + b = s3.get_bucket(bucket) + k = b.get_key(key) + + out = open(extractto, 'w') + + k.open(mode='r') + for buf in k: + out.write(buf) + k.close() + out.close() + except BotoServerError: + handleServerError('Unable to fetch key "%s"'%(key)) + +# +# Upload file to S3 +# +elif cmd == 'copyin': + archivename = args[0] + storedfilename = args[1] + sourcefile = args[2] + + bucket,key = storedfilename.split('/', 1) + logger.info('copyin bucket: %s, key: %s'%(bucket, key)) + + try: + b = s3.get_bucket(bucket) + k = b.new_key(key) + k.set_contents_from_file(fp=open(sourcefile,'r')) + except BotoServerError: + handleServerError('Unable to upload key "%s"' % (key)) + +# +# Remove file from S3 +# +elif cmd == 
'rm': + archivename = args[0] + storedfilename = args[1] + + bucket,key = storedfilename.split('/', 1) + logger.info('rm bucket: %s, key: %s'%(bucket, key)) + + try: + b = s3.get_bucket(bucket) + b.delete_key(key) + except BotoServerError: + handleServerError('Unable to remove key "%s"' % (key)) + +# +# Create directory +# +elif cmd == 'mkdir': + archivename = args[0] + dirname = args[1] + + logger.info('mkdir dir: %s' %(dirname)) + if '/' in dirname: + logger.warning('skipping mkdir') + pass + else: + bucket = dirname + try: + s3.create_bucket(bucket, location=boto.s3.connection.Location.EU) + except BotoServerError: + handleServerError('Unable to create bucket "%s"' % (bucket)) + +# +# Remove directory +# +elif cmd == 'rmdir': + archivename = args[0] + dirname = args[1] + + logger.info('rmdir dir: %s' %(dirname)) + if '/' in dirname: + logger.warning('skipping rmdir') + pass + else: + bucket = dirname + try: + b = s3.get_bucket(bucket) + s3.delete_bucket(b) + except BotoServerError: + handleServerError('Unable to delete bucket "%s"' % (bucket)) + +# +# Run from S3 +# +elif cmd == 'run': + archivename = args[0] + storedfilename = args[1] + arguments = args[2:] + + bucket,key = storedfilename.split('/', 1) + logger.info('run bucket: %s, key: %s'%(bucket, key)) + + os.execv(storedfilename, arguments) +else: + logger.error('unhandled, bye') + sys.exit(1) + +logger.debug('command handled') +sys.exit(0) +