Download files from S3

First, import the required libraries

import boto3
import os

Start with a utility function that creates local folders that do not yet exist

def mkdir_recursive(folder):
    # strip a trailing '/' so split() does not produce an empty last segment
    if folder.endswith('/'):
        folder = folder[:-1]
    
    paths = folder.split('/')
    
    # build the path one level at a time, creating each missing level
    full = ''
    while paths:
        full += paths[0] + '/'
        if not os.path.exists(full):
            print('Creating {}'.format(full))
            os.mkdir(full)
        
        paths = paths[1:]
# recursively create the p1/p2/p3 folders
mkdir_recursive('p1/p2/p3/')
Creating p1/
Creating p1/p2/
Creating p1/p2/p3/
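
For reference, the standard library can do this in a single call; a minimal sketch that should behave the same for these relative paths, minus the per-level print:

import os

# creates all intermediate folders; exist_ok=True is a no-op if they already exist
os.makedirs('p1/p2/p3', exist_ok=True)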

Download a file, taking as inputs:

  • bucket
  • file name (object key)
  • local folder
import boto3
import os

def download_file(bucket, filename, local_folder):
    s3 = boto3.client('s3')
    
    # make sure the local folder exists
    mkdir_recursive(local_folder)
    
    # the key must point at a file, not a "folder"
    if filename.endswith('/'):
        raise ValueError('filename must not end with "/"')
    
    # local file path: local folder + the last component of the key
    local_file = os.path.join(local_folder, filename.split('/')[-1])
    # download the object from s3
    with open(local_file, 'wb') as f:
        s3.download_fileobj(bucket, filename, f)
    print('Downloaded file from s3://{} to {}'.format(os.path.join(bucket, filename), local_file))
    return local_file
# download r/f3.txt from xxx-bucket into the local p5 folder; missing folders are created automatically
download_file('xxx-bucket','r/f3.txt','./p5')
Creating ./p5/
Downloaded file from s3://xxx-bucket/r/f3.txt to ./p5/f3.txt
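
boto3 also provides a higher-level download_file helper that writes straight to a local path, so the manual file handling is optional; a minimal sketch reusing the same bucket and key (and assuming ./p5 already exists):

import boto3

s3 = boto3.client('s3')
# download_file takes (Bucket, Key, Filename) and manages the file object itself
s3.download_file('xxx-bucket', 'r/f3.txt', './p5/f3.txt')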

Download a file, taking as inputs:

  • an s3 link, e.g. s3://xxx-bucket/r/f3.txt
  • local folder
import boto3
import os

def download_file_s3link(s3link, local_folder):
    s3 = boto3.client('s3')
    
    # strip the s3:// scheme and split the link into bucket + key
    s3link = s3link.replace('s3://', '')
    bucket = s3link.split('/')[0]
    filename = s3link[len(bucket) + 1:]
    
    # make sure the local folder exists
    mkdir_recursive(local_folder)
    
    # the key must point at a file, not a "folder"
    if filename.endswith('/'):
        raise ValueError('the s3 link must not end with "/"')
    
    # local file path: local folder + the last component of the key
    local_file = os.path.join(local_folder, filename.split('/')[-1])
    # download the object from s3
    with open(local_file, 'wb') as f:
        s3.download_fileobj(bucket, filename, f)
    print('Downloaded file from s3://{} to {}'.format(os.path.join(bucket, filename), local_file))
    return local_file
# download s3://xxx-bucket/r/f3.txt into the local p6 folder
download_file_s3link('s3://xxx-bucket/r/f3.txt', './p6')
Creating ./p6/
Downloaded file from s3://xxx-bucket/r/f3.txt to ./p6/f3.txt
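
The string slicing above works; the standard library's urllib.parse is an alternative way to split an s3 link, shown here as a minimal sketch:

from urllib.parse import urlparse

# urlparse splits the link into scheme, bucket (netloc) and key (path)
parsed = urlparse('s3://xxx-bucket/r/f3.txt')
bucket = parsed.netloc           # 'xxx-bucket'
key = parsed.path.lstrip('/')    # 'r/f3.txt'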

Recursively download an entire folder, taking as inputs:

  • bucket
  • prefix: the folder prefix, must end with /
  • local_folder: the local folder
import boto3
import os

def download_folder(bucket, prefix, local_folder):
    s3 = boto3.client('s3')
    
    # prefix must end with '/' (or be empty for the whole bucket)
    if not prefix.endswith('/') and prefix != '':
        raise ValueError('prefix must end with "/"')
    
    if prefix == '/':
        prefix = ''
    
    # list the objects under the prefix (list_objects returns at most 1000 keys)
    objs = s3.list_objects(Bucket=bucket, Prefix=prefix).get('Contents')
    
    # nothing under that prefix
    if not objs:
        return []
    
    # skip "folder" placeholder keys that end with '/'
    keys = [obj['Key'] for obj in objs if not obj['Key'].endswith('/')]
    
    # downloaded local files
    local_files = []
    
    for key in keys:
        # local folder + the object's own sub-folders
        obj_prefix_list = [local_folder] + key.split('/')[:-1]
        obj_prefix = os.path.join(*obj_prefix_list)
        local_file = download_file(bucket, key, obj_prefix)
        local_files.append(local_file)
        
    return local_files
download_folder('xxx-bucket','','./x/')
Downloaded file from s3://xxx-bucket/f1.txt to ./x/f1.txt
Downloaded file from s3://xxx-bucket/f2.txt to ./x/f2.txt
Downloaded file from s3://xxx-bucket/f3.txt to ./x/f3.txt
Downloaded file from s3://xxx-bucket/a/f3.txt to ./x/a/f3.txt
Downloaded file from s3://xxx-bucket/q/w/e/nb to ./x/q/w/e/nb
Downloaded file from s3://xxx-bucket/r/f3.txt to ./x/r/f3.txt
Downloaded file from s3://xxx-bucket/rr/f3.txt to ./x/rr/f3.txt
Downloaded file from s3://xxx-bucket/xxx2.txt/f3.txt to ./x/xxx2.txt/f3.txt
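
Note that list_objects returns at most 1,000 keys per call, so a very large folder would be truncated; a minimal sketch of collecting all keys with a paginator, under the same bucket/prefix assumptions:

import boto3

s3 = boto3.client('s3')
keys = []
# the list_objects_v2 paginator follows continuation tokens automatically
for page in s3.get_paginator('list_objects_v2').paginate(Bucket='xxx-bucket', Prefix=''):
    for obj in page.get('Contents', []):
        if not obj['Key'].endswith('/'):
            keys.append(obj['Key'])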

Recursively download an entire folder from an s3 link

import boto3
import os

def download_folder_s3link(s3link, local_folder):
    s3 = boto3.client('s3')
    
    # strip the s3:// scheme and split the link into bucket + prefix
    s3link = s3link.replace('s3://', '')
    bucket = s3link.split('/')[0]
    prefix = s3link[len(bucket) + 1:]
    
    # prefix must end with '/' (or be empty for the whole bucket)
    if not prefix.endswith('/') and prefix != '':
        raise ValueError('prefix must end with "/"')
    
    if prefix == '/':
        prefix = ''
    
    # list the objects under the prefix (list_objects returns at most 1000 keys)
    objs = s3.list_objects(Bucket=bucket, Prefix=prefix).get('Contents')
    
    # nothing under that prefix
    if not objs:
        return []
    
    # skip "folder" placeholder keys that end with '/'
    keys = [obj['Key'] for obj in objs if not obj['Key'].endswith('/')]
    
    # downloaded local files
    local_files = []
    
    for key in keys:
        # local folder + the object's own sub-folders
        obj_prefix_list = [local_folder] + key.split('/')[:-1]
        obj_prefix = os.path.join(*obj_prefix_list)
        local_file = download_file(bucket, key, obj_prefix)
        local_files.append(local_file)
        
    return local_files

download_folder_s3link('s3://xxx-bucket/', './x/')
Downloaded file from s3://xxx-bucket/f1.txt to ./x/f1.txt
Downloaded file from s3://xxx-bucket/f2.txt to ./x/f2.txt
Downloaded file from s3://xxx-bucket/f3.txt to ./x/f3.txt
Downloaded file from s3://xxx-bucket/a/f3.txt to ./x/a/f3.txt
Downloaded file from s3://xxx-bucket/q/w/e/nb to ./x/q/w/e/nb
Downloaded file from s3://xxx-bucket/r/f3.txt to ./x/r/f3.txt
Downloaded file from s3://xxx-bucket/rr/f3.txt to ./x/rr/f3.txt
Downloaded file from s3://xxx-bucket/xxx2.txt/f3.txt to ./x/xxx2.txt/f3.txt
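
Apart from parsing the link, the body is identical to download_folder, so the same result could be obtained by delegating; a minimal sketch of that refactor:

def download_folder_s3link(s3link, local_folder):
    # split the link into bucket + prefix, then reuse download_folder defined above
    s3link = s3link.replace('s3://', '')
    bucket = s3link.split('/')[0]
    prefix = s3link[len(bucket) + 1:]
    return download_folder(bucket, prefix, local_folder)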

Copy/move an object

Inputs:

  • source_bucket
  • source_key
  • target_bucket
  • target_prefix
  • newname: optional new file name for the copy (defaults to the source file name)
  • delete_source: whether to delete the source object
import boto3
import os

def copy_object(source_bucket, source_key, target_bucket, target_prefix, newname=None, delete_source=False):
    s3 = boto3.client('s3')
    
    # normalize the target prefix so it ends with '/'
    if not target_prefix.endswith('/') and target_prefix != '':
        target_prefix += '/'
    
    copy_source = {
        'Bucket': source_bucket,
        'Key': source_key
    }
    
    # target key: prefix + either the original file name or the new name
    if newname is None:
        target_filename = os.path.join(target_prefix, source_key.split('/')[-1])
    else:
        target_filename = os.path.join(target_prefix, newname)
    
    s3.copy(copy_source, target_bucket, target_filename)
    
    # "move" = copy, then delete the source object
    if delete_source:
        s3.delete_object(Bucket=source_bucket, Key=source_key)
    action = 'Moved' if delete_source else 'Copied'
    print('{} s3://{} to s3://{}'.format(action, os.path.join(source_bucket, source_key), os.path.join(target_bucket, target_filename)))
    return 's3://{}'.format(os.path.join(target_bucket, target_filename))
copy_object('xxx-bucket','a/f3.txt', 'xxx-bucket', 'b/', newname='f4.txt')
Copied s3://xxx-bucket/a/f3.txt to s3://xxx-bucket/b/f4.txt
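
Passing delete_source=True turns the copy into a move; a usage sketch with the same hypothetical bucket and the key created by the copy above:

# move b/f4.txt into c/, deleting the source object afterwards
copy_object('xxx-bucket', 'b/f4.txt', 'xxx-bucket', 'c/', delete_source=True)

The same copy/move, driven by s3 links: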
import boto3
import os

def copy_object_s3link(source_link, target_link, newname=None, delete_source=False):
    s3 = boto3.client('s3')
    
    # strip the s3:// scheme and split the source link into bucket + key
    source_link = source_link.replace('s3://', '')
    source_bucket = source_link.split('/')[0]
    source_key = source_link[len(source_bucket) + 1:]
    
    # same for the target link: bucket + prefix
    target_link = target_link.replace('s3://', '')
    target_bucket = target_link.split('/')[0]
    target_prefix = target_link[len(target_bucket) + 1:]
    
    # normalize the target prefix so it ends with '/'
    if not target_prefix.endswith('/') and target_prefix != '':
        target_prefix += '/'
    
    copy_source = {
        'Bucket': source_bucket,
        'Key': source_key
    }
    
    # target key: prefix + either the original file name or the new name
    if newname is None:
        target_filename = os.path.join(target_prefix, source_key.split('/')[-1])
    else:
        target_filename = os.path.join(target_prefix, newname)
    
    s3.copy(copy_source, target_bucket, target_filename)
    
    # "move" = copy, then delete the source object
    if delete_source:
        s3.delete_object(Bucket=source_bucket, Key=source_key)
    action = 'Moved' if delete_source else 'Copied'
    print('{} s3://{} to s3://{}'.format(action, os.path.join(source_bucket, source_key), os.path.join(target_bucket, target_filename)))

    return 's3://{}'.format(os.path.join(target_bucket, target_filename))
copy_object_s3link('s3://deeplens-liu/b/f3.txt','s3://deeplens-liu/c/', newname='f4.txt')
Copied s3://deeplens-liu/b/f3.txt to s3://deeplens-liu/c/f4.txt

Copy an entire folder

import boto3
import os

def copy_folder(source_bucket, source_prefix, target_bucket, target_prefix, delete_source=False):
    s3 = boto3.client('s3')
    
    # prefixes must end with '/' (or be empty for the whole bucket)
    if not source_prefix.endswith('/') and source_prefix != '':
        raise ValueError('source_prefix must end with "/"')
    if not target_prefix.endswith('/') and target_prefix != '':
        raise ValueError('target_prefix must end with "/"')
    
    if source_prefix == '/':
        source_prefix = ''
    if target_prefix == '/':
        target_prefix = ''
    
    # list the objects under the source prefix (list_objects returns at most 1000 keys)
    objs = s3.list_objects(Bucket=source_bucket, Prefix=source_prefix).get('Contents')
    
    # nothing under that prefix
    if not objs:
        return []
    
    # skip "folder" placeholder keys that end with '/'
    keys = [obj['Key'] for obj in objs if not obj['Key'].endswith('/')]
    
    targets = []
    for key in keys:
        # keep the object's sub-folders, relative to the source prefix, under the target prefix
        relative_prefix = key[len(source_prefix):-len(key.split('/')[-1])]
        my_target_prefix = os.path.join(target_prefix, relative_prefix)
        targets.append(copy_object(source_bucket, key, target_bucket, my_target_prefix, delete_source=delete_source))
    
    return targets
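
A usage sketch with hypothetical keys, copying everything under a/ into backup/ in the same bucket:

copy_folder('xxx-bucket', 'a/', 'xxx-bucket', 'backup/')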
    
 

Upload a file

import boto3

def upload_to_s3(bucket, prefix, file_name, only_file_name=False):
    s3 = boto3.client('s3')
    # key = prefix + either just the base file name or the full local path
    upload_file_name = file_name.split('/')[-1] if only_file_name else file_name
    with open(file_name, 'rb') as f:
        s3.upload_fileobj(f, bucket, prefix + upload_file_name)
upload_to_s3('deeplens-liu', 'test-upload/', '')
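
boto3 also offers upload_file, which takes a local path directly and handles the file object (and multipart uploads) itself; a minimal sketch where the local file name is a placeholder assumption:

import boto3

s3 = boto3.client('s3')
# upload_file takes (Filename, Bucket, Key)
s3.upload_file('local.txt', 'deeplens-liu', 'test-upload/local.txt')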