从s3下载文件
首先导入所需的库
import boto3
import os
首先有一个工具函数,用来建立不存在的本地文件夹
def mkdir_recursive(folder):
    """Create *folder* and any missing parent directories, level by level.

    Prints each directory it actually creates and silently skips levels
    that already exist.

    Args:
        folder: slash-separated path; trailing slashes are ignored.
    """
    full = ''
    # Walk the path one segment at a time so every missing intermediate
    # level is created (and reported) in order, e.g. p1/ then p1/p2/ ...
    # rstrip handles any number of trailing '/' (the original only one).
    for part in folder.rstrip('/').split('/'):
        full += part + '/'
        if not os.path.exists(full):
            print('Creating {}'.format(full))
            os.mkdir(full)
# 循环创建 p1/p2/p3文件夹
mkdir_recursive('p1/p2/p3/')
Creating p1/
Creating p1/p2/
Creating p1/p2/p3/
下载文件,输入为
- bucket
- 文件名
- 本地文件夹
import boto3
import os
def download_file(bucket, filename, local_folder):
    """Download a single S3 object into *local_folder*.

    Args:
        bucket: S3 bucket name.
        filename: object key; must not end with '/' (a folder marker).
        local_folder: local destination directory, created if missing.

    Returns:
        Path of the downloaded local file.

    Raises:
        ValueError: if *filename* ends with '/'.
    """
    s3 = boto3.client('s3')
    # Make sure the local folder exists.
    mkdir_recursive(local_folder)
    # A key ending in '/' is a folder marker, not a downloadable file.
    if filename.endswith('/'):
        raise ValueError("filename must not end with '/': {!r}".format(filename))
    # The local file keeps only the basename of the key.
    local_file = os.path.join(local_folder, filename.split('/')[-1])
    # Context manager closes the handle even if the download fails
    # (the original leaked the object returned by open()).
    with open(local_file, 'wb') as f:
        s3.download_fileobj(bucket, filename, f)
    # Build the s3:// URL with '/' explicitly: os.path.join would use
    # '\\' on Windows and print a malformed URL.
    print('Downloaded file from s3://{}/{} to {}'.format(bucket, filename, local_file))
    return local_file
# 从xx-bucket下载r/f3.txt文件到本地p5文件夹,如果没有文件夹会自动循环创建
download_file('xxx-bucket','r/f3.txt','./p5')
Creating ./p5/
Downloaded file from s3://xxx-bucket/r/f3.txt to ./p5/f3.txt
下载文件,输入为
- s3链接,例如
s3://xxx-bucket/r/f3.txt
- 本地文件夹
import boto3
import os
def download_file(bucket, filename, local_folder):
    """Download a single S3 object into *local_folder*.

    Args:
        bucket: S3 bucket name.
        filename: object key; must not end with '/' (a folder marker).
        local_folder: local destination directory, created if missing.

    Returns:
        Path of the downloaded local file.

    Raises:
        ValueError: if *filename* ends with '/'.
    """
    s3 = boto3.client('s3')
    # Make sure the local folder exists.
    mkdir_recursive(local_folder)
    # A key ending in '/' is a folder marker, not a downloadable file.
    if filename.endswith('/'):
        raise ValueError("filename must not end with '/': {!r}".format(filename))
    # The local file keeps only the basename of the key.
    local_file = os.path.join(local_folder, filename.split('/')[-1])
    # Context manager closes the handle even if the download fails
    # (the original leaked the object returned by open()).
    with open(local_file, 'wb') as f:
        s3.download_fileobj(bucket, filename, f)
    # Drop a single leading '/' so the printed s3:// URL is well formed.
    display_key = filename[1:] if filename.startswith('/') else filename
    # Join with '/' explicitly: os.path.join would use '\\' on Windows.
    print('Downloaded file from s3://{}/{} to {}'.format(bucket, display_key, local_file))
    return local_file
download_file('xxx-bucket','r/f3.txt','./p6')
Creating ./p6/
Downloaded file from s3://xxx-bucket/r/f3.txt to ./p6/f3.txt
循环下载整个文件夹,输入为
- bucket
- prefix 文件夹前缀,必须以 / 结尾
- local_folder 本地文件夹
import boto3
import os
def download_folder(bucket, prefix, local_folder):
    """Download every object under *prefix* into *local_folder*,
    mirroring the key hierarchy locally.

    Args:
        bucket: S3 bucket name.
        prefix: key prefix ('folder'); must end with '/' or be ''.
            '/' is treated as the bucket root ('').
        local_folder: local destination root directory.

    Returns:
        List of downloaded local file paths (empty if nothing matched).

    Raises:
        ValueError: if *prefix* is non-empty and does not end with '/'.
    """
    s3 = boto3.client('s3')
    # A non-empty prefix must denote a folder, i.e. end with '/'.
    if not prefix.endswith('/') and prefix != '':
        raise ValueError("prefix must end with '/': {!r}".format(prefix))
    if prefix == '/':
        prefix = ''
    # Use a paginator: a single list_objects call returns at most
    # 1000 keys, silently truncating larger folders.
    paginator = s3.get_paginator('list_objects')
    keys = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get('Contents', []):
            # Skip zero-byte "folder marker" keys.
            if obj['Key'] and not obj['Key'].endswith('/'):
                keys.append(obj['Key'])
    local_files = []
    for key in keys:
        # Re-create the key's folder structure under local_folder.
        obj_folder = os.path.join(local_folder, *key.split('/')[:-1]) \
            if key.split('/')[:-1] else local_folder
        local_files.append(download_file(bucket, key, obj_folder))
    return local_files
download_folder('xxx-bucket','','./x/')
Downloaded file from s3://xxx-bucket/f1.txt to ./x/f1.txt
Downloaded file from s3://xxx-bucket/f2.txt to ./x/f2.txt
Downloaded file from s3://xxx-bucket/f3.txt to ./x/f3.txt
Downloaded file from s3://xxx-bucket/a/f3.txt to ./x/a/f3.txt
Downloaded file from s3://xxx-bucket/q/w/e/nb to ./x/q/w/e/nb
Downloaded file from s3://xxx-bucket/r/f3.txt to ./x/r/f3.txt
Downloaded file from s3://xxx-bucket/rr/f3.txt to ./x/rr/f3.txt
Downloaded file from s3://xxx-bucket/xxx2.txt/f3.txt to ./x/xxx2.txt/f3.txt
基于s3链接循环下载整个文件夹
import boto3
import os
def download_folder_s3link(s3link, local_folder):
    """Download every object under an s3:// folder link into *local_folder*.

    Thin wrapper around download_folder: parses the bucket and prefix out
    of a link such as 's3://my-bucket/some/prefix/'.

    Args:
        s3link: link of the form 's3://<bucket>/<prefix>/'.
        local_folder: local destination root directory.

    Returns:
        List of downloaded local file paths.
    """
    # Strip only the leading scheme (replace-all could corrupt a key
    # that happens to contain 's3://').
    path = s3link.replace('s3://', '', 1)
    bucket = path.split('/')[0]
    prefix = path[len(bucket) + 1:]
    # Delegate to download_folder, which validates the prefix and does
    # the listing and downloading; this removes the duplicated logic.
    return download_folder(bucket, prefix, local_folder)
download_folder_s3link('s3://xxx-bucket/','./x/')
Downloaded file from s3://xxx-bucket/f1.txt to ./x/f1.txt
Downloaded file from s3://xxx-bucket/f2.txt to ./x/f2.txt
Downloaded file from s3://xxx-bucket/f3.txt to ./x/f3.txt
Downloaded file from s3://xxx-bucket/a/f3.txt to ./x/a/f3.txt
Downloaded file from s3://xxx-bucket/q/w/e/nb to ./x/q/w/e/nb
Downloaded file from s3://xxx-bucket/r/f3.txt to ./x/r/f3.txt
Downloaded file from s3://xxx-bucket/rr/f3.txt to ./x/rr/f3.txt
Downloaded file from s3://xxx-bucket/xxx2.txt/f3.txt to ./x/xxx2.txt/f3.txt
复制/移动object
输入:
- source_bucket
- source_key
- target_bucket
- target_prefix
- delete_source:是否删除原来的object
import boto3
import os
def copy_object(source_bucket, source_key, target_bucket, target_prefix, newname=None, delete_source=False):
    """Copy (or move) one S3 object to another bucket/prefix.

    Args:
        source_bucket: bucket holding the source object.
        source_key: key of the object to copy.
        target_bucket: destination bucket.
        target_prefix: destination prefix; a trailing '/' is appended
            if missing (unless empty).
        newname: optional new file name; defaults to the source basename.
        delete_source: when True the source object is deleted afterwards,
            turning the copy into a move.

    Returns:
        's3://<target_bucket>/<target_key>' of the new object.
    """
    s3 = boto3.client('s3')
    # Normalize the prefix so it always denotes a folder.
    if not target_prefix.endswith('/') and target_prefix != '':
        target_prefix += '/'
    copy_source = {
        'Bucket': source_bucket,
        'Key': source_key
    }
    # Build S3 keys with plain string concatenation: os.path.join would
    # use '\\' on Windows and produce invalid keys.
    basename = source_key.split('/')[-1] if newname is None else newname
    target_filename = target_prefix + basename
    s3.copy(copy_source, target_bucket, target_filename)
    if delete_source:
        s3.delete_object(Bucket=source_bucket, Key=source_key)
    action = 'Moved' if delete_source else 'Copied'
    print('{} s3://{}/{} to s3://{}/{}'.format(action, source_bucket, source_key, target_bucket, target_filename))
    return 's3://{}/{}'.format(target_bucket, target_filename)
copy_object('xxx-bucket','a/f3.txt', 'xxx-bucket', 'b/', newname='f4.txt')
Copied s3://xxx-bucket/a/f3.txt to s3://xxx-bucket/b/f4.txt
import boto3
import os
def copy_object_s3link(source_link, target_link, newname=None, delete_source=False):
    """Copy (or move) one S3 object, addressed by s3:// links.

    Thin wrapper around copy_object: parses bucket/key out of the links.

    Args:
        source_link: 's3://<bucket>/<key>' of the object to copy.
        target_link: 's3://<bucket>/<prefix>/' destination.
        newname: optional new file name; defaults to the source basename.
        delete_source: delete the source after copying (move semantics).

    Returns:
        's3://<bucket>/<key>' of the new object.
    """
    # Strip only the leading scheme from each link.
    source = source_link.replace('s3://', '', 1)
    source_bucket = source.split('/')[0]
    source_key = source[len(source_bucket) + 1:]
    target = target_link.replace('s3://', '', 1)
    target_bucket = target.split('/')[0]
    target_prefix = target[len(target_bucket) + 1:]
    # Delegate to copy_object, which normalizes the prefix and performs
    # the copy/delete; this removes the duplicated logic.
    return copy_object(source_bucket, source_key, target_bucket, target_prefix,
                       newname=newname, delete_source=delete_source)
copy_object_s3link('s3://deeplens-liu/b/f3.txt','s3://deeplens-liu/c/', newname='f4.txt')
Copied s3://deeplens-liu/b/f3.txt to s3://deeplens-liu/c/f4.txt
copy整个文件夹
import boto3
import os
def copy_folder(source_bucket, source_prefix, target_bucket, target_prefix, delete_source=False):
    """Copy (or move) every object under *source_prefix* to *target_prefix*,
    preserving the relative key hierarchy.

    Args:
        source_bucket: bucket to copy from.
        source_prefix: source 'folder'; must end with '/' or be ''
            ('/' means the bucket root).
        target_bucket: bucket to copy to.
        target_prefix: target 'folder'; must end with '/' or be ''
            ('/' means the bucket root).
        delete_source: delete each source object after copying (move).

    Returns:
        List of 's3://...' links of the copied objects (empty when the
        source prefix contains no objects).

    Raises:
        ValueError: if either prefix is non-empty without a trailing '/'.
    """
    s3 = boto3.client('s3')
    # Both prefixes must denote folders.
    if not source_prefix.endswith('/') and source_prefix != '':
        raise ValueError("source_prefix must end with '/': {!r}".format(source_prefix))
    if not target_prefix.endswith('/') and target_prefix != '':
        raise ValueError("target_prefix must end with '/': {!r}".format(target_prefix))
    if source_prefix == '/':
        source_prefix = ''
    if target_prefix == '/':
        target_prefix = ''
    objs = s3.list_objects(Bucket=source_bucket, Prefix=source_prefix).get('Contents')
    # Nothing under that prefix.
    if not objs:
        return []
    # Skip zero-byte "folder marker" keys.
    keys = [obj['Key'] for obj in objs if obj['Key'] and not obj['Key'].endswith('/')]
    copied = []
    for key in keys:
        # Path of the key relative to source_prefix, minus the file name.
        relative = key[len(source_prefix):-len(key.split('/')[-1])]
        # BUG FIX: target_prefix must come first. The original joined
        # os.path.join(relative, target_prefix), which nested the target
        # under the relative path (a/f.txt -> a/<target>/f.txt instead
        # of <target>/a/f.txt). Join with '/' (via concatenation), not
        # os.path.join, so keys stay valid on Windows.
        my_target_prefix = target_prefix + relative
        copied.append(copy_object(source_bucket, key, target_bucket,
                                  my_target_prefix, delete_source=delete_source))
    return copied
上传文件
def upload_to_s3(bucket, prefix, file_name, only_file_name=False):
    """Upload a local file to S3 under *prefix*.

    Args:
        bucket: destination bucket.
        prefix: key prefix, e.g. 'test-upload/'.
        file_name: local path of the file to upload.
        only_file_name: when True, only the basename of *file_name* is
            appended to the prefix; otherwise the full path is used.
    """
    s3 = boto3.client('s3')
    upload_file_name = file_name.split('/')[-1] if only_file_name else file_name
    # Context manager closes the handle even if the upload fails
    # (the original leaked the object returned by open()).
    with open(file_name, 'rb') as f:
        s3.upload_fileobj(f, bucket, prefix + upload_file_name)
upload_to_s3('deeplens-liu', 'test-upload/', 'f1.txt', only_file_name=True)