Files
code-dumps/aws/s3-batch-restore.py
T

253 lines
7.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
S3 Batch Restore Script
Restores objects from S3 Glacier Deep Archive using AWS S3 Batch Operations.
"""
import sys
import json
import secrets
import string
import time
import boto3
from botocore.exceptions import ClientError
def generate_random_id(length=4):
    """Generate a random lowercase alphanumeric ID of the given length.

    Characters are drawn with ``secrets.choice`` from the digits ``0-9`` and
    the lowercase letters ``a-z``. ``main()`` uses the result as the suffix of
    the manifest bucket name, so a larger alphabet lowers the chance of a
    name collision.

    Args:
        length: Number of characters to generate.

    Returns:
        A string of ``length`` characters, each in ``[0-9a-z]``.
    """
    # Digits plus lowercase letters: 36 possible symbols per position.
    alphabet = string.digits + string.ascii_lowercase
    return ''.join(secrets.choice(alphabet) for _ in range(length))
def create_trust_policy():
    """Build the trust policy document used when creating the IAM role.

    Returns:
        A dict with a single ``Allow`` statement that lets the service
        principal ``batchoperations.s3.amazonaws.com`` call
        ``sts:AssumeRole``.
    """
    allow_batch_operations = {
        "Effect": "Allow",
        "Principal": {"Service": "batchoperations.s3.amazonaws.com"},
        "Action": "sts:AssumeRole",
    }
    return {
        "Version": "2012-10-17",
        "Statement": [allow_batch_operations],
    }
def create_iam_role(iam_client, role_name="S3BatchRestoreRole"):
    """Ensure the IAM role for the batch job exists and has its policy attached.

    Args:
        iam_client: Client object exposing ``create_role`` and
            ``attach_role_policy``.
        role_name: Name of the role to create.

    Returns:
        ``role_name``, unchanged.

    Raises:
        ClientError: If either call fails with an error code other than
            ``EntityAlreadyExists``.
    """
    assume_role_document = json.dumps(create_trust_policy())

    # Step 1: create the role; an already-existing role is not an error.
    try:
        iam_client.create_role(
            RoleName=role_name,
            Description="S3 batch restore role",
            AssumeRolePolicyDocument=assume_role_document,
        )
    except ClientError as err:
        if err.response['Error']['Code'] != 'EntityAlreadyExists':
            raise
        print(f"IAM role {role_name} already exists, skipping creation")
    else:
        print(f"Created IAM role: {role_name}")

    # Step 2: attach the AmazonS3FullAccess policy to the role.
    try:
        iam_client.attach_role_policy(
            RoleName=role_name,
            PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',
        )
    except ClientError as err:
        if err.response['Error']['Code'] != 'EntityAlreadyExists':
            raise
        print(f"Policy already attached to {role_name}")
    else:
        print(f"Attached policy to role: {role_name}")

    return role_name
def create_manifest_bucket(s3_client, bucket_name, region="ap-east-1"):
    """Create the S3 bucket that will hold the manifest file.

    Args:
        s3_client: Client object exposing ``create_bucket``.
        bucket_name: Name of the bucket to create.
        region: Region for the bucket. For anything other than
            ``us-east-1`` it is sent as the ``LocationConstraint``.

    Raises:
        ClientError: For any ``create_bucket`` failure other than
            ``BucketAlreadyOwnedByYou``. This includes
            ``BucketAlreadyExists``, which means the name is taken by a
            different account, so the bucket must not be used for the
            manifest upload.
    """
    request = {"Bucket": bucket_name}
    # us-east-1 is requested without a CreateBucketConfiguration; every
    # other region is passed as the LocationConstraint.
    if region != "us-east-1":
        request["CreateBucketConfiguration"] = {"LocationConstraint": region}

    try:
        s3_client.create_bucket(**request)
    except ClientError as err:
        # Only a bucket this account already owns is safe to reuse.
        if err.response['Error']['Code'] != 'BucketAlreadyOwnedByYou':
            raise
        print(f"Bucket {bucket_name} already exists")
        return

    print(f"Created manifest bucket: {bucket_name} in region: {region}")
def upload_manifest(s3_client, bucket_name, manifest_file_path, object_key="objectlist.csv"):
    """Upload the local manifest file and return the ETag of the new object.

    Args:
        s3_client: Client object exposing ``put_object``.
        bucket_name: Destination bucket.
        manifest_file_path: Path of the local manifest file, opened in
            binary mode.
        object_key: Key to store the manifest under.

    Returns:
        The ``ETag`` from the ``put_object`` response with surrounding
        double quotes stripped.

    Exits the process with status 1 (after printing to stderr) when the file
    is missing or any other exception occurs during the upload.
    """
    try:
        with open(manifest_file_path, 'rb') as manifest_fh:
            put_result = s3_client.put_object(
                Bucket=bucket_name,
                Key=object_key,
                Body=manifest_fh,
            )
            print(f"Uploaded manifest to s3://{bucket_name}/{object_key}")
            # The ETag may come back wrapped in double quotes; drop them.
            return put_result['ETag'].strip('"')
    except FileNotFoundError:
        failure = f"Error: Manifest file '{manifest_file_path}' not found"
    except Exception as exc:
        failure = f"Error uploading manifest: {exc}"

    # Only reached when one of the handlers above recorded a failure.
    print(failure, file=sys.stderr)
    sys.exit(1)
def create_manifest_spec(bucket_name, etag, object_key="objectlist.csv"):
    """Build the manifest description passed to the batch job.

    Args:
        bucket_name: Bucket that contains the manifest object.
        etag: ETag of the manifest object.
        object_key: Key of the manifest object.

    Returns:
        A dict with a ``Spec`` section (CSV format, fields ``Bucket`` and
        ``Key``) and a ``Location`` section (object ARN and ETag).
    """
    manifest_arn = f"arn:aws:s3:::{bucket_name}/{object_key}"
    csv_layout = {
        "Format": "S3BatchOperations_CSV_20180820",
        "Fields": ["Bucket", "Key"],
    }
    where = {"ObjectArn": manifest_arn, "ETag": etag}
    return {"Spec": csv_layout, "Location": where}
def create_report_spec(bucket_name, prefix="batch-reports"):
    """Build the completion-report description passed to the batch job.

    Args:
        bucket_name: Bucket the report is written to (converted to an ARN).
        prefix: Key prefix for the report objects.

    Returns:
        A dict enabling a CSV report with scope ``AllTasks``.
    """
    report_bucket_arn = f"arn:aws:s3:::{bucket_name}"
    return dict(
        Bucket=report_bucket_arn,
        Prefix=prefix,
        Format="Report_CSV_20180820",
        Enabled=True,
        ReportScope="AllTasks",
    )
def create_batch_job(s3control_client, account_id, role_arn, manifest_spec, report_spec,
                     expiration_days=14, glacier_job_tier="STANDARD", priority=10,
                     description="Restore objects from Deep Archive"):
    """Submit an S3 batch job that initiates a restore for the manifest's objects.

    Args:
        s3control_client: Client object exposing ``create_job``.
        account_id: AWS account ID the job is created in.
        role_arn: ARN of the role passed as ``RoleArn``.
        manifest_spec: Manifest description passed as ``Manifest``.
        report_spec: Report description passed as ``Report``.
        expiration_days: Sent as ``ExpirationInDays`` of the restore operation.
        glacier_job_tier: Sent as ``GlacierJobTier`` of the restore operation.
        priority: Job priority.
        description: Job description.

    Returns:
        The ``JobId`` from the ``create_job`` response.

    Exits the process with status 1 (after printing to stderr) when
    ``create_job`` raises ``ClientError``.
    """
    restore_settings = {
        "ExpirationInDays": expiration_days,
        "GlacierJobTier": glacier_job_tier,
    }
    job_request = {
        "AccountId": account_id,
        "Operation": {"S3InitiateRestoreObject": restore_settings},
        "Manifest": manifest_spec,
        "Report": report_spec,
        "Priority": priority,
        "RoleArn": role_arn,
        "Description": description,
    }

    try:
        created = s3control_client.create_job(**job_request)
    except ClientError as err:
        print(f"Error creating batch job: {err}", file=sys.stderr)
        sys.exit(1)

    job_id = created['JobId']
    print(f"Submitted S3 batch job: {job_id}")
    return job_id
def approve_job(s3control_client, account_id, job_id):
    """Request the ``Ready`` status for a batch job so it can start running.

    Args:
        s3control_client: Client object exposing ``update_job_status``.
        account_id: AWS account ID that owns the job.
        job_id: ID of the job to approve.

    Exits the process with status 1 (after printing to stderr) when
    ``update_job_status`` raises ``ClientError``.
    """
    status_request = dict(
        AccountId=account_id,
        JobId=job_id,
        RequestedJobStatus='Ready',
    )
    try:
        s3control_client.update_job_status(**status_request)
    except ClientError as err:
        print(f"Error approving job: {err}", file=sys.stderr)
        sys.exit(1)
    print(f"Approved job: {job_id}")
def get_account_id(sts_client):
    """Return the AWS account ID of the current caller.

    Args:
        sts_client: Client object exposing ``get_caller_identity``.

    Returns:
        The ``Account`` field of the ``get_caller_identity`` response.

    Exits the process with status 1 (after printing to stderr) when the call
    raises ``ClientError``.
    """
    try:
        identity = sts_client.get_caller_identity()
    except ClientError as err:
        print(f"Error getting account ID: {err}", file=sys.stderr)
        sys.exit(1)
    return identity['Account']
def main():
    """Command-line entry point: submit a Deep Archive restore batch job.

    Expects the path of the manifest CSV as the first command-line argument.
    It then looks up the account ID, ensures the IAM role exists, creates a
    randomly suffixed manifest bucket in ``ap-east-1``, uploads the manifest,
    submits the batch job and finally requests the ``Ready`` status for it.

    Exits with status 1 when the manifest argument is missing.
    """
    if len(sys.argv) < 2:
        print("Usage: python3 s3-batch-restore.py <manifest.csv>", file=sys.stderr)
        # The job is submitted with a CSV manifest whose fields are
        # Bucket, Key, so each row is "<bucket>,<key>".
        print("You must first prepare the manifest, which is a csv with content <bucket>,<key>", file=sys.stderr)
        sys.exit(1)
    manifest_file = sys.argv[1]

    # Initialize AWS clients.
    # IAM and STS clients are created without a region; the S3 and
    # S3 Control clients are pinned to the region below.
    region = "ap-east-1"
    session = boto3.Session()
    iam_client = session.client('iam')
    s3_client = session.client('s3', region_name=region)
    s3control_client = session.client('s3control', region_name=region)
    sts_client = session.client('sts')

    # Get account ID
    account_id = get_account_id(sts_client)
    print(f"Using AWS account: {account_id}")

    # Create IAM role and derive its ARN from the account ID and role name
    role_name = create_iam_role(iam_client)
    role_arn = f"arn:aws:iam::{account_id}:role/{role_name}"

    # Create manifest bucket with a random suffix
    random_id = generate_random_id(4)
    manifest_bucket = f"deep-archive-batch-restore-{random_id}"
    create_manifest_bucket(s3_client, manifest_bucket, region=region)

    # Upload manifest and get ETag
    etag = upload_manifest(s3_client, manifest_bucket, manifest_file)

    # Create manifest and report specs (in memory, no temp files); the
    # report is written to the same bucket as the manifest.
    manifest_spec = create_manifest_spec(manifest_bucket, etag)
    report_spec = create_report_spec(manifest_bucket)

    # Create batch job
    print("Submitting S3 batch job...")
    job_id = create_batch_job(
        s3control_client,
        account_id,
        role_arn,
        manifest_spec,
        report_spec,
    )

    # Wait a bit before approving.
    # NOTE(review): fixed delay; presumably gives the new job time to reach
    # a state from which it can be moved to Ready - confirm, or poll instead.
    time.sleep(5)

    # Approve job
    print(f"Approving submitted job {job_id}...")
    approve_job(s3control_client, account_id, job_id)

    print("\nTo delete the manifest bucket:")
    print(f"aws s3 rb s3://{manifest_bucket} --force")


if __name__ == "__main__":
    main()