#!/usr/bin/env python3
"""
S3 Batch Restore Script

Restores objects from S3 Glacier Deep Archive using AWS S3 Batch Operations.

Workflow (see ``main``): ensure an IAM role for S3 Batch Operations exists,
create a bucket for the manifest, upload the manifest CSV, submit an
``S3InitiateRestoreObject`` batch job, then request the ``Ready`` status for
that job.
"""

import json
import secrets
import string
import sys
import time

import boto3
from botocore.exceptions import ClientError

# Region used for the manifest bucket and the S3 / S3 Control clients.
DEFAULT_REGION = "ap-east-1"

# Characters used for random bucket-name suffixes: lowercase letters and
# digits only.
_ID_ALPHABET = string.ascii_lowercase + string.digits


def generate_random_id(length=4):
    """Generate a random lowercase alphanumeric ID of the specified length.

    Args:
        length: Number of characters to generate.

    Returns:
        A string of ``length`` characters drawn from ``0-9`` and ``a-z``.
    """
    # The previous implementation drew from digits only, which left just
    # 10**length possible suffixes despite documenting an alphanumeric ID.
    return ''.join(secrets.choice(_ID_ALPHABET) for _ in range(length))


def create_trust_policy():
    """Create the trust policy document for the IAM role.

    Returns:
        A dict that allows the ``batchoperations.s3.amazonaws.com`` service
        principal to call ``sts:AssumeRole``.
    """
    return {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {
                    "Service": "batchoperations.s3.amazonaws.com"
                },
                "Action": "sts:AssumeRole"
            }
        ]
    }


def create_iam_role(iam_client, role_name="S3BatchRestoreRole"):
    """Create the IAM role for S3 batch operations and attach its policy.

    An ``EntityAlreadyExists`` error from either call is treated as success,
    so the function can be run repeatedly.

    Args:
        iam_client: boto3 IAM client.
        role_name: Name of the role to create or reuse.

    Returns:
        The role name.

    Raises:
        ClientError: For any IAM error other than ``EntityAlreadyExists``.
    """
    trust_policy = create_trust_policy()

    try:
        iam_client.create_role(
            RoleName=role_name,
            Description="S3 batch restore role",
            AssumeRolePolicyDocument=json.dumps(trust_policy)
        )
        print(f"Created IAM role: {role_name}")
    except ClientError as e:
        if e.response['Error']['Code'] == 'EntityAlreadyExists':
            print(f"IAM role {role_name} already exists, skipping creation")
        else:
            raise

    # NOTE(review): AmazonS3FullAccess is much broader than a restore job
    # needs; consider a scoped policy if this role is long-lived.
    try:
        iam_client.attach_role_policy(
            RoleName=role_name,
            PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess'
        )
        print(f"Attached policy to role: {role_name}")
    except ClientError as e:
        if e.response['Error']['Code'] == 'EntityAlreadyExists':
            print(f"Policy already attached to {role_name}")
        else:
            raise

    return role_name


def create_manifest_bucket(s3_client, bucket_name, region=DEFAULT_REGION):
    """Create the S3 bucket for the manifest file in the specified region.

    A bucket that already exists and is owned by the caller is reused.

    Args:
        s3_client: boto3 S3 client.
        bucket_name: Name of the bucket to create.
        region: Region in which to create the bucket.

    Raises:
        ClientError: For any S3 error other than ``BucketAlreadyOwnedByYou``.
            This includes ``BucketAlreadyExists``, which means the name is
            already taken in the shared bucket namespace.
    """
    try:
        # For regions other than us-east-1, specify LocationConstraint.
        if region == "us-east-1":
            s3_client.create_bucket(Bucket=bucket_name)
        else:
            s3_client.create_bucket(
                Bucket=bucket_name,
                CreateBucketConfiguration={'LocationConstraint': region}
            )
        print(f"Created manifest bucket: {bucket_name} in region: {region}")
    except ClientError as e:
        # Only "already owned by you" is safe to ignore. Continuing after
        # BucketAlreadyExists would send the manifest to a bucket that this
        # account may not own.
        if e.response['Error']['Code'] == 'BucketAlreadyOwnedByYou':
            print(f"Bucket {bucket_name} already exists")
        else:
            raise


def upload_manifest(s3_client, bucket_name, manifest_file_path,
                    object_key="objectlist.csv"):
    """Upload the manifest file to S3 and return its ETag.

    Args:
        s3_client: boto3 S3 client.
        bucket_name: Destination bucket.
        manifest_file_path: Local path of the manifest CSV.
        object_key: Key to store the manifest under.

    Returns:
        The ETag reported by ``put_object`` with surrounding double quotes
        removed.

    Exits the process with status 1 if the file is missing or the upload
    fails.
    """
    try:
        with open(manifest_file_path, 'rb') as f:
            response = s3_client.put_object(
                Bucket=bucket_name,
                Key=object_key,
                Body=f
            )
        print(f"Uploaded manifest to s3://{bucket_name}/{object_key}")

        # Get ETag (remove quotes if present).
        etag = response['ETag'].strip('"')
        return etag
    except FileNotFoundError:
        print(f"Error: Manifest file '{manifest_file_path}' not found",
              file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level CLI boundary: report any failure and stop.
        print(f"Error uploading manifest: {e}", file=sys.stderr)
        sys.exit(1)


def create_manifest_spec(bucket_name, etag, object_key="objectlist.csv"):
    """Create the manifest specification for the batch operation.

    Args:
        bucket_name: Bucket that holds the manifest.
        etag: ETag of the uploaded manifest object.
        object_key: Key of the manifest object.

    Returns:
        A dict describing a two-column (Bucket, Key) CSV manifest and its
        location.
    """
    return {
        "Spec": {
            "Format": "S3BatchOperations_CSV_20180820",
            "Fields": ["Bucket", "Key"]
        },
        "Location": {
            "ObjectArn": f"arn:aws:s3:::{bucket_name}/{object_key}",
            "ETag": etag
        }
    }


def create_report_spec(bucket_name, prefix="batch-reports"):
    """Create the report specification for the batch operation.

    Args:
        bucket_name: Bucket that receives the completion report.
        prefix: Key prefix for the report objects.

    Returns:
        A dict that enables a CSV report covering all tasks.
    """
    return {
        "Bucket": f"arn:aws:s3:::{bucket_name}",
        "Prefix": prefix,
        "Format": "Report_CSV_20180820",
        "Enabled": True,
        "ReportScope": "AllTasks"
    }


def create_batch_job(s3control_client, account_id, role_arn, manifest_spec,
                     report_spec, expiration_days=14,
                     glacier_job_tier="STANDARD", priority=10,
                     description="Restore objects from Deep Archive"):
    """Create the S3 batch restore job.

    Args:
        s3control_client: boto3 S3 Control client.
        account_id: AWS account ID that owns the job.
        role_arn: ARN of the role the job runs as.
        manifest_spec: Manifest dict, e.g. from ``create_manifest_spec``.
        report_spec: Report dict, e.g. from ``create_report_spec``.
        expiration_days: Value sent as ``ExpirationInDays``.
        glacier_job_tier: Value sent as ``GlacierJobTier``.
        priority: Job priority.
        description: Job description.

    Returns:
        The ID of the created job.

    Exits the process with status 1 if the job cannot be created.
    """
    operation = {
        "S3InitiateRestoreObject": {
            "ExpirationInDays": expiration_days,
            "GlacierJobTier": glacier_job_tier
        }
    }

    try:
        response = s3control_client.create_job(
            AccountId=account_id,
            Operation=operation,
            Manifest=manifest_spec,
            Report=report_spec,
            Priority=priority,
            RoleArn=role_arn,
            Description=description
        )
        job_id = response['JobId']
        print(f"Submitted S3 batch job: {job_id}")
        return job_id
    except ClientError as e:
        print(f"Error creating batch job: {e}", file=sys.stderr)
        sys.exit(1)


def approve_job(s3control_client, account_id, job_id):
    """Approve the batch job by requesting the ``Ready`` status.

    Args:
        s3control_client: boto3 S3 Control client.
        account_id: AWS account ID that owns the job.
        job_id: ID of the job to approve.

    Exits the process with status 1 if the status update fails.
    """
    try:
        s3control_client.update_job_status(
            AccountId=account_id,
            JobId=job_id,
            RequestedJobStatus='Ready'
        )
        print(f"Approved job: {job_id}")
    except ClientError as e:
        print(f"Error approving job: {e}", file=sys.stderr)
        sys.exit(1)


def get_account_id(sts_client):
    """Get the AWS account ID of the current credentials.

    Args:
        sts_client: boto3 STS client.

    Returns:
        The ``Account`` field of ``get_caller_identity``.

    Exits the process with status 1 if the call fails.
    """
    try:
        response = sts_client.get_caller_identity()
        return response['Account']
    except ClientError as e:
        print(f"Error getting account ID: {e}", file=sys.stderr)
        sys.exit(1)


def main():
    """Run the restore workflow for the manifest named on the command line."""
    if len(sys.argv) < 2:
        print("Usage: python3 s3-batch-restore.py <manifest_file>",
              file=sys.stderr)
        print("You must first prepare the manifest, which is a csv with "
              "content: <bucket>,<key>", file=sys.stderr)
        sys.exit(1)

    manifest_file = sys.argv[1]

    # Initialize AWS clients.
    # Note: IAM and STS are global services, but S3 and S3 Control are
    # region-specific.
    region = DEFAULT_REGION
    session = boto3.Session()
    iam_client = session.client('iam')
    s3_client = session.client('s3', region_name=region)
    s3control_client = session.client('s3control', region_name=region)
    sts_client = session.client('sts')

    # Get account ID.
    account_id = get_account_id(sts_client)
    print(f"Using AWS account: {account_id}")

    # Create IAM role.
    role_name = create_iam_role(iam_client)
    role_arn = f"arn:aws:iam::{account_id}:role/{role_name}"

    # Create manifest bucket with a random suffix to avoid name clashes.
    random_id = generate_random_id(4)
    manifest_bucket = f"deep-archive-batch-restore-{random_id}"
    create_manifest_bucket(s3_client, manifest_bucket, region=region)

    # Upload manifest and get ETag.
    etag = upload_manifest(s3_client, manifest_bucket, manifest_file)

    # Create manifest and report specs (in memory, no temp files).
    manifest_spec = create_manifest_spec(manifest_bucket, etag)
    report_spec = create_report_spec(manifest_bucket)

    # Create batch job.
    print("Submitting S3 batch job...")
    job_id = create_batch_job(
        s3control_client,
        account_id,
        role_arn,
        manifest_spec,
        report_spec
    )

    # Wait a bit before approving.
    time.sleep(5)

    # Approve job.
    # NOTE(review): create_job is called without ConfirmationRequired;
    # confirm against the S3 Control CreateJob docs that the job actually
    # waits for this approval and that a fixed 5 second delay is sufficient.
    print(f"Approving submitted job {job_id}...")
    approve_job(s3control_client, account_id, job_id)

    print("\nTo delete the manifest bucket:")
    print(f"aws s3 rb s3://{manifest_bucket} --force")


if __name__ == "__main__":
    main()