#!/usr/bin/env python3
r"""S3 Batch Restore Script.

Restores objects from S3 Glacier Deep Archive using AWS S3 Batch Operations.

Generate objectlist.csv with the following script:

    BUCKET=whk1-bea-icc-mbk-prd-s3-log-infra-log
    PREFIX=elb/alb-icc-mbk/AWSLogs/851239346925/elasticloadbalancing/ap-east-1/2025/08/11/
    aws s3 ls s3://$BUCKET/$PREFIX | awk "{print \"$BUCKET,$PREFIX\"\$NF}" | tee /tmp/objectlist.csv
"""

import json
import random
import sys
import time

import boto3
from botocore.exceptions import ClientError

# Region used for every AWS client created by main().
DEFAULT_REGION = "ap-east-1"

# Approval retry policy: bounded so a permanent error cannot hang the script.
APPROVE_RETRY_DELAY_SECONDS = 5
APPROVE_MAX_ATTEMPTS = 12


def generate_random_id(digits: int = 4) -> int:
    """Return a random integer with exactly ``digits`` decimal digits.

    The default of 4 digits yields a value in the range 1000-9999, which
    main() appends to the manifest bucket name.

    Raises:
        ValueError: if ``digits`` is smaller than 1.
    """
    if digits < 1:
        raise ValueError("digits must be at least 1")
    return random.randint(10 ** (digits - 1), 10 ** digits - 1)


def create_trust_policy() -> dict:
    """Return the trust policy document for the IAM role.

    The policy allows the ``batchoperations.s3.amazonaws.com`` service
    principal to call ``sts:AssumeRole``.
    """
    return {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {
                    "Service": "batchoperations.s3.amazonaws.com",
                },
                "Action": "sts:AssumeRole",
            },
        ],
    }


def create_iam_role(iam_client, role_name="S3BatchRestoreRole"):
    """Create the IAM role for S3 batch operations and attach its policy.

    An ``EntityAlreadyExists`` error from either call is reported and
    ignored so the script can be re-run; any other ``ClientError`` is
    re-raised.

    Args:
        iam_client: boto3 IAM client.
        role_name: Name of the role to create or reuse.

    Returns:
        The role name that was passed in.
    """
    trust_policy = create_trust_policy()

    try:
        iam_client.create_role(
            RoleName=role_name,
            Description="S3 batch restore role",
            AssumeRolePolicyDocument=json.dumps(trust_policy),
        )
        print(f"Created IAM role: {role_name}")
    except ClientError as e:
        if e.response['Error']['Code'] == 'EntityAlreadyExists':
            print(f"IAM role {role_name} already exists, skipping creation")
        else:
            raise

    # NOTE(review): AmazonS3FullAccess is much broader than a restore job
    # needs; consider a scoped policy for the source and manifest buckets.
    try:
        iam_client.attach_role_policy(
            RoleName=role_name,
            PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',
        )
        print(f"Attached policy to role: {role_name}")
    except ClientError as e:
        if e.response['Error']['Code'] == 'EntityAlreadyExists':
            print(f"Policy already attached to {role_name}")
        else:
            raise

    return role_name


def create_manifest_bucket(s3_client, bucket_name, region):
    """Create the S3 bucket that holds the manifest file.

    Args:
        s3_client: boto3 S3 client.
        bucket_name: Name of the bucket to create.
        region: Region to create the bucket in.
    """
    params = {"Bucket": bucket_name}
    # us-east-1 is the default location and must not be sent as an explicit
    # LocationConstraint; every other region has to be named.
    if region != "us-east-1":
        params["CreateBucketConfiguration"] = {'LocationConstraint': region}

    s3_client.create_bucket(**params)
    print(f"Created manifest bucket: {bucket_name}")


def upload_manifest(s3_client, bucket_name, manifest_file_path,
                    object_key="objectlist.csv"):
    """Upload the manifest file to S3 and return its ETag.

    Exits the process with status 1 if the file does not exist or the
    upload fails.

    Args:
        s3_client: boto3 S3 client.
        bucket_name: Destination bucket.
        manifest_file_path: Local path of the manifest CSV.
        object_key: Key to store the manifest under.

    Returns:
        The ETag from the put_object response, without surrounding quotes.
    """
    try:
        with open(manifest_file_path, 'rb') as f:
            response = s3_client.put_object(
                Bucket=bucket_name,
                Key=object_key,
                Body=f,
            )
        print(f"Uploaded manifest to s3://{bucket_name}/{object_key}")

        # The response wraps the ETag in double quotes; strip them.
        return response['ETag'].strip('"')
    except FileNotFoundError:
        print(f"Error: Manifest file '{manifest_file_path}' not found",
              file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary for this step: report any failure and stop.
        print(f"Error uploading manifest: {e}", file=sys.stderr)
        sys.exit(1)


def create_manifest_spec(bucket_name, etag, object_key="objectlist.csv"):
    """Build the manifest specification for the batch operation.

    Describes a CSV manifest with ``Bucket`` and ``Key`` columns located at
    ``object_key`` in ``bucket_name`` and identified by ``etag``.
    """
    return {
        "Spec": {
            "Format": "S3BatchOperations_CSV_20180820",
            "Fields": ["Bucket", "Key"],
        },
        "Location": {
            "ObjectArn": f"arn:aws:s3:::{bucket_name}/{object_key}",
            "ETag": etag,
        },
    }


def create_report_spec(bucket_name, prefix="batch-reports"):
    """Build the completion-report specification for the batch operation.

    The report is enabled, covers all tasks, and is written as CSV under
    ``prefix`` in ``bucket_name``.
    """
    return {
        "Bucket": f"arn:aws:s3:::{bucket_name}",
        "Prefix": prefix,
        "Format": "Report_CSV_20180820",
        "Enabled": True,
        "ReportScope": "AllTasks",
    }


def create_batch_job(s3control_client, account_id, role_arn, manifest_spec,
                     report_spec, expiration_days=14,
                     glacier_job_tier="STANDARD", priority=10,
                     description="Restore objects from Deep Archive"):
    """Create the S3 batch restore job and return its job ID.

    Exits the process with status 1 if the job cannot be created.

    Args:
        s3control_client: boto3 S3 Control client.
        account_id: AWS account that owns the job.
        role_arn: ARN of the role passed to the job as ``RoleArn``.
        manifest_spec: Manifest specification (see create_manifest_spec).
        report_spec: Report specification (see create_report_spec).
        expiration_days: Passed as ``ExpirationInDays`` of the restore.
        glacier_job_tier: Passed as ``GlacierJobTier`` of the restore.
        priority: Job priority.
        description: Job description.

    Returns:
        The ``JobId`` from the create_job response.
    """
    operation = {
        "S3InitiateRestoreObject": {
            "ExpirationInDays": expiration_days,
            "GlacierJobTier": glacier_job_tier,
        },
    }

    try:
        response = s3control_client.create_job(
            AccountId=account_id,
            Operation=operation,
            Manifest=manifest_spec,
            Report=report_spec,
            Priority=priority,
            RoleArn=role_arn,
            Description=description,
        )
    except ClientError as e:
        print(f"Error creating batch job: {e}", file=sys.stderr)
        sys.exit(1)

    job_id = response['JobId']
    print(f"Submitted S3 batch job: {job_id}")
    return job_id


def approve_job(s3control_client, account_id, job_id) -> bool:
    """Request that the batch job move to the ``Ready`` status.

    Returns:
        True if the status update was accepted, False if it raised a
        ``ClientError`` (the error is printed to stderr).
    """
    try:
        s3control_client.update_job_status(
            AccountId=account_id,
            JobId=job_id,
            RequestedJobStatus='Ready',
        )
    except ClientError as e:
        print(f"Error approving job: {e}", file=sys.stderr)
        return False

    print(f"Approved job: {job_id}")
    return True


def get_account_id(sts_client):
    """Return the AWS account ID of the caller; exit with status 1 on error."""
    try:
        response = sts_client.get_caller_identity()
    except ClientError as e:
        print(f"Error getting account ID: {e}", file=sys.stderr)
        sys.exit(1)
    return response['Account']


def main():
    """Run the restore workflow for the manifest named on the command line.

    Steps: resolve the account, ensure the IAM role exists, create a
    manifest bucket, upload the manifest, submit the batch job, then try to
    approve it. Exits with status 1 on a usage error or if the job could
    not be approved within the retry budget.
    """
    if len(sys.argv) < 2:
        print("Usage: python3 s3-batch-restore.py MANIFEST_CSV",
              file=sys.stderr)
        print("You must first prepare the manifest, which is a csv with "
              "content: bucket,key (one object per line)", file=sys.stderr)
        sys.exit(1)

    manifest_file = sys.argv[1]

    # Initialize AWS clients
    session = boto3.Session(region_name=DEFAULT_REGION)
    iam_client = session.client('iam')
    s3_client = session.client('s3')
    s3control_client = session.client('s3control')
    sts_client = session.client('sts')

    # Get account ID
    account_id = get_account_id(sts_client)
    print(f"Using AWS account: {account_id}")

    # Create IAM role
    role_name = create_iam_role(iam_client)
    role_arn = f"arn:aws:iam::{account_id}:role/{role_name}"

    # Create manifest bucket
    random_id = generate_random_id()
    manifest_bucket = f"deep-archive-batch-restore-{random_id}"
    create_manifest_bucket(s3_client, manifest_bucket, session.region_name)

    # Upload manifest and get ETag
    etag = upload_manifest(s3_client, manifest_bucket, manifest_file)

    # Create manifest and report specs (in memory, no temp files)
    manifest_spec = create_manifest_spec(manifest_bucket, etag)
    report_spec = create_report_spec(manifest_bucket)

    # Create batch job
    print("Submitting S3 batch job...")
    job_id = create_batch_job(
        s3control_client,
        account_id,
        role_arn,
        manifest_spec,
        report_spec,
    )

    # Wait a bit before approving
    time.sleep(APPROVE_RETRY_DELAY_SECONDS)

    # Approve job, retrying a bounded number of times so that a permanent
    # error (for example missing permissions) cannot loop forever.
    # NOTE(review): create_job is called without ConfirmationRequired;
    # confirm the job is actually held for confirmation, otherwise this
    # status update may be rejected on every attempt.
    print(f"Approving submitted job {job_id}...")
    approved = False
    for attempt in range(1, APPROVE_MAX_ATTEMPTS + 1):
        if approve_job(s3control_client, account_id, job_id):
            approved = True
            break
        if attempt < APPROVE_MAX_ATTEMPTS:
            time.sleep(APPROVE_RETRY_DELAY_SECONDS)

    if not approved:
        print(f"Giving up approving job {job_id} after "
              f"{APPROVE_MAX_ATTEMPTS} attempts; check the job status "
              "manually.", file=sys.stderr)

    print("\nReview s3 batch job status. When it is completed, delete the manifest bucket:")
    print(f"aws s3 rb s3://{manifest_bucket} --force")

    if not approved:
        sys.exit(1)


if __name__ == "__main__":
    main()