NEW: Rewrote the bash script in Python with help from Cursor
This commit is contained in:
Executable
+252
@@ -0,0 +1,252 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
S3 Batch Restore Script
|
||||
Restores objects from S3 Glacier Deep Archive using AWS S3 Batch Operations.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import json
|
||||
import secrets
|
||||
import string
|
||||
import time
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
|
||||
def generate_random_id(length=4):
    """Generate a random lowercase alphanumeric ID.

    Each character is drawn independently with ``secrets.choice`` from the
    36-symbol alphabet ``0-9`` + ``a-z``.

    Args:
        length: Number of characters to generate.

    Returns:
        A string of ``length`` characters. ``range`` yields nothing for a
        zero or negative ``length``, so the result is then the empty string.
    """
    # Digits AND lowercase letters, as the contract states. A digits-only
    # alphabet allows just 10**length distinct IDs, which makes collisions
    # between short IDs far more likely than with 36**length.
    characters = string.digits + string.ascii_lowercase
    return ''.join(secrets.choice(characters) for _ in range(length))
|
||||
|
||||
|
||||
def create_trust_policy():
    """Build the trust policy document for the IAM role.

    Returns:
        A dict with a single ``Allow`` statement granting ``sts:AssumeRole``
        to the ``batchoperations.s3.amazonaws.com`` service principal.
    """
    assume_role_statement = {
        "Effect": "Allow",
        "Principal": {"Service": "batchoperations.s3.amazonaws.com"},
        "Action": "sts:AssumeRole",
    }
    return {
        "Version": "2012-10-17",
        "Statement": [assume_role_statement],
    }
|
||||
|
||||
|
||||
def create_iam_role(iam_client, role_name="S3BatchRestoreRole"):
    """Ensure the batch-restore IAM role exists and has its policy attached.

    Args:
        iam_client: IAM client exposing ``create_role`` and
            ``attach_role_policy``.
        role_name: Name of the role to create.

    Returns:
        ``role_name``, unchanged.

    Raises:
        ClientError: For any IAM error other than ``EntityAlreadyExists``.
    """
    assume_role_document = json.dumps(create_trust_policy())

    # Step 1: create the role; an already-existing role is not an error.
    try:
        iam_client.create_role(
            RoleName=role_name,
            Description="S3 batch restore role",
            AssumeRolePolicyDocument=assume_role_document,
        )
    except ClientError as err:
        if err.response['Error']['Code'] != 'EntityAlreadyExists':
            raise
        print(f"IAM role {role_name} already exists, skipping creation")
    else:
        print(f"Created IAM role: {role_name}")

    # Step 2: attach the AWS-managed S3 full-access policy to the role.
    try:
        iam_client.attach_role_policy(
            RoleName=role_name,
            PolicyArn='arn:aws:iam::aws:policy/AmazonS3FullAccess',
        )
    except ClientError as err:
        if err.response['Error']['Code'] != 'EntityAlreadyExists':
            raise
        print(f"Policy already attached to {role_name}")
    else:
        print(f"Attached policy to role: {role_name}")

    return role_name
|
||||
|
||||
|
||||
def create_manifest_bucket(s3_client, bucket_name, region="ap-east-1"):
    """Create the S3 bucket for the manifest file in the specified region.

    An "already exists" response from S3 is reported and tolerated so the
    script can be re-run against the same bucket name.

    Args:
        s3_client: S3 client exposing ``create_bucket``.
        bucket_name: Name of the bucket to create.
        region: Target region. ``us-east-1`` is requested without a
            ``CreateBucketConfiguration``; every other region is passed as
            the ``LocationConstraint``.

    Raises:
        ClientError: For any error code other than ``BucketAlreadyExists``
            or ``BucketAlreadyOwnedByYou``.
    """
    request = {'Bucket': bucket_name}
    # For regions other than us-east-1, specify LocationConstraint
    if region != "us-east-1":
        request['CreateBucketConfiguration'] = {'LocationConstraint': region}

    try:
        s3_client.create_bucket(**request)
    except ClientError as e:
        # S3 reports a bucket the caller already owns as
        # BucketAlreadyOwnedByYou; that is the case where reuse can work, so
        # it must be tolerated alongside BucketAlreadyExists.
        # NOTE(review): BucketAlreadyExists normally means another account
        # holds the name, in which case the later upload will fail - confirm
        # whether this should become a hard error.
        if e.response['Error']['Code'] not in ('BucketAlreadyExists',
                                               'BucketAlreadyOwnedByYou'):
            raise
        print(f"Bucket {bucket_name} already exists")
    else:
        print(f"Created manifest bucket: {bucket_name} in region: {region}")
|
||||
|
||||
|
||||
def upload_manifest(s3_client, bucket_name, manifest_file_path, object_key="objectlist.csv"):
    """Upload the local manifest file to S3 and return the object's ETag.

    Args:
        s3_client: S3 client exposing ``put_object``.
        bucket_name: Destination bucket.
        manifest_file_path: Path of the local manifest file, read as bytes.
        object_key: Key to store the manifest under.

    Returns:
        The ``ETag`` from the ``put_object`` response with surrounding
        double quotes stripped.

    Exits the process with status 1 (after printing to stderr) when the file
    is missing or when any other exception occurs during the upload.
    """
    try:
        with open(manifest_file_path, 'rb') as manifest_stream:
            put_result = s3_client.put_object(
                Bucket=bucket_name,
                Key=object_key,
                Body=manifest_stream,
            )
        print(f"Uploaded manifest to s3://{bucket_name}/{object_key}")
        # Drop the double quotes around the ETag value, if present.
        return put_result['ETag'].strip('"')
    except Exception as failure:
        # A missing local file gets its own message; everything else is
        # reported generically. Both cases end the script.
        if isinstance(failure, FileNotFoundError):
            print(f"Error: Manifest file '{manifest_file_path}' not found", file=sys.stderr)
        else:
            print(f"Error uploading manifest: {failure}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
def create_manifest_spec(bucket_name, etag, object_key="objectlist.csv"):
    """Build the manifest specification for the batch operation.

    Args:
        bucket_name: Bucket holding the manifest object.
        etag: ETag of the uploaded manifest object.
        object_key: Key of the manifest object within the bucket.

    Returns:
        A dict with a ``Spec`` entry (CSV format, ``Bucket`` and ``Key``
        fields) and a ``Location`` entry (object ARN plus ETag).
    """
    csv_layout = {
        "Format": "S3BatchOperations_CSV_20180820",
        "Fields": ["Bucket", "Key"],
    }
    manifest_location = {
        "ObjectArn": f"arn:aws:s3:::{bucket_name}/{object_key}",
        "ETag": etag,
    }
    return {"Spec": csv_layout, "Location": manifest_location}
|
||||
|
||||
|
||||
def create_report_spec(bucket_name, prefix="batch-reports"):
    """Build the completion-report specification for the batch operation.

    Args:
        bucket_name: Bucket that receives the report; converted to an ARN.
        prefix: Key prefix for the report objects.

    Returns:
        A dict enabling a CSV report with scope ``AllTasks``.
    """
    report_bucket_arn = f"arn:aws:s3:::{bucket_name}"
    return dict(
        Bucket=report_bucket_arn,
        Prefix=prefix,
        Format="Report_CSV_20180820",
        Enabled=True,
        ReportScope="AllTasks",
    )
|
||||
|
||||
|
||||
def create_batch_job(s3control_client, account_id, role_arn, manifest_spec, report_spec,
                     expiration_days=14, glacier_job_tier="STANDARD", priority=10,
                     description="Restore objects from Deep Archive"):
    """Submit an S3 Batch Operations restore job and return its ID.

    Args:
        s3control_client: S3 Control client exposing ``create_job``.
        account_id: AWS account ID that owns the job.
        role_arn: ARN of the role passed as ``RoleArn``.
        manifest_spec: Manifest specification passed as ``Manifest``.
        report_spec: Report specification passed as ``Report``.
        expiration_days: Sent as ``ExpirationInDays`` of the restore operation.
        glacier_job_tier: Sent as ``GlacierJobTier`` of the restore operation.
        priority: Job priority.
        description: Job description.

    Returns:
        The ``JobId`` from the ``create_job`` response.

    Exits the process with status 1 (after printing to stderr) when
    ``create_job`` raises ``ClientError``.
    """
    restore_settings = {
        "ExpirationInDays": expiration_days,
        "GlacierJobTier": glacier_job_tier,
    }
    job_request = {
        "AccountId": account_id,
        "Operation": {"S3InitiateRestoreObject": restore_settings},
        "Manifest": manifest_spec,
        "Report": report_spec,
        "Priority": priority,
        "RoleArn": role_arn,
        "Description": description,
    }

    try:
        created = s3control_client.create_job(**job_request)
    except ClientError as err:
        print(f"Error creating batch job: {err}", file=sys.stderr)
        sys.exit(1)

    job_id = created['JobId']
    print(f"Submitted S3 batch job: {job_id}")
    return job_id
|
||||
|
||||
|
||||
def approve_job(s3control_client, account_id, job_id):
    """Approve a submitted batch job by requesting the ``Ready`` status.

    Args:
        s3control_client: S3 Control client exposing ``update_job_status``.
        account_id: AWS account ID that owns the job.
        job_id: ID of the job to approve.

    Exits the process with status 1 (after printing to stderr) when
    ``update_job_status`` raises ``ClientError``.
    """
    status_request = dict(
        AccountId=account_id,
        JobId=job_id,
        RequestedJobStatus='Ready',
    )
    try:
        s3control_client.update_job_status(**status_request)
    except ClientError as err:
        print(f"Error approving job: {err}", file=sys.stderr)
        sys.exit(1)
    print(f"Approved job: {job_id}")
|
||||
|
||||
|
||||
def get_account_id(sts_client):
    """Return the AWS account ID of the current caller.

    Args:
        sts_client: STS client exposing ``get_caller_identity``.

    Returns:
        The ``Account`` value from the caller-identity response.

    Exits the process with status 1 (after printing to stderr) when the
    call raises ``ClientError``.
    """
    try:
        return sts_client.get_caller_identity()['Account']
    except ClientError as err:
        print(f"Error getting account ID: {err}", file=sys.stderr)
        sys.exit(1)
|
||||
|
||||
|
||||
def main():
    """Run the restore workflow for the manifest named on the command line.

    Reads the manifest path from ``sys.argv[1]`` and then, in order:
    looks up the account ID, ensures the IAM role exists, creates a manifest
    bucket named ``deep-archive-batch-restore-<random id>``, uploads the
    manifest, submits the batch restore job, waits five seconds, and
    approves the job. Finally prints the command that deletes the bucket.

    Exits with status 1 and a usage message when no manifest path is given.
    """
    if len(sys.argv) < 2:
        print("Usage: python3 s3-batch-restore.py <manifest.csv>", file=sys.stderr)
        # The manifest is declared to S3 as a CSV with the columns Bucket and
        # Key, so each row is comma-separated (not colon-separated).
        print("You must first prepare the manifest, which is a csv with content <bucket>,<key>", file=sys.stderr)
        sys.exit(1)

    manifest_file = sys.argv[1]

    # Initialize AWS clients
    # Note: IAM and STS are global services, but S3 and S3 Control are region-specific
    region = "ap-east-1"  # Specify the region for S3 operations
    session = boto3.Session()
    iam_client = session.client('iam')  # IAM is global
    s3_client = session.client('s3', region_name=region)  # S3 client for ap-east-1
    s3control_client = session.client('s3control', region_name=region)  # S3 Control for ap-east-1
    sts_client = session.client('sts')  # STS is global

    # Get account ID
    account_id = get_account_id(sts_client)
    print(f"Using AWS account: {account_id}")

    # Create IAM role
    role_name = create_iam_role(iam_client)
    role_arn = f"arn:aws:iam::{account_id}:role/{role_name}"

    # Create manifest bucket; the random suffix keeps each run's bucket distinct
    random_id = generate_random_id(4)
    manifest_bucket = f"deep-archive-batch-restore-{random_id}"
    create_manifest_bucket(s3_client, manifest_bucket, region=region)

    # Upload manifest and get ETag
    etag = upload_manifest(s3_client, manifest_bucket, manifest_file)

    # Create manifest and report specs (in memory, no temp files).
    # The completion report is written to the same bucket as the manifest.
    manifest_spec = create_manifest_spec(manifest_bucket, etag)
    report_spec = create_report_spec(manifest_bucket)

    # Create batch job
    print("Submitting S3 batch job...")
    job_id = create_batch_job(
        s3control_client,
        account_id,
        role_arn,
        manifest_spec,
        report_spec,
    )

    # Wait a bit before approving.
    # NOTE(review): presumably this gives the new job time to reach a state
    # that accepts the status update - confirm the delay is sufficient.
    time.sleep(5)

    # Approve job
    print(f"Approving submitted job {job_id}...")
    approve_job(s3control_client, account_id, job_id)

    print("\nTo delete the manifest bucket:")
    print(f"aws s3 rb s3://{manifest_bucket} --force")
|
||||
|
||||
|
||||
# Script entry point: run the workflow only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user