Files
code-dumps/aws/s3-restore-status.py
T

124 lines
4.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
S3 Restore Status Checker
This script checks the restore status of S3 objects that have been archived to
Glacier or Deep Archive storage classes. It reads a CSV file containing bucket
and object key pairs, then queries each object's restore status using the
S3 head_object API.
The script is useful for monitoring the progress of S3 batch restore operations
or checking the status of individual object restorations from Glacier storage.
Usage:
# Normal mode (debug output with filenames):
python3 s3-restore-status.py
# Optimized mode (per-object lines suppressed; only the status summary is printed):
python3 -O s3-restore-status.py
Input File Format:
The script reads /tmp/objectlist.csv by default. Each line should contain:
<bucket>,<object_key>
Example:
my-bucket,path/to/file1.log
my-bucket,path/to/file2.log
another-bucket,archive/data.json
Output:
The script prints the restore status for each object (normal mode only),
followed by a summary count of how many objects had each status:
- "Not-being-restored" if the object has no active restore operation
- Restore status string (e.g., 'ongoing-request="true"') if restoration is in progress
- Restore status with expiry date (e.g., 'ongoing-request="false", expiry-date="..."')
if restoration is complete
Example Output:
Not-being-restored
ongoing-request="true"
ongoing-request="false", expiry-date="Mon, 25 Nov 2025 00:00:00 GMT"
"""
from collections import Counter
import boto3
import csv
import sys
def read_objectlist(path: str = "/tmp/objectlist.csv") -> tuple[list[str], str]:
    """Read the object-list CSV and return its object keys and bucket name.

    Each row is expected to look like ``<bucket>,<object_key>``. Parsing uses
    the ``csv`` module, so quoted fields (for example keys that contain
    commas) are handled. Surrounding whitespace is stripped from both fields.

    Blank lines are ignored. Rows with fewer than two fields, or with an
    empty bucket or key, are skipped with a warning on stderr rather than
    aborting the run with an ``IndexError``.

    Only one bucket name is returned: the one from the last valid row. If the
    file names more than one bucket, a warning is written to stderr, because
    a caller that uses the returned bucket for every key would query some
    keys against the wrong bucket.

    Args:
        path: Path to the CSV file. Defaults to "/tmp/objectlist.csv".

    Returns:
        A ``(keys, bucket)`` tuple: the object keys in file order, and the
        bucket name from the last valid row (``""`` if there was none).

    Raises:
        SystemExit: With status code 1 if the file does not exist.
    """
    keys: list[str] = []
    bucket: str = ""
    buckets_seen: set[str] = set()
    try:
        with open(path, "r", encoding="utf-8", newline="") as f:
            reader = csv.reader(f)
            for row in reader:
                # csv.reader yields [] for a completely blank line.
                if not any(field.strip() for field in row):
                    continue
                if len(row) < 2 or not row[0].strip() or not row[1].strip():
                    print(
                        f"Warning: skipping malformed row at line "
                        f"{reader.line_num} of {path}: {row!r}",
                        file=sys.stderr,
                    )
                    continue
                bucket = row[0].strip()
                buckets_seen.add(bucket)
                keys.append(row[1].strip())
    except FileNotFoundError:
        print(f"Error: object list file not found at {path}", file=sys.stderr)
        sys.exit(1)
    if len(buckets_seen) > 1:
        print(
            f"Warning: {path} names {len(buckets_seen)} different buckets; "
            f"only the last one ({bucket!r}) is returned",
            file=sys.stderr,
        )
    return keys, bucket
def main():
    """Check and summarise the restore status of the listed S3 objects.

    Reads keys and a bucket name from /tmp/objectlist.csv via
    ``read_objectlist``, creates an S3 client for the ap-east-1 region, and
    calls ``head_object`` once per key.

    The status recorded for each object is:
    - the ``Restore`` field of the response, when present and non-empty;
    - "Not-being-restored" when the response has no ``Restore`` field;
    - "Error-<code>" when ``head_object`` raises a client error (for example
      a missing key). The error is reported on stderr and the loop continues,
      so one bad key does not discard the results for the rest of the list.

    When Python runs without ``-O`` (``__debug__`` is true), one
    "<filename>: <status>" line is printed per object. In every mode a
    ``Counter`` of how many objects had each status is printed at the end.
    """
    session = boto3.Session(region_name="ap-east-1")
    s3_client = session.client("s3")
    keys, bucket = read_objectlist(path="/tmp/objectlist.csv")
    status_counts: Counter[str] = Counter()
    for key in keys:
        try:
            response = s3_client.head_object(Bucket=bucket, Key=key)
        except s3_client.exceptions.ClientError as err:
            # Tally the failure under its error code instead of aborting.
            code = err.response.get("Error", {}).get("Code", "Unknown")
            restore_status = f"Error-{code}"
            print(
                f"Error: head_object failed for {bucket}/{key}: {err}",
                file=sys.stderr,
            )
        else:
            restore_status = response.get("Restore") or "Not-being-restored"
        if __debug__:
            # Show only the last path component (the filename) of the key.
            filename = key.split("/")[-1]
            print(f"{filename}: {restore_status}")
        status_counts[restore_status] += 1
    print(status_counts)
# Run the checker only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()