
Object Storage & Blob Storage

Petabyte-scale unstructured data with durability, availability, and CDN integration

TL;DR

Object storage (S3, GCS, Azure Blob) stores unstructured data (files, images, backups) at petabyte scale with high durability and availability. It is cost-effective, serverless, and integrates with CDNs. Trade-offs: access patterns differ from databases (whole-object reads and writes rather than random access within files), every request pays network latency, and consistency guarantees vary by provider and operation.

Learning Objectives

  • Understand object storage design (buckets, keys, versioning)
  • Design key hierarchies for organization and performance
  • Recognize storage classes and lifecycle policies
  • Plan for durability, availability, and cost optimization

Motivating Scenario

Photo sharing app: 1M users with 1,000 photos each = 1B images. At a few MB per photo that is on the order of petabytes of binary data, far more than you would want to store as blobs in an RDBMS; S3 absorbs that scale natively with 11 nines (99.999999999%) durability. With users in the US, EU, and Asia, a CloudFront CDN serves ~95% of requests with <100 ms latency, and old photos auto-archive to Glacier for roughly 10x storage-cost savings.
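
Back-of-the-envelope, with an assumed average photo size, plus one possible key hierarchy for the scenario (the sizes, prefix layout, and helper name below are illustrative assumptions, not provider APIs):

# Rough storage estimate for the scenario above
users = 1_000_000
photos_per_user = 1_000
avg_photo_mb = 2                       # assumed average size per photo
total_tb = users * photos_per_user * avg_photo_mb / 1_000_000
print(f"~{total_tb:,.0f} TB of raw image data")   # ~2,000 TB, i.e. about 2 PB

# One possible key hierarchy: shard by user, then by upload date
def photo_key(user_id: str, upload_date: str, photo_id: str) -> str:
    # e.g. photos/4821/2025/02/14/a1b2c3d4.jpg
    year, month, day = upload_date.split('-')
    return f"photos/{user_id}/{year}/{month}/{day}/{photo_id}.jpg"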

Core Concepts

Practical Example

import boto3
import os

# Create S3 client and resource
s3_client = boto3.client('s3', region_name='us-west-2')
s3_resource = boto3.resource('s3')

# Upload file with metadata and server-side encryption
def upload_file(bucket, key, file_path):
    s3_client.upload_file(
        file_path,
        bucket,
        key,
        ExtraArgs={
            'ContentType': 'image/jpeg',
            'ServerSideEncryption': 'AES256',
            'Metadata': {'user-id': '123', 'upload-date': '2025-02-14'}
        }
    )

# Upload with progress callback
def upload_file_with_progress(bucket, key, file_path):
    s3_resource.meta.client.upload_file(
        file_path, bucket, key,
        Callback=ProgressPercentage(file_path)
    )
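
# ProgressPercentage is not part of boto3 itself. A minimal sketch of the
# callback class used above (adapted from the boto3 upload examples):
import sys
import threading

class ProgressPercentage:
    """Callable that prints cumulative transfer progress for a file."""
    def __init__(self, file_path):
        self._file_path = file_path
        self._size = float(os.path.getsize(file_path))
        self._seen_so_far = 0
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        # boto3 invokes the callback with the byte count of each transferred chunk
        with self._lock:
            self._seen_so_far += bytes_amount
            percentage = (self._seen_so_far / self._size) * 100
            sys.stdout.write(f"\r{self._file_path}  {percentage:.1f}%")
            sys.stdout.flush()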

# Download file
def download_file(bucket, key, local_path):
    s3_client.download_file(bucket, key, local_path)

# List objects under a prefix
def list_objects(bucket, prefix='', max_keys=100):
    response = s3_client.list_objects_v2(
        Bucket=bucket,
        Prefix=prefix,
        MaxKeys=max_keys
    )

    objects = []
    for obj in response.get('Contents', []):
        objects.append({
            'key': obj['Key'],
            'size': obj['Size'],
            'last_modified': obj['LastModified'],
            'storage_class': obj.get('StorageClass', 'STANDARD')
        })

    return objects

# Generate presigned URL (temporary access)
def get_presigned_url(bucket, key, expiration_seconds=3600):
    url = s3_client.generate_presigned_url(
        'get_object',
        Params={'Bucket': bucket, 'Key': key},
        ExpiresIn=expiration_seconds
    )
    return url

# Multipart upload for large files
def upload_large_file(bucket, key, file_path):
    file_size = os.path.getsize(file_path)
    part_size = 5 * 1024 * 1024  # 5 MB (the S3 minimum part size, except for the last part)

    multipart = s3_client.create_multipart_upload(Bucket=bucket, Key=key)
    upload_id = multipart['UploadId']

    parts = []
    with open(file_path, 'rb') as f:
        part_num = 1
        while True:
            data = f.read(part_size)
            if not data:
                break

            response = s3_client.upload_part(
                Bucket=bucket,
                Key=key,
                PartNumber=part_num,
                UploadId=upload_id,
                Body=data
            )

            parts.append({
                'ETag': response['ETag'],
                'PartNumber': part_num
            })
            part_num += 1

    s3_client.complete_multipart_upload(
        Bucket=bucket,
        Key=key,
        UploadId=upload_id,
        MultipartUpload={'Parts': parts}
    )

# Lifecycle policy (auto-archive and expire old photos)
def set_lifecycle_policy(bucket):
    s3_client.put_bucket_lifecycle_configuration(
        Bucket=bucket,
        LifecycleConfiguration={
            'Rules': [
                {
                    'Id': 'archive-old-photos',
                    'Filter': {'Prefix': 'photos/'},
                    'Status': 'Enabled',
                    'Transitions': [
                        {
                            'Days': 30,
                            'StorageClass': 'INTELLIGENT_TIERING'
                        },
                        {
                            'Days': 90,
                            'StorageClass': 'GLACIER'
                        },
                    ],
                    'Expiration': {'Days': 2555}  # 7 years
                }
            ]
        }
    )

# Versioning
def enable_versioning(bucket):
    s3_client.put_bucket_versioning(
        Bucket=bucket,
        VersioningConfiguration={'Status': 'Enabled'}
    )

# Server-side replication (requires versioning enabled on both buckets)
def setup_replication(source_bucket, dest_bucket):
    s3_client.put_bucket_replication(
        Bucket=source_bucket,
        ReplicationConfiguration={
            'Role': 'arn:aws:iam::ACCOUNT:role/s3-replication',
            'Rules': [
                {
                    'ID': 'replicate-all',
                    'Status': 'Enabled',
                    'Priority': 1,
                    'Filter': {'Prefix': ''},
                    # When a Filter is specified, DeleteMarkerReplication is required
                    'DeleteMarkerReplication': {'Status': 'Disabled'},
                    'Destination': {
                        'Bucket': f'arn:aws:s3:::{dest_bucket}',
                        # Replication Time Control also requires a Metrics block
                        'ReplicationTime': {'Status': 'Enabled', 'Time': {'Minutes': 15}},
                        'Metrics': {'Status': 'Enabled', 'EventThreshold': {'Minutes': 15}}
                    }
                }
            ]
        }
    )
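
The same building blocks exist on the other providers mentioned in the TL;DR. As a rough sketch (assuming the google-cloud-storage client library and credentials that can sign URLs; function names are illustrative), the equivalent upload and signed-URL operations on GCS look like this:

from datetime import timedelta
from google.cloud import storage

def upload_to_gcs(bucket_name, key, file_path):
    # The client picks up Application Default Credentials from the environment
    client = storage.Client()
    blob = client.bucket(bucket_name).blob(key)
    blob.upload_from_filename(file_path, content_type='image/jpeg')

def gcs_signed_url(bucket_name, key, expiration_seconds=3600):
    # V4 signed URLs are the GCS counterpart of S3 presigned URLs
    client = storage.Client()
    blob = client.bucket(bucket_name).blob(key)
    return blob.generate_signed_url(
        version='v4',
        expiration=timedelta(seconds=expiration_seconds),
        method='GET'
    )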

When to Use Object Storage / When Not to Use

Use Object Storage When
  1. Large unstructured files (images, videos)
  2. Petabyte-scale storage needed
  3. Infrequent random access
  4. Archival and backup data
  5. Integration with CDN for distribution
Use Databases When
  1. Frequent random access within data
  2. Complex queries required
  3. Transactional consistency needed
  4. Small structured records
  5. ACID guarantees important
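
In practice the two lists above are usually combined: the binary payload lives in the bucket, while the queryable metadata, including the object key, lives in a database. A minimal sketch of that split, assuming a SQLite metadata table and illustrative bucket and key names:

import sqlite3
import boto3

s3 = boto3.client('s3')

def save_photo(conn, bucket, user_id, photo_id, file_path):
    key = f"photos/{user_id}/{photo_id}.jpg"
    # The blob itself goes to object storage...
    s3.upload_file(file_path, bucket, key)
    # ...while only the small, queryable record (with the object key) goes to the database
    conn.execute(
        "CREATE TABLE IF NOT EXISTS photos (photo_id TEXT PRIMARY KEY, user_id TEXT, s3_key TEXT)"
    )
    conn.execute(
        "INSERT INTO photos (photo_id, user_id, s3_key) VALUES (?, ?, ?)",
        (photo_id, user_id, key)
    )
    conn.commit()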

Patterns and Pitfalls

Design Review Checklist

  • Bucket naming strategy supports access patterns
  • Key hierarchy meaningful and consistently applied
  • Versioning enabled for critical data
  • Lifecycle policies configured for cost
  • Cross-region replication for important data
  • Encryption at rest and in transit configured (see sketch after this checklist)
  • Presigned URLs for temporary access
  • Access logging enabled for audit trail (see sketch after this checklist)
  • Multipart upload for large files
  • CDN distribution configured for frequently accessed objects
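
For the encryption and access-logging items, a minimal sketch reusing the s3_client from the Practical Example (bucket names and the log prefix are placeholders):

def harden_bucket(bucket, log_bucket):
    # Default server-side encryption for all new objects in the bucket
    s3_client.put_bucket_encryption(
        Bucket=bucket,
        ServerSideEncryptionConfiguration={
            'Rules': [
                {'ApplyServerSideEncryptionByDefault': {'SSEAlgorithm': 'AES256'}}
            ]
        }
    )
    # Server access logging for the audit trail
    s3_client.put_bucket_logging(
        Bucket=bucket,
        BucketLoggingStatus={
            'LoggingEnabled': {
                'TargetBucket': log_bucket,
                'TargetPrefix': f'access-logs/{bucket}/'
            }
        }
    )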

Self-Check

  • How would you organize keys for a photo sharing app?
  • What's the difference between versioning and cross-region replication?
  • When would you use Intelligent-Tiering vs manual transitions?
  • How do lifecycle policies reduce storage costs?

Object storage provides cost-effective, durable storage for unstructured data at massive scale, but isn't suitable for random access within files or complex queries. Use for files, backups, and archives alongside databases for structured data.

Next Steps

  • Explore CDN Integration for global distribution
  • Learn Cost Optimization for cloud storage
  • Study Backup & Disaster Recovery strategies
  • Dive into Data Lifecycle policies

References

  • AWS S3 Documentation
  • Google Cloud Storage Documentation
  • Azure Blob Storage Guide
  • AWS S3 Best Practices