dify/api/extensions/storage/aws_s3_storage.py
Harry 248fa38c34 refactor(storage): unified storage cache layer and presign interface
- Updated storage wrappers to use a new base class, StorageWrapper, for cleaner delegation of methods (see the sketch after this header).
- Introduced SilentStorage to handle read operations gracefully by returning empty values instead of raising exceptions.
- Enhanced CachedPresignStorage to support batch caching of download URLs, improving performance.
- Refactored FilePresignStorage to support both presigned URLs and signed proxy URLs for downloads.
- Updated AppAssetService to use the new storage structure, ensuring consistent asset management.
2026-01-23 17:01:10 +08:00
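The wrapper classes named in the commit message live outside this file. As a rough, hypothetical sketch of the delegation pattern it describes (only StorageWrapper and SilentStorage are named in the commit; everything else below is assumed):

# Hypothetical sketch, not part of this file: StorageWrapper forwards calls
# to a wrapped storage backend; SilentStorage turns read failures into empty
# values instead of exceptions, per the commit notes.
class StorageWrapper:
    def __init__(self, inner):
        self.inner = inner  # the wrapped backend, e.g. an AwsS3Storage instance

    def load_once(self, filename: str) -> bytes:
        return self.inner.load_once(filename)


class SilentStorage(StorageWrapper):
    def load_once(self, filename: str) -> bytes:
        try:
            return self.inner.load_once(filename)
        except FileNotFoundError:
            return b""  # read "silently": empty value instead of an exception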


import logging
from collections.abc import Generator

import boto3
from botocore.client import Config
from botocore.exceptions import ClientError

from configs import dify_config
from extensions.storage.base_storage import BaseStorage

logger = logging.getLogger(__name__)

class AwsS3Storage(BaseStorage):
    """Implementation for Amazon Web Services S3 storage."""

    def __init__(self):
        super().__init__()
        self.bucket_name = dify_config.S3_BUCKET_NAME
        if dify_config.S3_USE_AWS_MANAGED_IAM:
            logger.info("Using AWS managed IAM role for S3")
            session = boto3.Session()
            region_name = dify_config.S3_REGION
            self.client = session.client(service_name="s3", region_name=region_name)
        else:
            logger.info("Using ak and sk for S3")
            self.client = boto3.client(
                "s3",
                aws_secret_access_key=dify_config.S3_SECRET_KEY,
                aws_access_key_id=dify_config.S3_ACCESS_KEY,
                endpoint_url=dify_config.S3_ENDPOINT,
                region_name=dify_config.S3_REGION,
                config=Config(s3={"addressing_style": dify_config.S3_ADDRESS_STYLE}),
            )
        # Ensure the bucket exists, creating it if necessary.
        try:
            self.client.head_bucket(Bucket=self.bucket_name)
        except ClientError as e:
            # 404: the bucket does not exist, so create it.
            if e.response.get("Error", {}).get("Code") == "404":
                self.client.create_bucket(Bucket=self.bucket_name)
            # 403: the bucket may exist but is not accessible with these
            # credentials; continue and let individual operations fail instead.
            elif e.response.get("Error", {}).get("Code") == "403":
                pass
            else:
                # Any other error is unexpected; re-raise it.
                raise

    def save(self, filename, data):
        self.client.put_object(Bucket=self.bucket_name, Key=filename, Body=data)

    def load_once(self, filename: str) -> bytes:
        try:
            data: bytes = self.client.get_object(Bucket=self.bucket_name, Key=filename)["Body"].read()
        except ClientError as ex:
            if ex.response.get("Error", {}).get("Code") == "NoSuchKey":
                raise FileNotFoundError("File not found")
            else:
                raise
        return data

    def load_stream(self, filename: str) -> Generator:
        # Note: this is a generator, so the body runs lazily and any
        # ClientError surfaces on first iteration, not at call time.
        try:
            response = self.client.get_object(Bucket=self.bucket_name, Key=filename)
            yield from response["Body"].iter_chunks()
        except ClientError as ex:
            if ex.response.get("Error", {}).get("Code") == "NoSuchKey":
                raise FileNotFoundError("File not found")
            elif "reached max retries" in str(ex):
                raise ValueError("Please do not request the same file too frequently")
            else:
                raise

    def download(self, filename, target_filepath):
        self.client.download_file(self.bucket_name, filename, target_filepath)

    def exists(self, filename):
        try:
            self.client.head_object(Bucket=self.bucket_name, Key=filename)
            return True
        except ClientError:
            # A missing key or an inaccessible object both count as "does not exist".
            return False

    def delete(self, filename):
        self.client.delete_object(Bucket=self.bucket_name, Key=filename)

    def get_download_url(self, filename: str, expires_in: int = 3600) -> str:
        url: str = self.client.generate_presigned_url(
            ClientMethod="get_object",
            Params={"Bucket": self.bucket_name, "Key": filename},
            ExpiresIn=expires_in,
        )
        return url

    def get_download_urls(self, filenames: list[str], expires_in: int = 3600) -> list[str]:
        # Presigning is a local signing operation, so generating a batch of
        # URLs is cheap: no network round trip is made per file.
        return [
            self.client.generate_presigned_url(
                ClientMethod="get_object",
                Params={"Bucket": self.bucket_name, "Key": filename},
                ExpiresIn=expires_in,
            )
            for filename in filenames
        ]

    def get_upload_url(self, filename: str, expires_in: int = 3600) -> str:
        url: str = self.client.generate_presigned_url(
            ClientMethod="put_object",
            Params={"Bucket": self.bucket_name, "Key": filename},
            ExpiresIn=expires_in,
        )
        return url
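
A minimal usage sketch, assuming dify_config carries valid S3 credentials and bucket settings; the key names below are illustrative:

# Hypothetical usage; requires S3 settings to be present in dify_config.
storage = AwsS3Storage()
storage.save("assets/example.txt", b"hello")
if storage.exists("assets/example.txt"):
    data = storage.load_once("assets/example.txt")  # b"hello"
    url = storage.get_download_url("assets/example.txt", expires_in=600)
urls = storage.get_download_urls(["assets/a.txt", "assets/b.txt"])
storage.delete("assets/example.txt")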