Creating Custom Plugins¶
Extend strutex with your own providers, extractors, and security plugins.
Plugin System Overview¶
Strutex uses an inheritance-based plugin system. To create a custom plugin:
- Inherit from the appropriate base class
- Implement required methods
- Your plugin is auto-registered!
┌────────────────────────────────────────────────────┐
│ Plugin Types │
├────────────────────────────────────────────────────┤
│ Provider → Custom LLM backends │
│ Extractor → Custom file format handlers │
│ SecurityPlugin → Custom security validation │
└────────────────────────────────────────────────────┘
Creating a Custom Provider¶
Create your own LLM provider by inheriting from Provider:
from strutex.plugins.base import Provider
from typing import Any, Optional
class MyCustomProvider(Provider, name="my_provider"):
    """
    Custom LLM provider for an internal extraction API.

    Inheriting from Provider auto-registers the plugin!
    """

    # Plugin metadata (optional)
    priority = 50  # Lower = higher priority
    cost = 0.001  # Cost per request for chain sorting
    version = "1.0.0"

    def __init__(self, api_key: Optional[str] = None, model: str = "default"):
        """Store the API key and model name used for every request."""
        self.api_key = api_key
        self.model = model

    def process(
        self,
        file_path: str,
        prompt: str,
        schema: Any,
        mime_type: str,
        **kwargs
    ) -> dict:
        """
        Main extraction method. Required.

        Args:
            file_path: Path to document
            prompt: Extraction prompt
            schema: Target schema
            mime_type: File MIME type
            **kwargs: Additional options

        Returns:
            Extracted data as dict

        Raises:
            OSError: If the document cannot be opened or read.
            requests.RequestException: If the API call fails or times out.
        """
        # 1. Read the file as raw bytes (documents are often binary)
        with open(file_path, "rb") as f:
            content = f.read()
        # 2. Call your LLM API
        response = self._call_api(content, prompt, schema)
        # 3. Parse and return
        return response

    def _call_api(self, content: bytes, prompt: str, schema: Any) -> dict:
        """
        Send the document to the extraction endpoint and return its JSON reply.

        The raw bytes are base64-encoded before being placed in the JSON
        payload: PDFs and images are not valid UTF-8, so decoding them as
        text would raise UnicodeDecodeError.
        """
        import base64

        import requests

        response = requests.post(
            "https://your-api.com/extract",
            json={
                "content": base64.b64encode(content).decode("ascii"),
                "prompt": prompt,
                "schema": schema.to_dict() if hasattr(schema, "to_dict") else schema,
                "model": self.model,
            },
            headers={"Authorization": f"Bearer {self.api_key}"},
            # requests waits indefinitely unless a timeout is given
            timeout=60,
        )
        # Surface 4xx/5xx responses instead of returning an error body as data
        response.raise_for_status()
        return response.json()

    def health_check(self) -> bool:
        """Optional: Check if provider is available."""
        try:
            # Ping your API here; this placeholder always reports healthy
            return True
        except Exception:
            return False
Using Your Custom Provider¶
from strutex import DocumentProcessor
from my_providers import MyCustomProvider
# Create an instance directly, passing the constructor arguments
provider = MyCustomProvider(api_key="secret", model="v2")
# Hand the instance to the processor
processor = DocumentProcessor(provider=provider)
# NOTE: MySchema is not defined in this snippet; presumably it is your own schema
result = processor.process("doc.pdf", "Extract", schema=MySchema)
# Or select the provider by its registered name (auto-registered!)
processor = DocumentProcessor(provider="my_provider")
Creating a Custom Extractor¶
Handle custom file formats by inheriting from Extractor:
from strutex.plugins.base import Extractor
from typing import Any
class XMLExtractor(Extractor, name="xml"):
    """Turn XML documents into indented plain text for the LLM."""

    # MIME types this extractor accepts
    supported_types = ["application/xml", "text/xml"]

    def extract(self, file_path: str, **kwargs) -> str:
        """
        Parse the XML file and render it as text.

        Args:
            file_path: Path to the XML file
            **kwargs: Additional options (unused here)

        Returns:
            One line per element, nested elements indented one level deeper
        """
        import xml.etree.ElementTree as ET

        document_root = ET.parse(file_path).getroot()
        return self._xml_to_text(document_root)

    def _xml_to_text(self, element, indent=0) -> str:
        """Render an element and, recursively, its children as text lines."""
        pad = " " * indent
        body = (element.text or "").strip()
        if body:
            header = f"{pad}{element.tag}: {body}"
        else:
            header = f"{pad}{element.tag}:"
        rendered = [header]
        rendered.extend(self._xml_to_text(node, indent + 1) for node in element)
        return "\n".join(rendered)

    def supports(self, mime_type: str) -> bool:
        """Report whether the given MIME type is one of supported_types."""
        return mime_type in self.supported_types
Registering Your Extractor¶
from strutex.plugins.registry import PluginRegistry
from my_extractors import XMLExtractor
# Auto-registered via inheritance, but you can also manually register
PluginRegistry.register("extractor", XMLExtractor, name="xml")
# NOTE: DocumentProcessor and `provider` are not defined in this snippet;
# presumably they carry over from the provider example, so import/create them first
# Now XML files are automatically handled
processor = DocumentProcessor(provider=provider)
result = processor.process("data.xml", "Extract", schema=MySchema)
Creating a Custom Security Plugin¶
Add custom security validation:
from typing import Any, Dict, Optional

from strutex.plugins.base import SecurityPlugin, SecurityResult
class CustomSecurityPlugin(SecurityPlugin, name="custom_security"):
    """Custom security validation: blocks forbidden words and redacts secrets."""

    def __init__(self, forbidden_words: Optional[list] = None):
        """
        Args:
            forbidden_words: Words that must not appear in the input prompt
                (matched case-insensitively). Defaults to no words.
        """
        # Copy the list so later changes by the caller do not affect the plugin,
        # and drop empty entries: "" is a substring of every string and would
        # otherwise reject every prompt.
        self.forbidden_words = [word for word in (forbidden_words or []) if word]

    def validate_input(self, text: str) -> SecurityResult:
        """
        Validate input prompt before LLM call.

        Args:
            text: Input prompt to validate

        Returns:
            SecurityResult with valid=True/False and reason
        """
        # Lowercase once instead of on every loop iteration
        lowered = text.lower()
        # Check for forbidden words
        for word in self.forbidden_words:
            if word.lower() in lowered:
                return SecurityResult(
                    valid=False,
                    reason=f"Forbidden word detected: {word}"
                )
        # Check for suspicious patterns
        if "ignore all previous" in lowered:
            return SecurityResult(
                valid=False,
                reason="Potential injection attempt"
            )
        return SecurityResult(valid=True, text=text)

    def validate_output(self, data: Dict[str, Any]) -> SecurityResult:
        """
        Validate LLM output before returning.

        Args:
            data: Extracted data dict

        Returns:
            SecurityResult with valid=True/False and data
        """
        # Redact sensitive top-level fields on a shallow copy; the caller's
        # dict is left untouched
        redacted = data.copy()
        for key in ("ssn", "password", "secret"):
            if key in redacted:
                redacted[key] = "[REDACTED]"
        return SecurityResult(valid=True, data=redacted)
Using Your Security Plugin¶
from strutex import DocumentProcessor, GeminiProvider
from my_security import CustomSecurityPlugin
# Configure the plugin with the words that must not appear in prompts
security = CustomSecurityPlugin(
forbidden_words=["confidential", "internal"]
)
# Attach the plugin to the processor through the `security` argument
processor = DocumentProcessor(
provider=GeminiProvider(),
security=security
)
# Security validation is now active
result = processor.process("doc.pdf", "Extract", schema=MySchema)
Plugin Discovery & Registration¶
Auto-Registration (Recommended)¶
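Simply inherit from the appropriate base class and pass a `name` argument in the class definition, for example `class MyCustomProvider(Provider, name="my_provider"): ...`.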
Manual Registration¶
from strutex.plugins.registry import PluginRegistry
# Register a class explicitly: plugin type, the class, then the name to register it under
# NOTE: MyProvider is not defined in this snippet; presumably it is your own Provider subclass
PluginRegistry.register("provider", MyProvider, name="my_provider")
List Registered Plugins¶
from strutex.plugins.registry import PluginRegistry
# List the names of all registered plugins of type "provider"
providers = PluginRegistry.list_names("provider")
print(providers) # ["gemini", "openai", "my_provider", ...]
# Look up the metadata of a single plugin by type and name
info = PluginRegistry.get_plugin_info("provider", "my_provider")
print(info) # {"name": "my_provider", "priority": 50, ...}
Using CLI¶
# List all plugins
strutex plugins list
# Get plugin info
strutex plugins info provider my_provider
# Refresh plugin cache
strutex plugins refresh
Entry Points (Package Distribution)¶
Distribute your plugin as a package:
# pyproject.toml
[project.entry-points."strutex.providers"]
my_provider = "my_package.providers:MyCustomProvider"
[project.entry-points."strutex.extractors"]
xml = "my_package.extractors:XMLExtractor"
After running `pip install my_package`, the plugins are discovered automatically!
Best Practices¶
| Practice | Why |
|---|---|
| Set `name` in class definition | Clear identification |
| Add `priority` and `cost` | Enables smart chain selection |
| Implement `health_check` | Enables availability checks |
| Add `version` | For compatibility tracking |
| Use type hints | Better IDE support |
| Add docstrings | Shown by `strutex plugins info` |
Full Example: Azure OpenAI Provider¶
from strutex.plugins.base import Provider
from typing import Any, Optional
import os
class AzureOpenAIProvider(Provider, name="azure_openai"):
    """Azure OpenAI provider for enterprise deployments."""

    priority = 30
    cost = 0.01
    version = "1.0.0"

    def __init__(
        self,
        api_key: Optional[str] = None,
        endpoint: Optional[str] = None,
        deployment: str = "gpt-4",
        api_version: str = "2024-02-01"
    ):
        """
        Args:
            api_key: Azure OpenAI key; falls back to AZURE_OPENAI_API_KEY.
            endpoint: Azure resource endpoint; falls back to AZURE_OPENAI_ENDPOINT.
            deployment: Name of the model deployment to call.
            api_version: Azure OpenAI REST API version.
        """
        self.api_key = api_key or os.getenv("AZURE_OPENAI_API_KEY")
        self.endpoint = endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        self.deployment = deployment
        self.api_version = api_version

    def process(
        self,
        file_path: str,
        prompt: str,
        schema: Any,
        mime_type: str,
        **kwargs
    ) -> dict:
        """
        Send the prompt to the Azure deployment and return the parsed JSON reply.

        Args:
            file_path: Path to document (not yet used by this skeleton)
            prompt: Extraction prompt
            schema: Target schema (not yet used by this skeleton)
            mime_type: File MIME type (not yet used by this skeleton)
            **kwargs: Additional options

        Returns:
            Extracted data as dict

        Raises:
            ValueError: If the model returns no content or invalid JSON.
        """
        import json

        from openai import AzureOpenAI

        client = AzureOpenAI(
            api_key=self.api_key,
            azure_endpoint=self.endpoint,
            api_version=self.api_version
        )
        # Read file and convert to base64 for images/PDFs
        # ... implementation ...
        response = client.chat.completions.create(
            model=self.deployment,
            messages=[
                # JSON mode requires the conversation itself to ask for JSON;
                # without this the API rejects the request
                {"role": "system", "content": "Respond with a single valid JSON object."},
                {"role": "user", "content": prompt},
            ],
            response_format={"type": "json_object"}
        )
        content = response.choices[0].message.content
        # content can be None, which json.loads would reject with an opaque TypeError
        if not content:
            raise ValueError("Azure OpenAI returned an empty response; no JSON to parse")
        return json.loads(content)

    def health_check(self) -> bool:
        """Report whether both the API key and endpoint are configured."""
        return bool(self.api_key and self.endpoint)
Next Steps¶
| Want to... | Go to... |
|---|---|
| Learn hook system | Hooks Reference |
| See plugin API | Plugin System |
| Distribute package | Entry Points |