OpenAI API Gateway PII Handling

Protect personally identifiable information in AI interactions. Use enterprise-grade privacy controls to detect, mask, and securely handle sensitive data before it reaches OpenAI APIs.

PII Detection & Masking
Original Request
Hi, my name is John Smith and my email is john.smith@company.com. My SSN is 123-45-6789 and my card ends in 4242.
Protected Request
Hi, my name is [PERSON_1] and my email is [EMAIL_1]. My SSN is [SSN_1] and my card ends in [CARD_1].

PII Protection Features

Comprehensive PII handling capabilities.

🔍

Automatic Detection

Identify 50+ types of PII automatically using pattern matching and ML models with high accuracy.

🎭

Smart Masking

Replace sensitive data with reversible tokens while preserving context for AI understanding.

🔓

Reversible Redaction

Restore original values in responses for authorized users while keeping AI providers blind.

📊

PII Reporting

Generate compliance reports showing what PII was detected and how it was protected.

⚙️

Custom Patterns

Define organization-specific PII patterns for proprietary data formats and internal identifiers.

🛡️

Zero-Trust Processing

Process AI requests without exposing raw PII to external services or logging systems.

Detectable PII Types

Comprehensive coverage of personally identifiable information.

👤
Names
John Smith, Jane Doe
📧
Email
user@domain.com
📞
Phone
+1 (555) 123-4567
🏠
Address
123 Main St, City
🆔
SSN
123-45-6789
💳
Credit Card
**** **** **** 4242
🏥
Medical ID
Policy numbers
🌐
IP Address
192.168.1.1

Implementation Guide

Build PII protection into your OpenAI gateway.

pii_handler.py Python
class PIIHandler:
    """Detect PII in text, swap it for placeholder tokens, and restore it later.

    Detection combines the regex patterns from ``load_patterns`` with the
    detector returned by ``load_pii_model()``.  Masking replaces each detected
    span with a token such as ``[EMAIL_1]`` and stores the token -> original
    value mapping in ``self.token_store`` under a freshly generated request
    id; ``unmask_response`` uses that id to put the original values back.

    Annotations that mention names defined outside this class are written as
    strings so they are not evaluated when the class is defined.
    """

    def __init__(self):
        # Regex source strings keyed by PII type name.
        self.patterns = self.load_patterns()
        # Object whose awaitable ``detect(text)`` is used by ``detect_pii``.
        self.ml_detector = load_pii_model()
        # Store used via awaitable ``store`` / ``get`` / ``delete`` calls.
        self.token_store = SecureTokenStore()

    def load_patterns(self) -> dict:
        """Return the regex patterns used for detection, keyed by PII type."""
        return {
            'email': r'[\w\.-]+@[\w\.-]+\.\w+',
            'ssn': r'\d{3}-\d{2}-\d{4}',
            'phone': r'(\+\d{1,3}[-.]?)?\(?\d{3}\)?[-.]?\d{3}[-.]?\d{4}',
            'credit_card': r'\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}',
            'ip_address': r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}'
        }

    async def detect_pii(self, text: str) -> "List[PIIEntity]":
        """Return every PII entity found in ``text``.

        Regex matches come first (one ``PIIEntity`` per match, carrying the
        pattern's type name, the matched text and its offsets), followed by
        whatever ``self.ml_detector.detect(text)`` returns.  The result is not
        de-duplicated: different detectors may report overlapping spans, which
        ``mask_pii`` resolves.
        """
        entities = []

        for pii_type, pattern in self.patterns.items():
            entities.extend(
                PIIEntity(
                    type=pii_type,
                    value=match.group(),
                    start=match.start(),
                    end=match.end()
                )
                for match in re.finditer(pattern, text)
            )

        # ML-based detection (the original comment names names and addresses).
        entities.extend(await self.ml_detector.detect(text))

        return entities

    @staticmethod
    def _merge_spans(entities) -> list:
        """Collapse overlapping detections into disjoint spans ordered by start.

        Each returned span is a dict with ``start``, ``end``, ``type`` and
        ``value``.  When detections overlap, the span covers their union (so
        no detected character is left unmasked) and takes the type of the
        longest detection involved.  ``value`` is ``None`` when the union is
        wider than the detection that opened the span, signalling that the
        caller must read the value from the text itself.
        """
        spans = []
        # Earliest start first; among equal starts the longest detection first.
        for entity in sorted(entities, key=lambda e: (e.start, -e.end)):
            length = entity.end - entity.start
            if length <= 0:
                continue  # an empty span carries nothing to mask
            current = spans[-1] if spans else None
            if current is None or entity.start >= current["end"]:
                spans.append({
                    "start": entity.start,
                    "end": entity.end,
                    "type": entity.type,
                    "value": entity.value,
                    "longest": length,
                })
                continue
            if entity.end > current["end"]:
                current["end"] = entity.end
                current["value"] = None
            if length > current["longest"]:
                current["type"] = entity.type
                current["longest"] = length
        return spans

    async def mask_pii(
        self,
        text: str,
        entities: "List[PIIEntity]"
    ) -> "Tuple[str, str]":
        """Replace detected PII in ``text`` with tokens and store the mapping.

        Tokens have the form ``[TYPE_N]`` where ``N`` counts distinct values
        of that type in order of appearance, so a repeated value reuses its
        token and the model can tell that two mentions refer to the same
        thing.  Overlapping detections are merged before replacement so the
        text is never spliced twice at the same position.

        Args:
            text: The text the entity offsets refer to.
            entities: Detected entities exposing ``type``, ``value``,
                ``start`` and ``end``.

        Returns:
            ``(masked_text, request_id)`` where ``request_id`` is the key
            under which the token mapping was stored.
        """
        counters = {}          # TYPE label -> number of tokens issued so far
        tokens_by_value = {}   # (label, value) -> token already issued
        mapping = {}           # token -> original value
        pieces = []
        cursor = 0

        for span in self._merge_spans(entities):
            label = span["type"].upper()
            value = span["value"]
            if value is None:
                value = text[span["start"]:span["end"]]

            token = tokens_by_value.get((label, value))
            if token is None:
                counters[label] = counters.get(label, 0) + 1
                token = f"[{label}_{counters[label]}]"
                tokens_by_value[(label, value)] = token
                mapping[token] = value

            pieces.append(text[cursor:span["start"]])
            pieces.append(token)
            cursor = span["end"]

        pieces.append(text[cursor:])
        masked_text = "".join(pieces)

        # Store mapping under a fresh id so the response can be unmasked later.
        request_id = str(uuid.uuid4())
        await self.token_store.store(request_id, mapping)

        return masked_text, request_id

    async def unmask_response(
        self,
        response: str,
        request_id: str
    ) -> str:
        """Restore original PII values in ``response`` and drop the mapping.

        If no (or an empty) mapping is stored for ``request_id`` the response
        is returned unchanged.  Otherwise every token is replaced in a single
        pass, so a restored value that happens to look like another token is
        not substituted a second time, and the stored mapping is deleted.
        A falsy ``response`` (``None`` or ``""``) is returned as-is, but the
        mapping is still deleted.
        """
        mapping = await self.token_store.get(request_id)

        if not mapping:
            return response

        unmasked = response
        if response:
            # Longest tokens first so no alternative can shadow a longer one.
            token_pattern = re.compile("|".join(
                re.escape(token)
                for token in sorted(mapping, key=len, reverse=True)
            ))
            unmasked = token_pattern.sub(
                lambda match: mapping[match.group(0)], response
            )

        # Clean up stored mapping
        await self.token_store.delete(request_id)

        return unmasked

class OpenAIPIIGateway:
    """OpenAI gateway that masks PII before a request and restores it after.

    The gateway relies on three awaitable methods of the handler passed in:
    ``detect_pii(text)``, ``mask_pii(text, entities)`` (returning the masked
    text and a request id) and ``unmask_response(text, request_id)``.
    """

    def __init__(self, openai_client, pii_handler):
        self.client = openai_client
        self.pii = pii_handler

    async def complete(self, request: dict) -> dict:
        """Send ``request`` to ``chat.completions.create`` with PII masked.

        Every text field of the request (a string ``prompt``, string message
        ``content`` values, and ``text`` entries inside list-valued content)
        is masked individually and written to a copy of the request, so the
        caller's dict and messages are left untouched and no field receives
        another field's text.  All fields share one masking pass, so the same
        token means the same value everywhere in the request.

        Args:
            request: Keyword arguments for ``chat.completions.create``.

        Returns:
            Whatever the client returns, with tokens in every choice's string
            ``message.content`` replaced by the original values.
            NOTE(review): the ``-> dict`` annotation is kept for
            compatibility; the code treats the result as an object with
            ``choices[i].message.content`` attributes.

        Raises:
            ValueError: If masking or unmasking disturbed the boundaries
                between fields; the request is then not sent (fail closed).
        """
        outbound = dict(request)
        slots = self._copy_text_slots(outbound)

        request_id = None
        if slots:
            combined, boundary = self._join_fields(
                [holder[key] for holder, key in slots]
            )
            entities = await self.pii.detect_pii(combined)

            if entities:
                # Log PII detection (count only, never the values).
                logger.info("Detected %d PII entities", len(entities))

                masked, request_id = await self.pii.mask_pii(
                    combined, entities
                )
                try:
                    parts = self._split_fields(masked, boundary, len(slots))
                except ValueError:
                    # Unmasking an empty string is the handler call that
                    # discards the stored mapping.
                    await self.pii.unmask_response("", request_id)
                    raise
                for (holder, key), part in zip(slots, parts):
                    holder[key] = part

        # Call OpenAI
        try:
            response = await self.client.chat.completions.create(**outbound)
        except Exception:
            if request_id is not None:
                # Do not leave the stored PII mapping behind on failure.
                await self.pii.unmask_response("", request_id)
            raise

        # Unmask response if needed
        if request_id is not None:
            await self._restore_choices(response, request_id)

        return response

    @staticmethod
    def _copy_text_slots(outbound: dict) -> list:
        """Copy the message structures in ``outbound`` and list their text fields.

        Returns ``(container, key)`` pairs, each addressing one string inside
        the copied request; ``outbound['messages']`` is replaced by the copy.
        """
        slots = []
        if isinstance(outbound.get('prompt'), str):
            slots.append((outbound, 'prompt'))

        messages = outbound.get('messages')
        if not messages:
            return slots

        copied_messages = []
        for message in messages:
            message = dict(message)
            content = message.get('content')
            if isinstance(content, str):
                slots.append((message, 'content'))
            elif isinstance(content, list):
                parts = []
                for part in content:
                    if isinstance(part, dict):
                        part = dict(part)
                        if isinstance(part.get('text'), str):
                            slots.append((part, 'text'))
                    parts.append(part)
                message['content'] = parts
            copied_messages.append(message)
        outbound['messages'] = copied_messages
        return slots

    @staticmethod
    def _join_fields(texts: list) -> tuple:
        """Join ``texts`` with a boundary string that occurs in none of them.

        Returns ``(combined, boundary)``.  The boundary starts with its only
        newline, so no proper prefix of it equals a suffix of it and a field's
        trailing characters cannot shift where the boundary is found.
        """
        padding = 0
        boundary = "\n<|pii-field-boundary|>"
        while any(boundary in text for text in texts):
            padding += 1
            boundary = f"\n<|pii-field-boundary{'-' * padding}|>"
        return boundary.join(texts), boundary

    @staticmethod
    def _split_fields(combined: str, boundary: str, expected: int) -> list:
        """Split ``combined`` back into ``expected`` fields or raise ValueError."""
        parts = combined.split(boundary) if expected else []
        if len(parts) != expected:
            raise ValueError("PII processing altered field boundaries")
        return parts

    async def _restore_choices(self, response, request_id: str) -> None:
        """Unmask the string content of every choice with one handler call.

        All contents are joined and unmasked together because a single
        ``unmask_response`` call is made per request id.  The call is made
        even when no choice has string content, so the handler still sees
        the request id once.
        """
        targets = [
            choice.message
            for choice in response.choices
            if isinstance(choice.message.content, str)
        ]
        combined, boundary = self._join_fields(
            [message.content for message in targets]
        )
        restored = await self.pii.unmask_response(combined, request_id)
        parts = self._split_fields(restored, boundary, len(targets))
        for message, content in zip(targets, parts):
            message.content = content

Partner Resources