Metadata Filtering Strategies
Content: Metadata Filtering Strategies
User Intent
"How do I filter content by metadata? What filters are available?"
Operation
SDK Method: queryContents() with filter parameter
GraphQL: queryContents query with ContentFilter
Entity Type: Content
Common Use Cases: Filtered search, date range queries, type filtering, collection filtering, entity-based queries
Metadata Filtering Overview
Graphlit provides powerful metadata filtering to narrow search results based on indexed properties. Filters can be combined with search queries or used alone.
TypeScript (Canonical)
import { Graphlit } from 'graphlit-client';
import { ContentTypes, EntityState, FileTypes, ObservableTypes, SearchTypes } from 'graphlit-client/dist/generated/graphql-types';
const graphlit = new Graphlit();
// Basic metadata filtering (no search query)
const filtered = await graphlit.queryContents({
filter: {
types: [ContentTypes.Email],
creationDateRange: {
from: '2024-01-01T00:00:00Z',
to: '2024-12-31T23:59:59Z'
}
}
});
// Combine search with filters
const searchAndFilter = await graphlit.queryContents({
search: "machine learning",
filter: {
types: [ContentTypes.File],
fileTypes: [FileTypes.Document],
collections: [{ id: 'research-papers' }]
}
});
// Complex filter with multiple criteria
const complex = await graphlit.queryContents({
filter: {
types: [ContentTypes.Email, ContentTypes.Message],
creationDateRange: { from: '2024-01-01' },
feeds: [{ id: 'slack-feed-id' }, { id: 'gmail-feed-id' }],
collections: [{ id: 'team-docs' }],
states: [EntityState.Enabled]
}
});
console.log(`Found ${complex.contents.results.length} results`);
Available Filters
1. Content Type Filter
// Filter by content type
const emails = await graphlit.queryContents({
filter: {
types: [ContentTypes.Email]
}
});
// Multiple types (OR logic)
const communications = await graphlit.queryContents({
filter: {
types: [
ContentTypes.Email,
ContentTypes.Message,
ContentTypes.Post
]
}
});
2. File Type Filter
// Filter by file type (for FILE content only)
const documents = await graphlit.queryContents({
filter: {
types: [ContentTypes.File],
fileTypes: [FileTypes.Document]
}
});
// Multiple file types
const media = await graphlit.queryContents({
filter: {
fileTypes: [
FileTypes.Image,
FileTypes.Audio,
FileTypes.Video
]
}
});
3. Date Range Filters
// Creation date range
const thisYear = await graphlit.queryContents({
filter: {
creationDateRange: {
from: '2024-01-01T00:00:00Z',
to: '2024-12-31T23:59:59Z'
}
}
});
// Original date (document creation date)
const originalDate = await graphlit.queryContents({
filter: {
dateRange: {
from: '2023-01-01'
}
}
});
// Relative date (last N days) - ISO 8601 duration
const lastMonth = await graphlit.queryContents({
filter: {
inLast: 'P30D' // Last 30 days
}
});
const lastWeek = await graphlit.queryContents({
filter: {
createdInLast: 'P7D' // Created in last 7 days
}
});
4. Collection Filter
// Single collection
const inCollection = await graphlit.queryContents({
filter: {
collections: [{ id: 'collection-id' }]
}
});
// Multiple collections (OR logic)
const multiCollection = await graphlit.queryContents({
filter: {
collections: [
{ id: 'engineering-docs' },
{ id: 'product-docs' },
{ id: 'design-docs' }
]
}
});
// Content WITHOUT any collection
const noCollection = await graphlit.queryContents({
filter: {
hasCollections: false
}
});
// Content WITH any collection
const hasCollection = await graphlit.queryContents({
filter: {
hasCollections: true
}
});
5. Feed Filter
// Content from specific feed
const fromFeed = await graphlit.queryContents({
filter: {
feeds: [{ id: 'slack-feed-id' }]
}
});
// Multiple feeds
const multiFeeds = await graphlit.queryContents({
filter: {
feeds: [
{ id: 'slack-feed-id' },
{ id: 'gmail-feed-id' },
{ id: 'github-feed-id' }
]
}
});
// Content WITHOUT any feed (manually ingested)
const noFeed = await graphlit.queryContents({
filter: {
hasFeeds: false
}
});
6. Workflow Filter
// Content processed by specific workflow
const byWorkflow = await graphlit.queryContents({
filter: {
workflows: [{ id: 'extraction-workflow-id' }]
}
});
// Multiple workflows
const multiWorkflows = await graphlit.queryContents({
filter: {
workflows: [
{ id: 'extraction-workflow' },
{ id: 'preparation-workflow' }
]
}
});
// Content WITHOUT any workflow
const noWorkflow = await graphlit.queryContents({
filter: {
hasWorkflows: false
}
});
7. State Filter
// Only enabled content (default)
const enabled = await graphlit.queryContents({
filter: {
states: [EntityState.Enabled]
}
});
// Include disabled (hidden) content
const all = await graphlit.queryContents({
filter: {
states: [
EntityState.Enabled,
EntityState.Disabled
]
}
});
// Only archived content
const archived = await graphlit.queryContents({
filter: {
states: [EntityState.Archived]
}
});
8. File Size Filter
// File size range (bytes)
const largeDocs = await graphlit.queryContents({
filter: {
fileSizeRange: {
from: 10000000, // 10MB
to: 100000000 // 100MB
}
}
});
// Files larger than X
const huge = await graphlit.queryContents({
filter: {
fileSizeRange: {
from: 100000000 // > 100MB
}
}
});
9. MIME Type Filter
// Specific MIME type
const pdfs = await graphlit.queryContents({
filter: {
formats: ['application/pdf']
}
});
// Multiple MIME types
const images = await graphlit.queryContents({
filter: {
formats: [
'image/jpeg',
'image/png',
'image/gif'
]
}
});
10. File Extension Filter
// Specific extensions
const codeFiles = await graphlit.queryContents({
filter: {
fileExtensions: ['ts', 'tsx', 'js', 'jsx']
}
});
const docs = await graphlit.queryContents({
filter: {
fileExtensions: ['pdf', 'docx', 'xlsx', 'pptx']
}
});
11. Entity Filter (Observations)
// Content mentioning specific person
const withPerson = await graphlit.queryContents({
filter: {
observations: [{
type: ObservableTypes.Person,
observable: { id: 'person-id' }
}]
}
});
// Multiple entities (AND logic - content must mention ALL)
const multiEntity = await graphlit.queryContents({
filter: {
observations: [
{
type: ObservableTypes.Person,
observable: { id: 'person-1' }
},
{
type: ObservableTypes.Organization,
observable: { id: 'org-1' }
}
]
}
});
// Content with any observations
const hasEntities = await graphlit.queryContents({
filter: {
hasObservations: true
}
});
// Content without observations
const noEntities = await graphlit.queryContents({
filter: {
hasObservations: false
}
});
12. Similarity Filter
// Find similar content
const similar = await graphlit.queryContents({
filter: {
similarContents: [{ id: 'content-id' }]
},
limit: 10
});
Boolean Logic (OR / AND)
OR Logic Within Filter
OR Logic Within Filter
// Multiple values in same filter = OR
const emailsOrMessages = await graphlit.queryContents({
filter: {
types: [
ContentTypes.Email, // OR
ContentTypes.Message
]
}
});
AND Logic Across Filters
// Multiple filters = AND
const emailsFromSlack = await graphlit.queryContents({
filter: {
types: [ContentTypes.Message], // AND
feeds: [{ id: 'slack-feed' }], // AND
creationDateRange: { from: '2024-01-01' } // AND
}
});
Complex OR/AND (Advanced)
// Complex boolean logic
const complex = await graphlit.queryContents({
filter: {
// Base criteria (AND)
types: [ContentTypes.Email],
creationDateRange: { from: '2024-01-01' },
// OR clause
or: [
{
feeds: [{ id: 'gmail-feed' }]
},
{
feeds: [{ id: 'outlook-feed' }]
}
],
// AND clause
and: [
{
collections: [{ id: 'important' }]
}
]
}
});
// This finds: Emails from 2024 AND
// (from Gmail OR Outlook) AND
// in 'important' collection
Performance Considerations
Fast Filters (Indexed)
// These are fast (indexed):
const fast = await graphlit.queryContents({
filter: {
types: [ContentTypes.Email], // Indexed
fileTypes: [FileTypes.Document], // Indexed
creationDateRange: { from: '2024-01-01' }, // Indexed
feeds: [{ id: 'feed-id' }], // Indexed
collections: [{ id: 'collection-id' }], // Indexed
workflows: [{ id: 'workflow-id' }], // Indexed
states: [EntityState.Enabled], // Indexed
fileExtensions: ['pdf'], // Indexed
formats: ['application/pdf'] // Indexed
}
});
Slower Filters (Requires Graph Query)
// Entity filters query graph database (slower)
const slower = await graphlit.queryContents({
filter: {
observations: [{ // Graph query
type: ObservableTypes.Person,
observable: { id: 'person-id' }
}]
}
});
// Still fast enough for production, just slower than pure index queries
**Python**:
```python
# Metadata filtering (snake_case)
filtered = await graphlit.queryContents(
    filter=ContentFilterInput(
        types=[ContentTypes.Email],
        creation_date_range=DateRangeInput(
            from_=datetime(2024, 1, 1).isoformat()
        ),
        feeds=[EntityReferenceInput(id='feed-id')]
    )
)

# Complex filter
complex = await graphlit.queryContents(
    filter=ContentFilterInput(
        types=[ContentTypes.File],
        file_types=[FileTypes.Document],
        file_size_range=Int64RangeInput(
            from_=10000000  # 10MB
        ),
        collections=[EntityReferenceInput(id='collection-id')]
    )
)
```
**C#**:
```csharp
using Graphlit;

// Named `graphlit` so the calls below resolve to this instance.
var graphlit = new Graphlit();

// Metadata filtering (PascalCase)
// Email content created on or after 2024-01-01, restricted to one feed.
var filtered = await graphlit.QueryContents(new ContentFilter
{
    Types = new[] { ContentTypes.Email },
    CreationDateRange = new DateRange
    {
        From = new DateTime(2024, 1, 1)
    },
    Feeds = new[] { new EntityReference { Id = "feed-id" } }
});

// Complex filter
// Document files of at least 10MB that belong to one collection.
var complex = await graphlit.QueryContents(new ContentFilter
{
    Types = new[] { ContentTypes.File },
    FileTypes = new[] { FileTypes.Document },
    FileSizeRange = new Int64Range
    {
        From = 10000000 // 10MB
    },
    Collections = new[] { new EntityReference { Id = "collection-id" } }
});
```
Developer Hints
Combine Filters for Precision
// ✓ Narrow down results
const precise = await graphlit.queryContents({
search: "quarterly report",
filter: {
types: [ContentTypes.File],
fileTypes: [FileTypes.Document],
creationDateRange: { from: '2024-01-01', to: '2024-03-31' },
collections: [{ id: 'finance-docs' }]
}
});
ISO 8601 Duration Format
// Relative dates use ISO 8601 durations
'P7D' // Last 7 days
'P30D' // Last 30 days
'P1M' // Last 1 month
'P3M' // Last 3 months
'P1Y' // Last 1 year
'PT24H' // Last 24 hours
Empty Array vs Null
// No filter (all content)
await graphlit.queryContents({});
// Explicit empty array (same as no filter)
await graphlit.queryContents({
filter: {
types: [] // Same as not providing types
}
});
// Single type
await graphlit.queryContents({
filter: {
types: [ContentTypes.Email]
}
});
Variations
1. Filter by Type and Date
const emailsThisYear = await graphlit.queryContents({
filter: {
types: [ContentTypes.Email],
creationDateRange: {
from: '2024-01-01'
}
}
});
2. Filter by Multiple Collections
const docs = await graphlit.queryContents({
filter: {
collections: [
{ id: 'engineering' },
{ id: 'product' },
{ id: 'design' }
]
}
});
3. Large Documents Only
const large = await graphlit.queryContents({
filter: {
types: [ContentTypes.File],
fileTypes: [FileTypes.Document],
fileSizeRange: {
from: 50000000 // > 50MB
}
}
});
4. Recent Content from Specific Feed
const recent = await graphlit.queryContents({
filter: {
feeds: [{ id: 'slack-feed' }],
createdInLast: 'P7D' // Last 7 days
}
});
5. Content with Entities
const withEntities = await graphlit.queryContents({
filter: {
hasObservations: true
}
});
6. PDFs in Collection
const pdfs = await graphlit.queryContents({
filter: {
fileExtensions: ['pdf'],
collections: [{ id: 'research-papers' }]
}
});
7. Search + Complex Filter
const searchFiltered = await graphlit.queryContents({
search: "machine learning",
searchType: SearchTypes.Hybrid,
filter: {
types: [ContentTypes.File],
fileTypes: [FileTypes.Document],
creationDateRange: {
from: '2024-01-01'
},
collections: [{ id: 'research' }],
fileSizeRange: {
from: 1000000, // > 1MB
to: 100000000 // < 100MB
}
}
});
Common Issues & Solutions
Issue: No results with multiple filters
Solution: Check if filters are too restrictive
// Too restrictive (might return 0 results)
await graphlit.queryContents({
filter: {
types: [ContentTypes.Email],
fileTypes: [FileTypes.Document] // Emails don't have fileType!
}
});
// Correct
await graphlit.queryContents({
filter: {
types: [ContentTypes.Email]
}
});
Issue: Date filter not working
Solution: Use correct ISO 8601 format
// Wrong format
creationDateRange: { from: '01/01/2024' }
// Correct format
creationDateRange: { from: '2024-01-01T00:00:00Z' }
// Also correct (date only)
creationDateRange: { from: '2024-01-01' }
Issue: Want content from Feed A OR Feed B
Solution: Multiple feeds in array = OR
// ✓ This is OR logic
await graphlit.queryContents({
filter: {
feeds: [
{ id: 'feed-a' },
{ id: 'feed-b' }
]
}
});
Production Example
/**
 * Runs a "project status" search restricted by state, recency, content type
 * and (optionally) a collection, then prints the applied filters, the total
 * number of results, and a per-content-type breakdown.
 *
 * @param collectionId - Collection to restrict results to. Defaults to
 *   'team-docs'; pass `null` (or an empty string) to skip the collection filter.
 * @returns A promise that resolves once the summary has been logged.
 */
async function advancedFilter(collectionId: string | null = 'team-docs'): Promise<void> {
  console.log(`\n=== ADVANCED FILTERING ===`);

  // Only add the collection criterion when a collection was actually given.
  const collections = collectionId ? [{ id: collectionId }] : undefined;

  // Build the filter as one typed literal instead of mutating an `any` object,
  // so misspelled or mistyped criteria are caught by the compiler.
  const filter = {
    states: [EntityState.Enabled],
    // Date filter (last 90 days), expressed as an ISO 8601 duration.
    createdInLast: 'P90D',
    types: [
      ContentTypes.Email,
      ContentTypes.Message,
      ContentTypes.File
    ],
    collections
  };

  // Execute query
  const results = await graphlit.queryContents({
    search: "project status",
    filter
  });

  console.log(`\nFilters applied:`);
  console.log(` - States: ${filter.states.join(', ')}`);
  console.log(` - Created: Last 90 days`);
  console.log(` - Types: ${filter.types.join(', ')}`);
  if (collectionId) {
    console.log(` - Collection: ${collectionId}`);
  }

  // Guard against a missing `contents` / `results` payload instead of
  // dereferencing it blindly.
  const matches = results.contents?.results ?? [];
  console.log(`\nResults: ${matches.length}`);

  // Group by type
  const countsByType: Record<string, number> = {};
  for (const content of matches) {
    // A missing type is bucketed explicitly rather than used as a null index.
    const key = content.type ?? 'UNKNOWN';
    countsByType[key] = (countsByType[key] ?? 0) + 1;
  }

  console.log('\nBy Type:');
  for (const [type, count] of Object.entries(countsByType)) {
    console.log(` ${type}: ${count}`);
  }
}
await advancedFilter();
Last updated
Was this helpful?