Document Metadata Queries
Content: Document Metadata Queries
User Intent
"How do I query documents by pages, author, file type, etc.?"
Operation
SDK Method: queryContents() with document-specific patterns
GraphQL: queryContents query
Entity Type: Content (type: FILE, fileType: DOCUMENT)
Common Use Cases: Find PDFs, filter by page count, search by author, encrypted documents
Document Metadata Structure
Documents (PDFs, Word, Excel, PowerPoint) have metadata in the document field:
/**
 * Shape of the `document` metadata field on document content
 * (PDFs, Word, Excel, PowerPoint).
 *
 * Treat every field as possibly absent: a value is only present when the
 * file provides it. For example, `author` may be null when it was never set
 * in the document properties, and Excel / PowerPoint files report
 * `worksheetCount` / `slideCount` instead of `pageCount`.
 */
interface DocumentMetadata {
  title?: string | null;
  subject?: string | null;
  summary?: string | null;
  author?: string | null; // from the document properties; may be null if not set
  lastModifiedBy?: string | null;
  publisher?: string | null;
  description?: string | null;
  keywords?: string[] | null;
  pageCount?: number | null; // Word / PDF
  worksheetCount?: number | null; // Excel
  slideCount?: number | null; // PowerPoint
  wordCount?: number | null;
  lineCount?: number | null;
  paragraphCount?: number | null;
  isEncrypted?: boolean | null;
  hasDigitalSignature?: boolean | null;
}

TypeScript (Canonical)
import { Graphlit } from 'graphlit-client';
import { ContentTypes, FileTypes, SearchTypes } from 'graphlit-client/dist/generated/graphql-types';
const graphlit = new Graphlit();

// Base filter shared by every query in this example: document files only.
const documentFilter = {
  types: [ContentTypes.File],
  fileTypes: [FileTypes.Document]
};

// Every document
const allDocs = await graphlit.queryContents({
  filter: { ...documentFilter }
});

// Documents matching a search phrase
const searchDocs = await graphlit.queryContents({
  search: "quarterly report",
  filter: { ...documentFilter }
});

// Documents created within the last 30 days ('P30D')
const recentDocs = await graphlit.queryContents({
  filter: { ...documentFilter, createdInLast: 'P30D' }
});

const recentResults = recentDocs.contents.results;
console.log(`Found ${recentResults.length} recent documents`);

// Print the metadata of every result that carries a `document` field
for (const doc of recentResults) {
  if (!doc.document) {
    continue;
  }
  console.log(`${doc.name}: ${doc.document.pageCount} pages`);
  console.log(` Author: ${doc.document.author || 'Unknown'}`);
  console.log(` Words: ${doc.document.wordCount}`);
}

Query Patterns
1. Filter by Document Type
// Runs one query restricted to the given file extensions.
const queryByExtensions = (extensions: string[]) =>
  graphlit.queryContents({
    filter: { fileExtensions: extensions }
  });

// One query per document family, issued in sequence
const pdfs = await queryByExtensions(['pdf']); // PDFs only
const wordDocs = await queryByExtensions(['docx', 'doc']); // Word documents
const excel = await queryByExtensions(['xlsx', 'xls']); // Excel spreadsheets
const powerpoint = await queryByExtensions(['pptx', 'ppt']); // PowerPoint presentations

// Report how many results each family returned
console.log(`PDFs: ${pdfs.contents.results.length}`);
console.log(`Word: ${wordDocs.contents.results.length}`);
console.log(`Excel: ${excel.contents.results.length}`);
console.log(`PowerPoint: ${powerpoint.contents.results.length}`);

2. Filter by Page Count
// Get all documents
const docs = await graphlit.queryContents({
  filter: {
    types: [ContentTypes.File],
    fileTypes: [FileTypes.Document]
  }
});

// Bucket by page count (done client-side).
// Only documents that actually report a numeric pageCount are bucketed:
// a null pageCount compared with `<` is coerced to 0, which would wrongly
// count Excel / PowerPoint content (worksheetCount / slideCount instead of
// pageCount) as "short".
const shortDocs = docs.contents.results.filter(doc => {
  const pages = doc.document?.pageCount;
  return typeof pages === 'number' && pages < 10;
});
const mediumDocs = docs.contents.results.filter(doc => {
  const pages = doc.document?.pageCount;
  return typeof pages === 'number' && pages >= 10 && pages < 50;
});
const longDocs = docs.contents.results.filter(doc => {
  const pages = doc.document?.pageCount;
  return typeof pages === 'number' && pages >= 50;
});

console.log(`Short (<10 pages): ${shortDocs.length}`);
// The medium bucket stops at 49; 50 pages and up belong to the long bucket.
console.log(`Medium (10-49 pages): ${mediumDocs.length}`);
console.log(`Long (50+ pages): ${longDocs.length}`);

// Find longest documents (filter() returns a copy, so sort() leaves the
// original result list untouched)
const sorted = docs.contents.results
  .filter(doc => doc.document?.pageCount)
  .sort((a, b) => (b.document?.pageCount || 0) - (a.document?.pageCount || 0));

console.log('\nTop 5 longest documents:');
sorted.slice(0, 5).forEach(doc => {
  console.log(` ${doc.name}: ${doc.document?.pageCount} pages`);
});

3. Filter by Author
// Keyword search for the author's name
const byAuthor = await graphlit.queryContents({
  search: "Kirk Marple",
  searchType: SearchTypes.Keyword,
  filter: {
    types: [ContentTypes.File],
    fileTypes: [FileTypes.Document]
  }
});

// Alternative: fetch every document and match on the author metadata client-side
const docs = await graphlit.queryContents({
  filter: {
    types: [ContentTypes.File],
    fileTypes: [FileTypes.Document]
  }
});

// Case-insensitive substring match on the author field
const kirkDocs = docs.contents.results.filter(({ document }) =>
  document?.author?.toLowerCase().includes('kirk')
);
console.log(`Documents by Kirk: ${kirkDocs.length}`);

// Tally documents per author; documents without an author go under 'Unknown'
const byAuthorCount = new Map<string, number>();
for (const { document } of docs.contents.results) {
  const author = document?.author || 'Unknown';
  const seen = byAuthorCount.get(author) || 0;
  byAuthorCount.set(author, seen + 1);
}

console.log('\nTop 10 authors:');
const rankedAuthors = [...byAuthorCount.entries()]
  .sort(([, countA], [, countB]) => countB - countA)
  .slice(0, 10);
for (const [author, count] of rankedAuthors) {
  console.log(` ${author}: ${count} documents`);
}

4. Filter by File Size
// Size thresholds in bytes
const ONE_MB = 1000000; // 1MB
const TEN_MB = 10000000; // 10MB

// Queries document files whose size falls inside the given byte range.
const queryBySize = (fileSizeRange: { from?: number; to?: number }) =>
  graphlit.queryContents({
    filter: {
      types: [ContentTypes.File],
      fileTypes: [FileTypes.Document],
      fileSizeRange
    }
  });

// Large documents (> 10MB)
const largeDocs = await queryBySize({ from: TEN_MB });

// Medium documents (1-10MB)
const mediumDocs = await queryBySize({ from: ONE_MB, to: TEN_MB });

// Small documents (< 1MB)
const smallDocs = await queryBySize({ to: ONE_MB });

console.log(`Large (>10MB): ${largeDocs.contents.results.length}`);
console.log(`Medium (1-10MB): ${mediumDocs.contents.results.length}`);
console.log(`Small (<1MB): ${smallDocs.contents.results.length}`);

5. Excel-Specific Queries
// Fetch content with an Excel file extension
const excel = await graphlit.queryContents({
  filter: {
    fileExtensions: ['xlsx', 'xls']
  }
});
const workbooks = excel.contents.results;

// Workbooks with more than one worksheet
const multiSheet = workbooks.filter(
  ({ document }) => document && document.worksheetCount > 1
);

console.log(`Excel files: ${workbooks.length}`);
console.log(`Multi-sheet workbooks: ${multiSheet.length}`);

// Rank the workbooks that report a worksheet count, largest first
const sorted = workbooks
  .filter(({ document }) => document?.worksheetCount)
  .sort((left, right) => {
    const leftSheets = left.document?.worksheetCount || 0;
    const rightSheets = right.document?.worksheetCount || 0;
    return rightSheets - leftSheets;
  });

console.log('\nLargest workbooks:');
for (const doc of sorted.slice(0, 5)) {
  console.log(` ${doc.name}: ${doc.document?.worksheetCount} worksheets`);
}

6. PowerPoint-Specific Queries
// Get PowerPoint files
const ppt = await graphlit.queryContents({
  filter: {
    fileExtensions: ['pptx', 'ppt']
  }
});

// Filter by slide count.
// Require a numeric slideCount: a null value compared with `<` is coerced
// to 0 and would otherwise be counted as a short deck.
const shortDecks = ppt.contents.results.filter(doc => {
  const slides = doc.document?.slideCount;
  return typeof slides === 'number' && slides < 20;
});
const longDecks = ppt.contents.results.filter(doc => {
  const slides = doc.document?.slideCount;
  return typeof slides === 'number' && slides >= 50;
});

console.log(`PowerPoint files: ${ppt.contents.results.length}`);
console.log(`Short decks (<20 slides): ${shortDecks.length}`);
console.log(`Long decks (50+ slides): ${longDecks.length}`);

// Average slide count over the decks that report one.
// The counts are collected once and the average falls back to 0 when no deck
// reports slides, instead of dividing 0 by 0 and printing "NaN".
const slideCounts = ppt.contents.results
  .filter(doc => doc.document?.slideCount)
  .map(doc => doc.document?.slideCount || 0);
const avgSlides = slideCounts.length > 0
  ? slideCounts.reduce((sum, count) => sum + count, 0) / slideCounts.length
  : 0;
console.log(`Average slides: ${avgSlides.toFixed(1)}`);

7. Encrypted Documents
// Fetch every document, then inspect the security flags client-side
const docs = await graphlit.queryContents({
  filter: {
    types: [ContentTypes.File],
    fileTypes: [FileTypes.Document]
  }
});
const allDocuments = docs.contents.results;

// Documents flagged as encrypted
const encrypted = allDocuments.filter(
  ({ document }) => document?.isEncrypted === true
);

// Documents carrying a digital signature
const signed = allDocuments.filter(
  ({ document }) => document?.hasDigitalSignature === true
);

console.log(`Encrypted documents: ${encrypted.length}`);
console.log(`Digitally signed: ${signed.length}`);

// Name each encrypted document, if there are any
if (encrypted.length > 0) {
  console.log('\nEncrypted documents:');
  for (const { name } of encrypted) {
    console.log(` ${name}`);
  }
}

8. Content Analysis
// Get documents
const docs = await graphlit.queryContents({
  filter: {
    types: [ContentTypes.File],
    fileTypes: [FileTypes.Document]
  }
});

// Word count statistics (only documents that report a word count)
const wordCounts = docs.contents.results
  .filter(doc => doc.document?.wordCount)
  .map(doc => doc.document?.wordCount || 0);

// Guard the empty case: with no word counts the average would be 0 / 0 (NaN)
// and Math.max() / Math.min() of nothing are -Infinity / Infinity.
// reduce() is used instead of spreading the array into Math.max / Math.min,
// which can exceed the engine's argument limit for very large result sets.
const hasWordCounts = wordCounts.length > 0;
const avgWords = hasWordCounts
  ? wordCounts.reduce((a, b) => a + b, 0) / wordCounts.length
  : 0;
const maxWords = hasWordCounts
  ? wordCounts.reduce((max, count) => Math.max(max, count), -Infinity)
  : 0;
const minWords = hasWordCounts
  ? wordCounts.reduce((min, count) => Math.min(min, count), Infinity)
  : 0;

console.log('Word count statistics:');
console.log(` Average: ${avgWords.toFixed(0)} words`);
console.log(` Max: ${maxWords} words`);
console.log(` Min: ${minWords} words`);

// Find most content-rich documents
const sorted = docs.contents.results
  .filter(doc => doc.document?.wordCount)
  .sort((a, b) => (b.document?.wordCount || 0) - (a.document?.wordCount || 0));

console.log('\nMost content-rich documents:');
sorted.slice(0, 5).forEach(doc => {
  console.log(` ${doc.name}: ${doc.document?.wordCount} words`);
});

Query documents
docs = await graphlit.queryContents( filter=ContentFilterInput( types=[ContentTypes.File], file_types=[FileTypes.Document] ) )
PDFs only
pdfs = await graphlit.queryContents( filter=ContentFilterInput( file_extensions=['pdf'] ) )
Access metadata
for doc in docs.contents.results: if doc.document: print(f"{doc.name}: {doc.document.page_count} pages") print(f" Author: {doc.document.author}") print(f" Words: {doc.document.word_count}")
**C#**:
```csharp
using Graphlit;

// The client variable is named `graphlit` because every call below uses that name.
var graphlit = new Graphlit();

// Query documents
var docs = await graphlit.QueryContents(new ContentFilter
{
    Filter = new ContentCriteria
    {
        Types = new[] { ContentTypes.File },
        FileTypes = new[] { FileTypes.Document }
    }
});

// PDFs only
var pdfs = await graphlit.QueryContents(new ContentFilter
{
    Filter = new ContentCriteria
    {
        FileExtensions = new[] { "pdf" }
    }
});

// Access metadata (only results that carry a Document field)
foreach (var doc in docs.Contents.Results)
{
    if (doc.Document != null)
    {
        Console.WriteLine($"{doc.Name}: {doc.Document.PageCount} pages");
        Console.WriteLine($" Author: {doc.Document.Author}");
        Console.WriteLine($" Words: {doc.Document.WordCount}");
    }
}
```

Developer Hints
Page Count is Automatic
// Page count is detected automatically for:
// - PDF files
// - Word documents
// PowerPoint reports slideCount and Excel reports worksheetCount instead.
const pageCount = doc.document?.pageCount;
if (pageCount) {
  console.log(`${pageCount} pages`);
}

Excel vs Word vs PowerPoint
// Each document family exposes its own size field on `document`.
const metadata = doc.document;

// Excel workbooks report worksheetCount
if (metadata?.worksheetCount) {
  console.log(`Excel: ${metadata.worksheetCount} worksheets`);
}

// PowerPoint decks report slideCount
if (metadata?.slideCount) {
  console.log(`PowerPoint: ${metadata.slideCount} slides`);
}

// Word and PDF documents report pageCount
if (metadata?.pageCount) {
  console.log(`Document: ${metadata.pageCount} pages`);
}

Author from Document Properties
// The author is read from the document properties (the Word/PDF metadata).
// It may be null when the property was never set, so fall back to 'Unknown'.
const documentAuthor = doc.document?.author;
const author = documentAuthor ? documentAuthor : 'Unknown';

Common Issues & Solutions
Issue: Need to filter by exact page count
Solution: Query all, filter client-side
// Fetch every document; the exact page-count match is applied client-side.
const docs = await graphlit.queryContents({
  filter: {
    types: [ContentTypes.File],
    fileTypes: [FileTypes.Document]
  }
});

const TARGET_PAGES = 10;
const exactly10Pages = docs.contents.results.filter(
  ({ document }) => document?.pageCount === TARGET_PAGES
);

Issue: Want PDFs only
Solution: Use fileExtensions filter
// Restrict the query to the 'pdf' file extension.
const pdfExtensions = ['pdf'];
const pdfs = await graphlit.queryContents({
  filter: { fileExtensions: pdfExtensions }
});

Issue: Need to count documents by file extension
Solution: Query and aggregate
// Fetch every document, then tally the results per file extension.
const docs = await graphlit.queryContents({
  filter: {
    types: [ContentTypes.File],
    fileTypes: [FileTypes.Document]
  }
});

// Results without an extension are grouped under 'unknown'.
const byExtension = new Map<string, number>();
for (const { fileExtension } of docs.contents.results) {
  const ext = fileExtension || 'unknown';
  const seen = byExtension.get(ext) || 0;
  byExtension.set(ext, seen + 1);
}

Production Example
/**
 * Prints a summary report of the document library to the console:
 * counts per file extension, page and size statistics, encrypted / signed
 * counts, the ten most frequent authors and the number of documents created
 * in the last 30 days.
 *
 * Queries up to 1000 document files and derives every statistic client-side.
 * Averages are reported as 0 when there is nothing to average, so an empty
 * library prints zeros rather than "NaN".
 *
 * @returns {Promise<void>} resolves once the report has been printed
 */
async function analyzeDocumentLibrary() {
  console.log('\n=== DOCUMENT LIBRARY ANALYSIS ===\n');

  // Get all documents (capped by `limit`)
  const docs = await graphlit.queryContents({
    filter: {
      types: [ContentTypes.File],
      fileTypes: [FileTypes.Document]
    },
    limit: 1000
  });

  // Fall back to an empty list so the report still prints if no result list
  // comes back.
  // NOTE(review): assumes `contents` / `results` can be null in the response
  // type; confirm against the SDK's generated types.
  const results = docs.contents?.results ?? [];
  console.log(`Total documents: ${results.length}`);

  // By file type
  const byType = new Map<string, number>();
  results.forEach(doc => {
    const ext = doc.fileExtension || 'unknown';
    byType.set(ext, (byType.get(ext) || 0) + 1);
  });
  console.log('\nDocument types:');
  Array.from(byType.entries())
    .sort((a, b) => b[1] - a[1])
    .forEach(([ext, count]) => {
      console.log(` .${ext}: ${count}`);
    });

  // Page statistics (only documents that report a page count)
  const withPages = results.filter(d => d.document?.pageCount);
  const totalPages = withPages.reduce((sum, d) => sum + (d.document?.pageCount || 0), 0);
  // Guard against 0 / 0 when no document reports pages
  const avgPages = withPages.length > 0 ? totalPages / withPages.length : 0;
  console.log(`\nPage statistics:`);
  console.log(` Documents with pages: ${withPages.length}`);
  console.log(` Total pages: ${totalPages.toLocaleString()}`);
  console.log(` Average pages: ${avgPages.toFixed(1)}`);

  // Size statistics (fileSize is in bytes; reported in MB of 1024 * 1024 bytes)
  const totalSize = results.reduce((sum, d) => sum + (d.fileSize || 0), 0);
  // Guard against 0 / 0 when the library is empty
  const avgSize = results.length > 0 ? totalSize / results.length : 0;
  console.log(`\nSize statistics:`);
  console.log(` Total size: ${(totalSize / 1024 / 1024).toFixed(2)} MB`);
  console.log(` Average size: ${(avgSize / 1024 / 1024).toFixed(2)} MB`);

  // Security
  const encrypted = results.filter(d => d.document?.isEncrypted);
  const signed = results.filter(d => d.document?.hasDigitalSignature);
  console.log(`\nSecurity:`);
  console.log(` Encrypted: ${encrypted.length}`);
  console.log(` Digitally signed: ${signed.length}`);

  // Top authors (documents without an author are grouped under 'Unknown')
  const authors = new Map<string, number>();
  results.forEach(doc => {
    const author = doc.document?.author || 'Unknown';
    authors.set(author, (authors.get(author) || 0) + 1);
  });
  console.log(`\nTop 10 authors:`);
  Array.from(authors.entries())
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .forEach(([author, count]) => {
      console.log(` ${author}: ${count} documents`);
    });

  // Recent activity: documents whose creationDate is less than 30 days old.
  // The reference time is read once so every document is compared against
  // the same instant.
  const now = Date.now();
  const thirtyDaysMs = 30 * 24 * 60 * 60 * 1000;
  const last30Days = results.filter(doc => {
    const age = now - new Date(doc.creationDate).getTime();
    return age < thirtyDaysMs;
  });
  console.log(`\nRecent activity (last 30 days): ${last30Days.length} documents`);
}
await analyzeDocumentLibrary();

Last updated
Was this helpful?