Extract Medical Entities from Clinical Content
User Intent
Operation
Prerequisites
Complete Code Example (TypeScript)
import { Graphlit } from 'graphlit-client';
import {
  EntityExtractionServiceTypes,
  ExtractionServiceTypes,
  FilePreparationServiceTypes,
  ModelServiceTypes,
  ObservableTypes,
  OpenAIModels,
  SpecificationTypes
} from 'graphlit-client/dist/generated/graphql-types';
const graphlit = new Graphlit();
console.log('=== Building Medical Knowledge Graph ===\n');

// Step 1: Create high-quality medical extraction workflow
console.log('Step 1: Creating medical entity extraction workflow...');

// Use GPT-4 for medical accuracy
const spec = await graphlit.createSpecification({
  name: "GPT-4 Medical Extraction",
  type: SpecificationTypes.Completion,
  serviceType: ModelServiceTypes.OpenAi,
  openAI: {
    model: OpenAIModels.Gpt4, // Best quality for medical
    temperature: 0.1 // Low temperature for consistency
  }
});

// The workflow has one preparation job and one extraction job. The extraction
// job lists every observable type it should look for.
const workflow = await graphlit.createWorkflow({
  name: "Medical Entity Extraction",
  preparation: {
    jobs: [{
      connector: {
        type: FilePreparationServiceTypes.ModelDocument // PDFs, Word, etc.
      }
    }]
  },
  extraction: {
    jobs: [{
      connector: {
        type: EntityExtractionServiceTypes.ModelText,
        extractedTypes: [
          // Medical entity types (every member is qualified with ObservableTypes)
          ObservableTypes.MedicalCondition, // Diseases, symptoms, diagnoses
          ObservableTypes.MedicalDrug, // Medications, pharmaceuticals
          ObservableTypes.MedicalDrugClass, // Drug categories (antibiotics, etc.)
          ObservableTypes.MedicalProcedure, // Surgeries, treatments
          ObservableTypes.MedicalTest, // Lab tests, diagnostics
          ObservableTypes.MedicalStudy, // Clinical trials, research
          ObservableTypes.MedicalDevice, // Medical equipment, implants
          ObservableTypes.MedicalTherapy, // Therapies, treatments
          ObservableTypes.MedicalGuideline, // Clinical guidelines, protocols
          ObservableTypes.MedicalIndication, // Reasons for treatment
          ObservableTypes.MedicalContraindication, // Reasons to avoid treatment
          // Also extract non-medical entities for context
          ObservableTypes.Person, // Patients, doctors, researchers
          ObservableTypes.Organization // Hospitals, pharma companies
        ]
      }
    }]
  },
  // NOTE(review): confirm the workflow input accepts a top-level
  // `specification` reference; the input type is not visible here.
  specification: { id: spec.createSpecification.id }
});
console.log(`✓ Workflow: ${workflow.createWorkflow.id}\n`);
// Step 2: Ingest clinical research paper
console.log('Step 2: Ingesting clinical research paper...');

// Positional call: URI, display name, three arguments left undefined, then a
// reference to the workflow created in Step 1 as the sixth argument.
const paperUri = 'https://example.com/papers/clinical-trial.pdf';
const paperName = "Clinical Trial: Drug X for Condition Y";
const workflowRef = { id: workflow.createWorkflow.id };
const paper = await graphlit.ingestUri(
  paperUri,
  paperName,
  undefined,
  undefined,
  undefined,
  workflowRef
);
console.log(`✓ Ingested: ${paper.ingestUri.id}\n`);
// Step 3: Wait for extraction
console.log('Step 3: Extracting medical entities...');

// Poll the content's done flag, pausing 3 seconds between checks, until the
// service reports a truthy result.
const pause = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
let isDone = false;
for (;;) {
  const status = await graphlit.isContentDone(paper.ingestUri.id);
  isDone = status.isContentDone.result;
  if (isDone) {
    break;
  }
  console.log(' Processing...');
  await pause(3000);
}
console.log('✓ Extraction complete\n');
// Step 4: Retrieve extracted entities
console.log('Step 4: Retrieving medical entities...');

// Load the ingested paper and print its name, page count, and the number of
// observations attached to it (0 when there is no observations array).
const paperDetails = await graphlit.getContent(paper.ingestUri.id);
const content = paperDetails.content;
const summaryLines = [
  `✓ Document: ${content.name}`,
  ` Pages: ${content.document?.pageCount}`,
  ` Total entities: ${content.observations?.length ?? 0}\n`
];
for (const line of summaryLines) {
  console.log(line);
}
// Step 5: Analyze by medical entity type
console.log('Step 5: Analyzing medical entities...\n');

// Observable types to summarize; each must be qualified with ObservableTypes.
const medicalTypes = [
  ObservableTypes.MedicalCondition,
  ObservableTypes.MedicalDrug,
  ObservableTypes.MedicalDrugClass,
  ObservableTypes.MedicalProcedure,
  ObservableTypes.MedicalTest,
  ObservableTypes.MedicalStudy,
  ObservableTypes.MedicalDevice,
  ObservableTypes.MedicalTherapy
];

// For each type, print the number of distinct observable names and up to the
// first five of them. Types with no matching observations print nothing.
medicalTypes.forEach(type => {
  const entities = content.observations?.filter(obs => obs.type === type) || [];
  // A Set removes repeated names so each entity is listed once.
  const unique = new Set(entities.map(e => e.observable.name));
  if (unique.size > 0) {
    console.log(`${type} (${unique.size}):`);
    Array.from(unique).slice(0, 5).forEach(name => {
      console.log(` - ${name}`);
    });
    if (unique.size > 5) {
      console.log(` ... and ${unique.size - 5} more`);
    }
    console.log();
  }
});
// Step 6: Build drug-condition relationships
console.log('Step 6: Analyzing drug-condition relationships...\n');
const drugs = content.observations?.filter(obs =>
  obs.type === ObservableTypes.MedicalDrug
) || [];
const conditions = content.observations?.filter(obs =>
  obs.type === ObservableTypes.MedicalCondition
) || [];

// Reduce one observation to what the pairing step needs: its name, the set of
// page indexes it occurs on, and its mean occurrence confidence. Doing this
// once per observation avoids rebuilding the same sets and sums for every
// drug/condition pair.
const summarizeObservation = (obs: (typeof drugs)[number]) => {
  const occurrences = obs.occurrences ?? [];
  // A missing confidence counts as 0 so the total (and the later sort
  // comparator) never becomes NaN.
  const totalConfidence = occurrences.reduce(
    (sum, occ) => sum + (occ.confidence ?? 0),
    0
  );
  return {
    name: obs.observable.name,
    pages: new Set(occurrences.map(occ => occ.pageIndex)),
    // `|| 1` keeps the mean at 0 when there are no occurrences.
    avgConfidence: totalConfidence / (occurrences.length || 1)
  };
};
const drugSummaries = drugs.map(summarizeObservation);
const conditionSummaries = conditions.map(summarizeObservation);

// Co-occurrence analysis: a drug and a condition are related when they share
// at least one page index; the pair's confidence is the mean of both averages.
const relationships: Array<{ drug: string; condition: string; confidence: number }> = [];
for (const drug of drugSummaries) {
  for (const condition of conditionSummaries) {
    const sharesPage = Array.from(drug.pages).some(p => condition.pages.has(p));
    if (sharesPage) {
      relationships.push({
        drug: drug.name,
        condition: condition.name,
        confidence: (drug.avgConfidence + condition.avgConfidence) / 2
      });
    }
  }
}

// Print the five highest-confidence pairs. The sort is in place, so
// `relationships` stays ordered by descending confidence afterwards.
console.log('Drug-Condition relationships:');
relationships
  .sort((a, b) => b.confidence - a.confidence)
  .slice(0, 5)
  .forEach(({ drug, condition, confidence }) => {
    console.log(` ${drug} ↔ ${condition} (confidence: ${confidence.toFixed(2)})`);
  });
// Step 7: Query medical knowledge graph
console.log('\nStep 7: Querying medical knowledge graph...\n');

// Query observables filtered to the MedicalCondition type and report how many
// results came back.
const conditionFilter = { types: [ObservableTypes.MedicalCondition] };
const allConditions = await graphlit.queryObservables({ filter: conditionFilter });
const conditionTotal = allConditions.observables.results.length;
console.log(`Total conditions in knowledge graph: ${conditionTotal}`);

// Same query for the MedicalDrug type.
const drugFilter = { types: [ObservableTypes.MedicalDrug] };
const allDrugs = await graphlit.queryObservables({ filter: drugFilter });
const drugTotal = allDrugs.observables.results.length;
console.log(`Total drugs in knowledge graph: ${drugTotal}`);
console.log('\n✓ Medical knowledge graph complete!');
Step-by-Step Explanation
Step 1: Understanding Medical Entity Types
Step 2: Model Selection for Medical Content
Step 3: Clinical Document Types
Step 4: Medical Entity Relationships
Step 5: Confidence Scoring for Medical Entities
Configuration Options
Precision vs Recall Tradeoff
Domain-Specific Extraction
Variations
Variation 1: Drug Information Database
Variation 2: Clinical Trial Analysis
Variation 3: Adverse Event Monitoring
Variation 4: Medical Literature Review
Variation 5: Treatment Protocol Assistant
Common Issues & Solutions
Issue: Medical Abbreviations Not Recognized
Issue: False Positives on Common Terms
Issue: Missing Drug-Condition Relationships
Issue: HIPAA Compliance Concerns
Developer Hints
Medical Entity Quality by Source
Model Recommendations by Use Case
Confidence Thresholds
HIPAA and Privacy
Performance Optimization
Production Patterns
Healthcare Use Cases
Compliance Considerations
Last updated