diff --git a/docs/blog/posts/announcing-instructor-responses-support.md b/docs/blog/posts/announcing-instructor-responses-support.md index f0fc8c43d..b3f3840e1 100644 --- a/docs/blog/posts/announcing-instructor-responses-support.md +++ b/docs/blog/posts/announcing-instructor-responses-support.md @@ -23,7 +23,7 @@ We're excited to announce Instructor's integration with OpenAI's new Responses A The Responses API represents a significant shift in how we interact with OpenAI models. With Instructor's integration, you can leverage this new API with our familiar, type-safe interface. -For our full documentation of the features we support, check out our full [documentation here](../../integrations/openai-responses.md). +For our full documentation of the features we support, check out our full [OpenAI integration guide](../../integrations/openai.md). Getting started is now easier than ever. With our unified provider interface, you can initialize your client with a single line of code. This means less time dealing with configuration and more time building features that matter. @@ -150,6 +150,6 @@ To start using the new Responses API integration, update to the latest version o This integration represents a significant step forward in making LLM development more accessible and powerful. We're excited to see what you'll build with these new capabilities. -For more detailed information about using the Responses API with Instructor, check out our [OpenAI Responses API Guide](../../integrations/openai-responses.md). +For more detailed information about using the Responses API with Instructor, check out our [OpenAI integration guide](../../integrations/openai.md). Happy coding! 
diff --git a/docs/blog/posts/announcing-unified-provider-interface.md b/docs/blog/posts/announcing-unified-provider-interface.md index 3ec2db39c..413838080 100644 --- a/docs/blog/posts/announcing-unified-provider-interface.md +++ b/docs/blog/posts/announcing-unified-provider-interface.md @@ -202,10 +202,11 @@ These are areas where `instructor` can continue to reduce friction for developer We encourage you to try `from_provider()` in your projects, particularly when experimenting with multiple LLMs. Feedback and suggestions for additional providers or features are always welcome. ## Related Documentation -- [Provider Patching](/concepts/patching) - How provider integration works -- [All Integrations](/integrations/) - Supported provider list +- [Provider Patching](../../concepts/patching.md) - How provider integration works +- [All Integrations](../../integrations/index.md) - Supported provider list ## See Also -- [String-Based Initialization](string-based-init) - Alternative init method -- [Framework Comparison](best_framework) - Multi-provider advantages -- [Getting Started](introduction) - Quick start guide + +- [String-Based Initialization](string-based-init.md) - Alternative init method +- [Framework Comparison](best_framework.md) - Multi-provider advantages +- [Getting Started](introduction.md) - Quick start guide diff --git a/docs/blog/posts/anthropic-prompt-caching.md b/docs/blog/posts/anthropic-prompt-caching.md index 600716c48..f76c5f6a8 100644 --- a/docs/blog/posts/anthropic-prompt-caching.md +++ b/docs/blog/posts/anthropic-prompt-caching.md @@ -338,10 +338,10 @@ for _ in range(2): ``` ## Related Documentation -- [Caching Strategies](/concepts/caching) - General caching concepts -- [Anthropic Integration](/integrations/anthropic) - Full Anthropic guide +- [Caching Strategies](../../concepts/caching.md) - General caching concepts +- [Anthropic Integration](../../integrations/anthropic.md) - Full Anthropic guide ## See Also -- [Anthropic Structured 
Outputs](structured-output-anthropic) - Use with caching -- [Response Caching](caching) - General caching strategies -- [Performance Monitoring](logfire) - Track cache performance \ No newline at end of file +- [Anthropic Structured Outputs](structured-output-anthropic.md) - Use with caching +- [Response Caching](caching.md) - General caching strategies +- [Performance Monitoring](logfire.md) - Track cache performance \ No newline at end of file diff --git a/docs/blog/posts/caching.md b/docs/blog/posts/caching.md index 41543d7fc..ecac056f3 100644 --- a/docs/blog/posts/caching.md +++ b/docs/blog/posts/caching.md @@ -588,7 +588,7 @@ Here's a **validated** real-world performance comparison across different cachin !!! success "Validated Performance" - These numbers are from actual test runs using our comprehensive [caching examples](../../examples/caching/). The `functools.cache` result showing **207,636x improvement** demonstrates the dramatic impact of in-memory caching. + These numbers are from actual test runs using our comprehensive [caching examples](https://github.com/jxnl/instructor/tree/main/examples/caching). The `functools.cache` result showing **207,636x improvement** demonstrates the dramatic impact of in-memory caching. 
### Cost Impact Analysis @@ -945,7 +945,7 @@ These are **actual results** from running the examples, not theoretical projecti - [Dictionary Operations](../../concepts/dictionary_operations.md) - Low-level optimization techniques ### Working Examples -- [**Caching Examples**](../../examples/caching/) - **Complete working examples** validating all strategies +- [**Caching Examples**](https://github.com/jxnl/instructor/tree/main/examples/caching) - **Complete working examples** validating all strategies - [Streaming Responses](../../concepts/partial.md) - Combine caching with real-time streaming - [Async Processing](../../blog/posts/learn-async.md) - Async patterns for high-throughput applications - [Batch Processing](../../examples/batch_job_oai.md) - Efficient batch operations with caching diff --git a/docs/blog/posts/chat-with-your-pdf-with-gemini.md b/docs/blog/posts/chat-with-your-pdf-with-gemini.md index 83c674a8e..55c84d234 100644 --- a/docs/blog/posts/chat-with-your-pdf-with-gemini.md +++ b/docs/blog/posts/chat-with-your-pdf-with-gemini.md @@ -108,11 +108,11 @@ By combining Gemini's multimodal capabilities with Instructor's structured outpu No more wrestling with parsing rules, managing embeddings, or building complex pipelines - just define your data model and let the LLM do the heavy lifting. 
## Related Documentation -- [Multimodal Processing](/concepts/multimodal) - Core multimodal concepts +- [Multimodal Processing](../../concepts/multimodal.md) - Core multimodal concepts ## See Also -- [Gemini Multimodal Features](multimodal-gemini) - Full Gemini capabilities -- [PDF Citation Generation](generating-pdf-citations) - Extract citations from PDFs -- [RAG and Beyond](rag-and-beyond) - Advanced document processing +- [Gemini Multimodal Features](multimodal-gemini.md) - Full Gemini capabilities +- [PDF Citation Generation](generating-pdf-citations.md) - Extract citations from PDFs +- [RAG and Beyond](rag-and-beyond.md) - Advanced document processing If you liked this, give `instructor` a try today and see how much easier structured outputs makes working with LLMs become. [Get started with Instructor today!](../../index.md) diff --git a/docs/blog/posts/citations.md b/docs/blog/posts/citations.md index 2c2d6ed5a..5f4324421 100644 --- a/docs/blog/posts/citations.md +++ b/docs/blog/posts/citations.md @@ -270,12 +270,12 @@ except ValidationError as e: ``` ## Related Documentation -- [Validation Guide](/concepts/validation) - Validate citations +- [Validation Guide](../../concepts/validation.md) - Validate citations ## See Also -- [RAG Techniques](rag-and-beyond) - Use citations in RAG -- [PDF Citations](generating-pdf-citations) - Extract from PDFs -- [Validation Basics](validation-part1) - Ensure citation quality +- [RAG Techniques](rag-and-beyond.md) - Use citations in RAG +- [PDF Citations](generating-pdf-citations.md) - Extract from PDFs +- [Validation Basics](validation-part1.md) - Ensure citation quality ## Conclusion diff --git a/docs/blog/posts/introduction.md b/docs/blog/posts/introduction.md index 15347de64..3b0de91fb 100644 --- a/docs/blog/posts/introduction.md +++ b/docs/blog/posts/introduction.md @@ -220,6 +220,6 @@ Instructor, with Pydantic, simplifies interaction with language models. 
It is us - [Why Instructor is the Best Library](best_framework.md) - Learn about Instructor's philosophy and advantages - [Structured Outputs and Prompt Caching with Anthropic](structured-output-anthropic.md) - See how Instructor works with Claude -- [Chain of Thought Example](../../examples/chain-of-thought.md) - Implement reasoning in your models +- [Chain of Density Tutorial](../../tutorials/6-chain-of-density.ipynb) - Learn advanced prompting techniques If you enjoy the content or want to try out `instructor` please check out the [github](https://github.com/jxnl/instructor) and give us a star! \ No newline at end of file diff --git a/docs/blog/posts/learn-async.md b/docs/blog/posts/learn-async.md index 5421dd2fe..167acda3c 100644 --- a/docs/blog/posts/learn-async.md +++ b/docs/blog/posts/learn-async.md @@ -251,4 +251,4 @@ Here are typical performance results when processing 7 items: --- -**Next Steps**: Learn about [error handling patterns](../concepts/error_handling.md) or explore [rate limiting with tenacity](../concepts/retrying.md) for production applications. \ No newline at end of file +**Next Steps**: Learn about [error handling patterns](../../concepts/error_handling.md) or explore [rate limiting with tenacity](../../concepts/retrying.md) for production applications. \ No newline at end of file diff --git a/docs/blog/posts/llm-as-reranker.md b/docs/blog/posts/llm-as-reranker.md index 62e15b586..d7c39f438 100644 --- a/docs/blog/posts/llm-as-reranker.md +++ b/docs/blog/posts/llm-as-reranker.md @@ -193,6 +193,6 @@ class Label(BaseModel): This will automatically check that the `chunk_id` is present in the `chunks` list and raise a `ValueError` if it is not, where `context` is the context dictionary that we passed into the `rerank_results` function. 
## See Also -- [RAG and Beyond](rag-and-beyond) - Comprehensive RAG guide -- [Validation Fundamentals](validation-part1) - Validate ranking scores -- [Performance Monitoring](logfire) - Track reranking performance +- [RAG and Beyond](rag-and-beyond.md) - Comprehensive RAG guide +- [Validation Fundamentals](validation-part1.md) - Validate ranking scores +- [Performance Monitoring](logfire.md) - Track reranking performance diff --git a/docs/blog/posts/llms-txt-support.md b/docs/blog/posts/llms-txt-support.md new file mode 100644 index 000000000..f5d6ec0f2 --- /dev/null +++ b/docs/blog/posts/llms-txt-support.md @@ -0,0 +1,45 @@ +--- +authors: + - jxnl +categories: + - Announcements +comments: true +date: 2025-08-29 +description: + Instructor now automatically generates llms.txt files for better AI documentation access. +draft: false +slug: llms-txt-support +tags: + - Documentation + - AI +--- + +# Instructor Now Supports llms.txt + +We've added automatic `llms.txt` generation to Instructor's documentation using the [`mkdocs-llmstxt`](https://github.com/pawamoy/mkdocs-llmstxt) plugin. + + + +## What is llms.txt? + +The [`llms.txt` specification](https://github.com/AnswerDotAI/llms-txt) helps AI coding assistants access clean documentation without parsing complex HTML. Think "robots.txt for LLMs." + +## What This Means + +Your AI coding assistant (Copilot, Claude, Cursor) now gets better access to: +- Getting started guides +- Core concepts and patterns +- Provider integration docs + +This should result in more accurate suggestions and better understanding of Instructor's features. + +## Implementation + +We're using the `mkdocs-llmstxt` plugin to automatically generate our `llms.txt` from our existing markdown documentation. Every time we update our docs, the `llms.txt` file stays current automatically. + +No manual maintenance, always up-to-date. 
+ +## Resources + +- [llms.txt Specification](https://github.com/AnswerDotAI/llms-txt) +- [mkdocs-llmstxt Plugin](https://github.com/pawamoy/mkdocs-llmstxt) \ No newline at end of file diff --git a/docs/blog/posts/mkdocs-llmstxt-plugin-integration.md b/docs/blog/posts/mkdocs-llmstxt-plugin-integration.md new file mode 100644 index 000000000..b7a9bcfd1 --- /dev/null +++ b/docs/blog/posts/mkdocs-llmstxt-plugin-integration.md @@ -0,0 +1,133 @@ +--- +authors: + - jxnl +categories: + - Technical + - Documentation +comments: true +date: 2025-08-29 +description: + Deep dive into how we integrated the mkdocs-llmstxt plugin to automatically generate llms.txt files for better AI documentation consumption. +draft: false +slug: mkdocs-llmstxt-plugin-integration +tags: + - MkDocs + - Plugins + - Documentation + - AI + - Automation +--- + +# Automating llms.txt Generation with mkdocs-llmstxt Plugin + +Today we integrated the `mkdocs-llmstxt` plugin into Instructor's documentation pipeline. This powerful plugin automatically generates `llms.txt` files from our MkDocs documentation, making our comprehensive guides instantly accessible to AI language models. + + + +## About the mkdocs-llmstxt Plugin + +The [`mkdocs-llmstxt` plugin](https://github.com/pawamoy/mkdocs-llmstxt) by Timothée Mazzucotelli is a brilliant solution to a common problem: how do you keep an `llms.txt` file synchronized with your evolving documentation? + +### Key Features + +**Automatic Generation**: The plugin generates `llms.txt` files directly from your MkDocs source files during the build process. No manual maintenance required. 
+ +**Flexible Section Control**: You can specify exactly which parts of your documentation to include: + +```yaml +plugins: + - llmstxt: + sections: + Getting Started: + - index.md: Introduction to structured outputs + - installation.md: Setup instructions + Core Concepts: + - concepts/*.md +``` + +**Clean Markdown Conversion**: The plugin converts your documentation to clean, LLM-friendly markdown format, removing HTML artifacts and navigation elements. + +**Customizable Descriptions**: You can provide both short and long descriptions of your project, giving AI models the context they need. + +## Our Implementation + +Here's how we configured the plugin for Instructor: + +```yaml +plugins: + - llmstxt: + markdown_description: > + Instructor is a Python library that makes it easy to work with structured outputs + from large language models (LLMs). Built on top of Pydantic, it provides a simple, + type-safe way to extract structured data from LLM responses across multiple providers + including OpenAI, Anthropic, Google, and many others. + sections: + Getting Started: + - index.md: Introduction to structured outputs with LLMs + - getting-started.md: Quick start guide + - installation.md: Installation instructions + Core Concepts: + - concepts/*.md + Integrations: + - integrations/*.md +``` + +### Why These Sections? + +We carefully selected these sections because they provide AI models with the essential information needed to understand and use Instructor: + +- **Getting Started**: Core concepts and installation +- **Core Concepts**: Deep dive into features like validation, streaming, and patterns +- **Integrations**: Provider-specific guidance for OpenAI, Anthropic, Google, and others + +## Technical Benefits + +### Build Integration + +The plugin seamlessly integrates into our existing MkDocs build pipeline. Every time we deploy documentation updates, the `llms.txt` file is automatically regenerated with the latest content. 
+ +### Content Freshness + +Unlike manually maintained `llms.txt` files, our generated version is always up-to-date. When we add new integration guides or update existing concepts, the changes are automatically reflected. + +### Glob Pattern Support + +The plugin supports glob patterns like `concepts/*.md`, making it easy to include entire directories without manually listing each file. + +## Plugin Architecture + +The `mkdocs-llmstxt` plugin works by: + +1. **Parsing Configuration**: Reading your `sections` configuration during the MkDocs build +2. **File Processing**: Converting specified markdown files to clean, LLM-friendly format +3. **Content Assembly**: Combining sections with metadata into the standard llms.txt format +4. **Output Generation**: Writing the final `llms.txt` file to your site root + +## Installation and Setup + +Adding the plugin to your own MkDocs project is straightforward: + +```bash +pip install mkdocs-llmstxt +``` + +Then add it to your `mkdocs.yml`: + +```yaml +site_url: https://your-site.com/ # Required for the plugin + +plugins: + - llmstxt: + markdown_description: Description of your project + sections: + Documentation: + - docs/*.md +``` + +## Resources + +- [mkdocs-llmstxt Plugin](https://github.com/pawamoy/mkdocs-llmstxt) +- [llms.txt Specification](https://github.com/AnswerDotAI/llms-txt) +- [Instructor Documentation](https://python.useinstructor.com/) + +Special thanks to Timothée Mazzucotelli for creating this excellent plugin! 
diff --git a/docs/blog/posts/multimodal-gemini.md b/docs/blog/posts/multimodal-gemini.md index 66d46a555..cc40ee79d 100644 --- a/docs/blog/posts/multimodal-gemini.md +++ b/docs/blog/posts/multimodal-gemini.md @@ -218,11 +218,10 @@ To address these limitations and expand the capabilities of our video analysis s By addressing these challenges and exploring these new directions, we can create a more comprehensive and nuanced video analysis system, opening up even more possibilities for applications in travel, education, and beyond. ## Related Documentation -- [Multimodal Concepts](/concepts/multimodal) - Working with images, video, and audio -- [Image Processing](/concepts/images) - Image-specific techniques -- [Google Integration](/integrations/google) - Complete Gemini setup guide +- [Multimodal Concepts](../../concepts/multimodal.md) - Working with images, video, and audio +- [Google Integration](../../integrations/google.md) - Complete Gemini setup guide ## See Also -- [OpenAI Multimodal](openai-multimodal) - Compare multimodal approaches -- [Anthropic Structured Output](structured-output-anthropic) - Alternative provider -- [Chat with PDFs using Gemini](chat-with-your-pdf-with-gemini) - Practical PDF processing +- [OpenAI Multimodal](openai-multimodal.md) - Compare multimodal approaches +- [Anthropic Structured Output](structured-output-anthropic.md) - Alternative provider +- [Chat with PDFs using Gemini](chat-with-your-pdf-with-gemini.md) - Practical PDF processing diff --git a/docs/blog/posts/native_caching.md b/docs/blog/posts/native_caching.md index 8f809579a..ed8821292 100644 --- a/docs/blog/posts/native_caching.md +++ b/docs/blog/posts/native_caching.md @@ -231,7 +231,7 @@ Native caching delivers the same dramatic performance improvements you'd expect: - **DiskCache**: 5-10x improvement with persistence benefits - **Cost Reduction**: 50-90% API cost savings depending on cache hit rate -For a comprehensive deep-dive into caching strategies and performance 
analysis, check out our [complete caching guide](../caching.md). +For a comprehensive deep-dive into caching strategies and performance analysis, check out our [complete caching guide](caching.md). ## Getting Started diff --git a/docs/blog/posts/openai-multimodal.md b/docs/blog/posts/openai-multimodal.md index 1b6e24188..9d166b245 100644 --- a/docs/blog/posts/openai-multimodal.md +++ b/docs/blog/posts/openai-multimodal.md @@ -90,10 +90,10 @@ While this new feature is exciting, it's important to note that it's best suited As with any AI-powered feature, it's crucial to consider ethical implications and potential biases in audio processing and generation. Always test thoroughly and consider the diversity of your user base when implementing these features. ## Related Documentation -- [Multimodal Guide](/concepts/multimodal) - Comprehensive multimodal reference -- [OpenAI Integration](/integrations/openai) - Full OpenAI setup +- [Multimodal Guide](../../concepts/multimodal.md) - Comprehensive multimodal reference +- [OpenAI Integration](../../integrations/openai.md) - Full OpenAI setup ## See Also -- [Gemini Multimodal](multimodal-gemini) - Alternative multimodal approach -- [Prompt Caching](anthropic-prompt-caching) - Cache large audio files -- [Monitoring with Logfire](logfire) - Track multimodal processing +- [Gemini Multimodal](multimodal-gemini.md) - Alternative multimodal approach +- [Prompt Caching](anthropic-prompt-caching.md) - Cache large audio files +- [Monitoring with Logfire](logfire.md) - Track multimodal processing diff --git a/docs/blog/posts/pydantic-is-still-all-you-need.md b/docs/blog/posts/pydantic-is-still-all-you-need.md index a2683cacd..d5fa7f5e1 100644 --- a/docs/blog/posts/pydantic-is-still-all-you-need.md +++ b/docs/blog/posts/pydantic-is-still-all-you-need.md @@ -125,10 +125,10 @@ As we continue to refine AI language models, keeping these principles in mind wi For more advanced use cases and integrations, check out our 
[examples](../../examples/index.md) section, which covers various LLM providers and specialized implementations. ## Related Documentation -- [Instructor Philosophy](/concepts/philosophy) - Why we chose Pydantic -- [Validation Guide](/concepts/validation) - Practical validation techniques +- [Instructor Philosophy](../../concepts/philosophy.md) - Why we chose Pydantic +- [Validation Guide](../../concepts/validation.md) - Practical validation techniques ## See Also -- [Validation Deep Dive](validation-part1) - Advanced validation patterns -- [Best Framework Comparison](best_framework) - Why Instructor stands out -- [Introduction to Instructor](introduction) - Getting started guide +- [Validation Deep Dive](validation-part1.md) - Advanced validation patterns +- [Best Framework Comparison](best_framework.md) - Why Instructor stands out +- [Introduction to Instructor](introduction.md) - Getting started guide diff --git a/docs/blog/posts/rag-and-beyond.md b/docs/blog/posts/rag-and-beyond.md index f33c0b240..ab2258103 100644 --- a/docs/blog/posts/rag-and-beyond.md +++ b/docs/blog/posts/rag-and-beyond.md @@ -238,11 +238,11 @@ This is not about fancy embedding tricks, it's just plain old information retrie Here I want to show that `instructor` isn’t just about data extraction. It’s a powerful framework for building a data model and integrating it with your LLM. Structured output is just the beginning - the untapped goldmine is skilled use of tools and APIs. 
## Related Documentation -- [Validation Concepts](/concepts/validation) - Validate RAG outputs +- [Validation Concepts](../../concepts/validation.md) - Validate RAG outputs ## See Also -- [LLM as Reranker](llm-as-reranker) - Improve search relevance -- [Citation Extraction](citations) - Verify sources -- [PDF Processing](chat-with-your-pdf-with-gemini) - Document handling +- [LLM as Reranker](llm-as-reranker.md) - Improve search relevance +- [Citation Extraction](citations.md) - Verify sources +- [PDF Processing](chat-with-your-pdf-with-gemini.md) - Document handling If you enjoy the content or want to try out `instructor` please check out the [github](https://github.com/jxnl/instructor) and give us a star! \ No newline at end of file diff --git a/docs/blog/posts/semantic-validation-structured-outputs.md b/docs/blog/posts/semantic-validation-structured-outputs.md index b96315198..1ef2c0abe 100644 --- a/docs/blog/posts/semantic-validation-structured-outputs.md +++ b/docs/blog/posts/semantic-validation-structured-outputs.md @@ -335,15 +335,15 @@ Semantic validation represents a significant advancement in ensuring the quality As these techniques mature, we can expect to see semantic validation become a standard part of AI application development, especially in regulated industries where output quality is critical. -To get started with semantic validation in your projects, check out the [Semantic Validation documentation](https://python.useinstructor.com/concepts/semantic_validation/) and explore the various examples and patterns. +To get started with semantic validation in your projects, check out the [Semantic Validation documentation](https://python.useinstructor.com/concepts/semantic_validation/) and explore the various examples and patterns. This approach isn't just a technical improvement-it's a fundamental shift in how we think about validation, moving from rigid rules to intelligent understanding of content and context. 
## Related Documentation -- [Validation Fundamentals](/concepts/validation) - Core validation concepts -- [LLM Validation](/concepts/llm_validation) - Using LLMs for validation +- [Validation Fundamentals](../../concepts/validation.md) - Core validation concepts +- [Semantic Validation](../../concepts/semantic_validation.md) - Using LLMs for validation ## See Also -- [Validation Deep Dive](validation-part1) - Foundation validation concepts -- [Anthropic Prompt Caching](anthropic-prompt-caching) - Optimize validation costs -- [Monitoring with Logfire](logfire) - Track validation performance \ No newline at end of file +- [Validation Deep Dive](validation-part1.md) - Foundation validation concepts +- [Anthropic Prompt Caching](anthropic-prompt-caching.md) - Optimize validation costs +- [Monitoring with Logfire](logfire.md) - Track validation performance \ No newline at end of file diff --git a/docs/blog/posts/structured-output-anthropic.md b/docs/blog/posts/structured-output-anthropic.md index 7124f43aa..70a295c06 100644 --- a/docs/blog/posts/structured-output-anthropic.md +++ b/docs/blog/posts/structured-output-anthropic.md @@ -141,10 +141,10 @@ By combining Anthropic's Claude with Instructor's structured output capabilities As the AI landscape continues to evolve, staying up-to-date with the latest tools and techniques is crucial. We encourage you to explore these features and share your experiences with the community. Happy coding! 
## Related Documentation -- [How Patching Works](/concepts/patching) - Understand provider integration -- [Anthropic Integration](/integrations/anthropic) - Complete setup guide +- [How Patching Works](../../concepts/patching.md) - Understand provider integration +- [Anthropic Integration](../../integrations/anthropic.md) - Complete setup guide ## See Also -- [Anthropic Prompt Caching](anthropic-prompt-caching) - Optimize Anthropic costs -- [Unified Provider Interface](announcing-unified-provider-interface) - Switch providers easily -- [Framework Comparison](best_framework) - Why Instructor excels +- [Anthropic Prompt Caching](anthropic-prompt-caching.md) - Optimize Anthropic costs +- [Unified Provider Interface](announcing-unified-provider-interface.md) - Switch providers easily +- [Framework Comparison](best_framework.md) - Why Instructor excels diff --git a/docs/blog/posts/validation-part1.md b/docs/blog/posts/validation-part1.md index 513d7dea1..e164a90a1 100644 --- a/docs/blog/posts/validation-part1.md +++ b/docs/blog/posts/validation-part1.md @@ -488,10 +488,10 @@ From the simplicity of Pydantic and Instructor to the dynamic validation capabil If you enjoy the content or want to try out `Instructor` please check out the [github](https://github.com/jxnl/instructor) and give us a star! 
## Related Documentation -- [Core Validation Concepts](/concepts/validation) - Learn about validation fundamentals -- [Reask Validation](/concepts/reask_validation) - Handle validation failures gracefully +- [Core Validation Concepts](../../concepts/validation.md) - Learn about validation fundamentals +- [Reask Validation](../../concepts/reask_validation.md) - Handle validation failures gracefully ## See Also -- [Semantic Validation with Structured Outputs](semantic-validation-structured-outputs) - Next evolution in validation -- [Why Bad Schemas Break LLMs](bad-schemas-could-break-llms) - Schema design best practices -- [Pydantic Is Still All You Need](pydantic-is-still-all-you-need) - Why Pydantic validation matters \ No newline at end of file +- [Semantic Validation with Structured Outputs](semantic-validation-structured-outputs.md) - Next evolution in validation +- [Why Bad Schemas Break LLMs](bad-schemas-could-break-llms.md) - Schema design best practices +- [Pydantic Is Still All You Need](pydantic-is-still-all-you-need.md) - Why Pydantic validation matters \ No newline at end of file diff --git a/docs/blog/posts/why-care-about-mcps.md b/docs/blog/posts/why-care-about-mcps.md index 57d91be29..e271193ae 100644 --- a/docs/blog/posts/why-care-about-mcps.md +++ b/docs/blog/posts/why-care-about-mcps.md @@ -30,7 +30,7 @@ There are three components to the MCP ecosystem: When interacting with Clients, Hosts have access to two primary options: **Tools**, which are model-controlled functions that retrieve or modify data, and **Resources**, which are application-controlled data like files. -There's also the intention of eventually allowing servers themselves to have the capability of requesting completions/approval from Clients and Hosts while executing their tasks [through the `sampling` endpoint](https://modelcontextprotocol.io/docs/concepts/sampling). 
+There's also the intention of eventually allowing servers themselves to have the capability of requesting completions/approval from Clients and Hosts while executing their tasks [through the `sampling` endpoint](https://modelcontextprotocol.io/docs/concepts/sampling). ### The Integration Problem MCP Solves diff --git a/docs/blog/posts/youtube-flashcards.md b/docs/blog/posts/youtube-flashcards.md index fce433027..ebff882bc 100644 --- a/docs/blog/posts/youtube-flashcards.md +++ b/docs/blog/posts/youtube-flashcards.md @@ -332,11 +332,11 @@ via script, a web app (Streamlit, Gradio, etc.), or as a [web service](https://b The `ApplicationBuilder` provides many features to productionize your app: -- [Persistence](https://burr.dagworks.io/concepts/state-persistence/): save and restore `State` +- [Persistence](https://burr.dagworks.io/concepts/state-persistence/): save and restore `State` (e.g., store conversation history) -- [Observability](https://burr.dagworks.io/concepts/additional-visibility/): log and monitor +- [Observability](https://burr.dagworks.io/concepts/additional-visibility/): log and monitor application telemetry (e.g., LLM calls, number of tokens used, errors and retries) -- [Streaming and async](https://burr.dagworks.io/concepts/streaming-actions/): create snappy +- [Streaming and async](https://burr.dagworks.io/concepts/streaming-actions/): create snappy user interfaces by streaming LLM responses and running actions asynchronously. For example, you can log telemetry into Burr UI in a few lines of code. 
First, instrument the diff --git a/docs/concepts/hooks.md b/docs/concepts/hooks.md index 1e4134abb..d7339bfff 100644 --- a/docs/concepts/hooks.md +++ b/docs/concepts/hooks.md @@ -482,6 +482,145 @@ client.on("completion:last_attempt", monitor.handle_error) print(f"Error statistics: {monitor.get_stats()}") ``` +## Hook Combination + +The Hooks system supports powerful hook combination capabilities, allowing you to compose different hook sets for different use cases: + +### Basic Hook Combination + +```python +import instructor +from instructor.core.hooks import Hooks + +# Create different hook sets +logging_hooks = Hooks() +logging_hooks.on("completion:kwargs", lambda **kw: print("Logging kwargs")) +logging_hooks.on("completion:response", lambda resp: print("Logging response")) + +metrics_hooks = Hooks() +metrics_hooks.on("completion:kwargs", lambda **kw: print("Recording metrics")) + +debug_hooks = Hooks() +debug_hooks.on("parse:error", lambda err: print(f"Debug: Parse error - {err}")) + +# Combine hooks using the + operator +combined_hooks = logging_hooks + metrics_hooks +client = instructor.from_provider("openai/gpt-4.1-mini", hooks=combined_hooks) + +# Add more hooks in-place using += +logging_hooks += debug_hooks + +# Combine multiple hooks at once +all_hooks = Hooks.combine(logging_hooks, metrics_hooks, debug_hooks) +``` + +### Hook Combination Methods + +The `Hooks` class provides several methods for combining hook instances: + +- **`__add__`**: Create a new combined Hooks instance using `+` +- **`__iadd__`**: Add hooks in-place using `+=` +- **`combine()`**: Class method to combine multiple Hooks instances +- **`copy()`**: Create a deep copy of a Hooks instance + +```python +# Method 1: + operator (creates new instance) +combined = hooks1 + hooks2 + +# Method 2: += operator (modifies in-place) +hooks1 += hooks2 + +# Method 3: combine() class method (multiple at once) +all_hooks = Hooks.combine(hooks1, hooks2, hooks3) + +# Method 4: copy() for creating 
independent copies +hooks_copy = original_hooks.copy() +``` + +## Per-Call Hooks + +You can now specify hooks for individual API calls, which are automatically combined with client-level hooks: + +```python +import instructor +from instructor.core.hooks import Hooks + +# Set up client with global hooks +client_hooks = Hooks() +client_hooks.on("completion:kwargs", lambda **kw: print("Client hook: kwargs")) + +client = instructor.from_provider("openai/gpt-4.1-mini", hooks=client_hooks) + +# Create per-call hooks for debugging specific requests +debug_hooks = Hooks() +debug_hooks.on("completion:response", lambda resp: print("Debug hook: response")) +debug_hooks.on("parse:error", lambda err: print(f"Debug hook: error - {err}")) + +# Use per-call hooks - they combine with client hooks +user = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Extract: Alice is 25"}], + response_model=User, + hooks=debug_hooks # Per-call hooks combined with client hooks +) +``` + +### Per-Call Hook Features + +- **Automatic Combination**: Per-call hooks are automatically combined with client hooks using the `+` operator +- **Client Hook Preservation**: Client-level hooks remain unchanged after per-call hook usage +- **Backward Compatibility**: Existing code continues to work unchanged +- **Flexible Composition**: Mix and match different hook sets for different requests + +### Per-Call Hooks Example + +```python +import instructor +from instructor.core.hooks import Hooks +from pydantic import BaseModel + +class User(BaseModel): + name: str + age: int + +# Client with standard logging +client_hooks = Hooks() +client_hooks.on("completion:kwargs", lambda **kw: print("Standard logging")) + +client = instructor.from_provider("openai/gpt-4.1-mini", hooks=client_hooks) + +# Performance monitoring hooks for specific calls +perf_hooks = Hooks() +perf_hooks.on("completion:response", lambda resp: print(f"Tokens used: {resp.usage}")) + +# Debug hooks for 
troublesome requests +debug_hooks = Hooks() +debug_hooks.on("parse:error", lambda err: print(f"Detailed error: {err}")) + +# Regular call - only client hooks +user1 = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Extract: Bob is 30"}], + response_model=User +) + +# Performance monitoring call - client + perf hooks +user2 = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Extract: Carol is 25"}], + response_model=User, + hooks=perf_hooks +) + +# Debug problematic call - client + debug hooks +user3 = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Extract: Invalid data"}], + response_model=User, + hooks=debug_hooks +) +``` + ## Hooks in Testing Hooks are particularly useful for testing, as they allow you to inspect the arguments and responses without modifying your application code: @@ -511,6 +650,30 @@ class TestMyApp(unittest.TestCase): # You can also inspect the arguments response_arg = mock_handler.call_args[0][0] self.assertEqual(response_arg.model, "gpt-4.1-mini") + + def test_per_call_hooks(self): + """Test that per-call hooks work correctly with client hooks.""" + client = instructor.from_provider("openai/gpt-4.1-mini") + client_mock = Mock() + per_call_mock = Mock() + + client.on("completion:response", client_mock) + + # Create per-call hooks + from instructor.core.hooks import Hooks + per_call_hooks = Hooks() + per_call_hooks.on("completion:response", per_call_mock) + + # Make a call with per-call hooks + result = client.chat.completions.create( + messages=[{"role": "user", "content": "Hello"}], + response_model=str, + hooks=per_call_hooks + ) + + # Both client and per-call hooks should have been called + client_mock.assert_called_once() + per_call_mock.assert_called_once() ``` This approach allows you to test your code without mocking the entire client. 
diff --git a/docs/concepts/retrying.md b/docs/concepts/retrying.md index 76e4d642f..5c1d8e4fe 100644 --- a/docs/concepts/retrying.md +++ b/docs/concepts/retrying.md @@ -314,6 +314,60 @@ def double_retry_extraction(text: str) -> UserInfo: ) ``` +## Failed Attempts Tracking + +Instructor's retry system now tracks all failed attempts with detailed context for better debugging and error handling. + +### Enhanced Error Context + +When retries fail, exceptions include comprehensive failure history: + +```python +import instructor +from instructor.core.exceptions import InstructorRetryException +from pydantic import BaseModel, field_validator + +client = instructor.from_provider("openai/gpt-4.1-mini") + +class UserInfo(BaseModel): + name: str + age: int + + @field_validator('age') + @classmethod + def validate_age(cls, v): + if v < 0 or v > 150: + raise ValueError(f"Age {v} is invalid") + return v + +try: + result = client.chat.completions.create( + response_model=UserInfo, + messages=[{"role": "user", "content": "Extract: John is -5 years old"}], + max_retries=3 + ) +except InstructorRetryException as e: + # Access failed attempts for debugging + print(f"Failed after {e.n_attempts} attempts") + for attempt in e.failed_attempts: + print(f"Attempt {attempt.attempt_number}: {attempt.exception}") + + # Exception string includes rich context: + # + # + # ValidationError: Age -5 is invalid + # {"name": "John", "age": -5} + # + # +``` + +### Improved Reask Behavior + +Failed attempts are automatically propagated to reask handlers, enabling: +- **Contextual error messages** - LLMs receive previous failure information +- **Progressive corrections** - Each retry learns from past mistakes +- **Smarter retry strategies** - Better pattern recognition across attempts + ## Best Practices for Tenacity with Instructor ### 1. 
Choose Appropriate Retry Strategies @@ -579,9 +633,9 @@ if __name__ == "__main__": - [Tenacity Documentation](https://tenacity.readthedocs.io/) - [Instructor Error Handling](./error_handling.md) - [Validation Best Practices](./validation.md) -- [Async Processing Guide](./async.md) +- [Async Processing Guide](../blog/posts/learn-async.md) - [Python Retry Patterns](https://pypi.org/project/tenacity/) --- -**Next Steps**: Learn about [error handling patterns](./error_handling.md) or explore [async processing](./async.md) for high-performance applications. +**Next Steps**: Learn about [error handling patterns](./error_handling.md) or explore [async processing](../blog/posts/learn-async.md) for high-performance applications. diff --git a/docs/contributing.md b/docs/contributing.md index 903097aed..142c739bb 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -259,7 +259,7 @@ python scripts/check_blog_excerpts.py python scripts/make_sitemap.py ``` -For detailed documentation on each script, see [scripts/README.md](../scripts/README.md). +For detailed documentation on each script, see the `scripts/README.md` file in the project repository. ## Using Cursor to Build PRs diff --git a/docs/examples/audio_extraction.md b/docs/examples/audio_extraction.md index d03f16a09..8dbae1287 100644 --- a/docs/examples/audio_extraction.md +++ b/docs/examples/audio_extraction.md @@ -52,7 +52,7 @@ print(resp) ## How It Works -1. First, we import the necessary libraries including the `Audio` class from `instructor.multimodal`. +1. First, we import the necessary libraries including the `Audio` class from `instructor.processing.multimodal`. 2. 
We define a Pydantic model `Person` that specifies the structure of the information we want to extract from the audio: - `name`: The person's name diff --git a/docs/examples/sqlmodel.md b/docs/examples/sqlmodel.md index b95173323..ce4f7feb6 100644 --- a/docs/examples/sqlmodel.md +++ b/docs/examples/sqlmodel.md @@ -616,7 +616,7 @@ By following the patterns and best practices outlined in this guide, you can bui - Explore the [FastAPI integration guide](../concepts/fastapi.md) for advanced API patterns - Check out [validation techniques](../concepts/validation.md) for robust data handling -- Learn about [streaming responses](../concepts/streaming.md) for real-time applications +- Learn about [streaming responses](partial_streaming.md) for real-time applications ![Image of hero record in the database](db.png) diff --git a/docs/integrations/truefoundry.md b/docs/integrations/truefoundry.md index e6412e465..bda721a0a 100644 --- a/docs/integrations/truefoundry.md +++ b/docs/integrations/truefoundry.md @@ -12,7 +12,7 @@ TrueFoundry provides an enterprise-ready [AI Gateway](https://www.truefoundry.co Before integrating Instructor with TrueFoundry, ensure you have: -1. **TrueFoundry Account**: Create a [TrueFoundry account](https://www.truefoundry.com/register) with at least one model provider and generate a Personal Access Token by following the instructions in [Generating Tokens](/gateway/authentication). For a quick setup guide, see our [Gateway Quick Start](https://docs.truefoundry.com/gateway/quick-start) +1. **TrueFoundry Account**: Create a [TrueFoundry account](https://www.truefoundry.com/register) with at least one model provider and generate a Personal Access Token by following the instructions in [Generating Tokens](https://docs.truefoundry.com/gateway/authentication). For a quick setup guide, see our [Gateway Quick Start](https://docs.truefoundry.com/gateway/quick-start) 2. **Instructor Installation**: Install Instructor using pip: `pip install instructor` 3. 
**OpenAI Library**: Install the OpenAI Python library: `pip install openai` 4. **Pydantic**: Install Pydantic for data validation: `pip install pydantic` diff --git a/docs/learning/validation/retry_mechanisms.md b/docs/learning/validation/retry_mechanisms.md index 5efa85a2c..be6d162fb 100644 --- a/docs/learning/validation/retry_mechanisms.md +++ b/docs/learning/validation/retry_mechanisms.md @@ -82,10 +82,42 @@ client = instructor.from_openai( ## Handling Retry Failures -When all retries fail, depending on your configuration: +When all retries fail, Instructor raises an `InstructorRetryException` that contains comprehensive information about all failed attempts: -1. With `throw_error=True` (default): An exception is raised -2. With `throw_error=False`: The last failed response is returned, and you can handle it gracefully +```python +from instructor.core.exceptions import InstructorRetryException + +try: + response = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Product: Invalid data"}], + response_model=Product, + max_retries=3 + ) +except InstructorRetryException as e: + print(f"Failed after {e.n_attempts} attempts") + print(f"Total usage: {e.total_usage}") + + # New: Access detailed information about each failed attempt + for attempt in e.failed_attempts: + print(f"Attempt {attempt.attempt_number}: {attempt.exception}") + if attempt.completion: + # Analyze the raw completion that failed validation + print(f"Raw response: {attempt.completion}") +``` + +The `InstructorRetryException` now includes: + +- `failed_attempts`: A list of `FailedAttempt` objects containing: + - `attempt_number`: The retry attempt number + - `exception`: The specific exception that occurred + - `completion`: The raw LLM response (when available) +- `n_attempts`: Total number of attempts made +- `total_usage`: Total token usage across all attempts +- `last_completion`: The final failed completion +- `messages`: The conversation history + 
+This comprehensive tracking enables better debugging and analysis of retry patterns. For more on handling validation failures, see [Fallback Strategies](../../concepts/error_handling.md). diff --git a/docs/why.md b/docs/why.md index 73a092c05..f6e7ea165 100644 --- a/docs/why.md +++ b/docs/why.md @@ -234,4 +234,4 @@ Let's be clear - you might not need Instructor if: For everyone else building production LLM applications, Instructor is the obvious choice. -[Get Started →](../index.md#quick-start){ .md-button .md-button--primary } \ No newline at end of file +[Get Started →](index.md#quick-start-extract-structured-data-in-3-lines){ .md-button .md-button--primary } \ No newline at end of file diff --git a/instructor/__init__.py b/instructor/__init__.py index 16c8a6052..e6db3406f 100644 --- a/instructor/__init__.py +++ b/instructor/__init__.py @@ -30,6 +30,10 @@ from .batch import BatchProcessor, BatchRequest, BatchJob from .distil import FinetuneFormat, Instructions +# Backward compatibility: Re-export removed functions +from .processing.response import handle_response_model +from .dsl.parallel import handle_parallel_model + __all__ = [ "Instructor", "Image", @@ -59,6 +63,9 @@ "llm_validator", "openai_moderation", "hooks", + # Backward compatibility exports + "handle_response_model", + "handle_parallel_model", ] diff --git a/instructor/client.py b/instructor/client.py new file mode 100644 index 000000000..a978f5196 --- /dev/null +++ b/instructor/client.py @@ -0,0 +1,25 @@ +"""Backwards compatibility module for instructor.client. + +This module provides lazy imports to maintain backwards compatibility. +""" + +import warnings + + +def __getattr__(name: str): + """Lazy import to provide backward compatibility for client imports.""" + warnings.warn( + f"Importing from 'instructor.client' is deprecated and will be removed in v2.0.0. 
" + f"Please update your imports to use 'instructor.core.client.{name}' instead:\n" + " from instructor.core.client import Instructor, AsyncInstructor, from_openai, from_litellm", + DeprecationWarning, + stacklevel=2 + ) + + from .core import client as core_client + + # Try to get the attribute from the core.client module + if hasattr(core_client, name): + return getattr(core_client, name) + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") \ No newline at end of file diff --git a/instructor/core/client.py b/instructor/core/client.py index dce407b82..0cb64d60d 100644 --- a/instructor/core/client.py +++ b/instructor/core/client.py @@ -312,6 +312,7 @@ def create( validation_context: dict[str, Any] | None = None, context: dict[str, Any] | None = None, # {{ edit_1 }} strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> Awaitable[T]: ... @@ -324,6 +325,7 @@ def create( validation_context: dict[str, Any] | None = None, context: dict[str, Any] | None = None, # {{ edit_1 }} strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> T: ... @@ -336,6 +338,7 @@ def create( validation_context: dict[str, Any] | None = None, context: dict[str, Any] | None = None, # {{ edit_1 }} strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> Awaitable[Any]: ... @@ -348,6 +351,7 @@ def create( validation_context: dict[str, Any] | None = None, context: dict[str, Any] | None = None, # {{ edit_1 }} strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> Any: ... 
@@ -359,10 +363,16 @@ def create( validation_context: dict[str, Any] | None = None, context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> T | Any | Awaitable[T] | Awaitable[Any]: kwargs = self.handle_kwargs(kwargs) + # Combine client hooks with per-call hooks + combined_hooks = self.hooks + if hooks is not None: + combined_hooks = self.hooks + hooks + return self.create_fn( response_model=response_model, messages=messages, @@ -370,7 +380,7 @@ def create( validation_context=validation_context, context=context, strict=strict, - hooks=self.hooks, + hooks=combined_hooks, **kwargs, ) @@ -383,6 +393,7 @@ def create_partial( validation_context: dict[str, Any] | None = None, context: dict[str, Any] | None = None, # {{ edit_1 }} strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> AsyncGenerator[T, None]: ... @@ -395,6 +406,7 @@ def create_partial( validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> Generator[T, None, None]: ... 
@@ -406,12 +418,18 @@ def create_partial( validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> Generator[T, None, None] | AsyncGenerator[T, None]: kwargs["stream"] = True kwargs = self.handle_kwargs(kwargs) + # Combine client hooks with per-call hooks + combined_hooks = self.hooks + if hooks is not None: + combined_hooks = self.hooks + hooks + response_model = instructor.Partial[response_model] # type: ignore return self.create_fn( messages=messages, @@ -420,7 +438,7 @@ def create_partial( validation_context=validation_context, context=context, strict=strict, - hooks=self.hooks, + hooks=combined_hooks, **kwargs, ) @@ -433,6 +451,7 @@ def create_iterable( validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> AsyncGenerator[T, None]: ... @@ -445,6 +464,7 @@ def create_iterable( validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> Generator[T, None, None]: ... 
@@ -456,11 +476,17 @@ def create_iterable( validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> Generator[T, None, None] | AsyncGenerator[T, None]: kwargs["stream"] = True kwargs = self.handle_kwargs(kwargs) + # Combine client hooks with per-call hooks + combined_hooks = self.hooks + if hooks is not None: + combined_hooks = self.hooks + hooks + response_model = Iterable[response_model] # type: ignore return self.create_fn( messages=messages, @@ -469,7 +495,7 @@ def create_iterable( validation_context=validation_context, context=context, strict=strict, - hooks=self.hooks, + hooks=combined_hooks, **kwargs, ) @@ -482,6 +508,7 @@ def create_with_completion( validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> Awaitable[tuple[T, Any]]: ... @@ -494,6 +521,7 @@ def create_with_completion( validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> tuple[T, Any]: ... 
@@ -505,9 +533,16 @@ def create_with_completion( validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> tuple[T, Any] | Awaitable[tuple[T, Any]]: kwargs = self.handle_kwargs(kwargs) + + # Combine client hooks with per-call hooks + combined_hooks = self.hooks + if hooks is not None: + combined_hooks = self.hooks + hooks + model = self.create_fn( messages=messages, response_model=response_model, @@ -515,7 +550,7 @@ def create_with_completion( validation_context=validation_context, context=context, strict=strict, - hooks=self.hooks, + hooks=combined_hooks, **kwargs, ) return model, model._raw_response @@ -578,10 +613,16 @@ async def create( # type: ignore[override] validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> T | Any: kwargs = self.handle_kwargs(kwargs) + # Combine client hooks with per-call hooks + combined_hooks = self.hooks + if hooks is not None: + combined_hooks = self.hooks + hooks + # Check if the response model is an iterable type if ( get_origin(response_model) in {Iterable} @@ -601,6 +642,7 @@ async def create( # type: ignore[override] validation_context=validation_context, context=context, strict=strict, + hooks=hooks, # Pass the per-call hooks to create_iterable **kwargs, ) @@ -611,7 +653,7 @@ async def create( # type: ignore[override] max_retries=max_retries, messages=messages, strict=strict, - hooks=self.hooks, + hooks=combined_hooks, **kwargs, ) @@ -623,10 +665,17 @@ async def create_partial( # type: ignore[override] validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> AsyncGenerator[T, None]: kwargs = self.handle_kwargs(kwargs) kwargs["stream"] = True + + # Combine client 
hooks with per-call hooks + combined_hooks = self.hooks + if hooks is not None: + combined_hooks = self.hooks + hooks + async for item in await self.create_fn( response_model=instructor.Partial[response_model], # type: ignore validation_context=validation_context, @@ -634,7 +683,7 @@ async def create_partial( # type: ignore[override] max_retries=max_retries, messages=messages, strict=strict, - hooks=self.hooks, + hooks=combined_hooks, **kwargs, ): yield item @@ -647,10 +696,17 @@ async def create_iterable( # type: ignore[override] validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> AsyncGenerator[T, None]: kwargs = self.handle_kwargs(kwargs) kwargs["stream"] = True + + # Combine client hooks with per-call hooks + combined_hooks = self.hooks + if hooks is not None: + combined_hooks = self.hooks + hooks + async for item in await self.create_fn( response_model=Iterable[response_model], validation_context=validation_context, @@ -658,7 +714,7 @@ async def create_iterable( # type: ignore[override] max_retries=max_retries, messages=messages, strict=strict, - hooks=self.hooks, + hooks=combined_hooks, **kwargs, ): yield item @@ -671,9 +727,16 @@ async def create_with_completion( # type: ignore[override] validation_context: dict[str, Any] | None = None, # Deprecate in 2.0 context: dict[str, Any] | None = None, strict: bool = True, + hooks: Hooks | None = None, **kwargs: Any, ) -> tuple[T, Any]: kwargs = self.handle_kwargs(kwargs) + + # Combine client hooks with per-call hooks + combined_hooks = self.hooks + if hooks is not None: + combined_hooks = self.hooks + hooks + response = await self.create_fn( response_model=response_model, validation_context=validation_context, @@ -681,7 +744,7 @@ async def create_with_completion( # type: ignore[override] max_retries=max_retries, messages=messages, strict=strict, - hooks=self.hooks, + hooks=combined_hooks, 
**kwargs, ) return response, response._raw_response @@ -773,13 +836,15 @@ def from_openai( return Instructor( client=client, create=instructor.patch( - create=client.chat.completions.create - if mode - not in { - instructor.Mode.RESPONSES_TOOLS_WITH_INBUILT_TOOLS, - instructor.Mode.RESPONSES_TOOLS, - } - else partial(map_chat_completion_to_response, client=client), + create=( + client.chat.completions.create + if mode + not in { + instructor.Mode.RESPONSES_TOOLS_WITH_INBUILT_TOOLS, + instructor.Mode.RESPONSES_TOOLS, + } + else partial(map_chat_completion_to_response, client=client) + ), mode=mode, ), mode=mode, @@ -791,13 +856,15 @@ def from_openai( return AsyncInstructor( client=client, create=instructor.patch( - create=client.chat.completions.create - if mode - not in { - instructor.Mode.RESPONSES_TOOLS_WITH_INBUILT_TOOLS, - instructor.Mode.RESPONSES_TOOLS, - } - else partial(async_map_chat_completion_to_response, client=client), + create=( + client.chat.completions.create + if mode + not in { + instructor.Mode.RESPONSES_TOOLS_WITH_INBUILT_TOOLS, + instructor.Mode.RESPONSES_TOOLS, + } + else partial(async_map_chat_completion_to_response, client=client) + ), mode=mode, ), mode=mode, diff --git a/instructor/core/exceptions.py b/instructor/core/exceptions.py index 2fa635be1..fbed82609 100644 --- a/instructor/core/exceptions.py +++ b/instructor/core/exceptions.py @@ -1,12 +1,68 @@ from __future__ import annotations -from typing import Any +from textwrap import dedent +from typing import Any, NamedTuple +from jinja2 import Template class InstructorError(Exception): """Base exception for all Instructor-specific errors.""" - pass + failed_attempts: list[FailedAttempt] | None = None + + @classmethod + def from_exception( + cls, exception: Exception, failed_attempts: list[FailedAttempt] | None = None + ): + return cls(str(exception), failed_attempts=failed_attempts) + + def __init__( + self, + *args: list[Any], + failed_attempts: list[FailedAttempt] | None = None, + 
**kwargs: dict[str, Any], + ): + self.failed_attempts = failed_attempts + super().__init__(*args, **kwargs) + + def __str__(self) -> str: + # If no failed attempts, use the standard exception string representation + if not self.failed_attempts: + return super().__str__() + + template = Template( + dedent( + """ + + {% for attempt in failed_attempts %} + + + {{ attempt.exception }} + + + {{ attempt.completion }} + + + {% endfor %} + + + + {{ last_exception }} + + """ + ).strip() + ) + return template.render( + last_exception=super().__str__(), failed_attempts=self.failed_attempts + ) + + +class FailedAttempt(NamedTuple): + """Represents a single failed retry attempt.""" + + attempt_number: int + exception: Exception + completion: Any | None = None class IncompleteOutputException(InstructorError): @@ -34,6 +90,7 @@ def __init__( n_attempts: int, total_usage: int, create_kwargs: dict[str, Any] | None = None, + failed_attempts: list[FailedAttempt] | None = None, **kwargs: dict[str, Any], ): self.last_completion = last_completion @@ -41,7 +98,7 @@ def __init__( self.n_attempts = n_attempts self.total_usage = total_usage self.create_kwargs = create_kwargs - super().__init__(*args, **kwargs) + super().__init__(*args, failed_attempts=failed_attempts, **kwargs) class ValidationError(InstructorError): diff --git a/instructor/core/hooks.py b/instructor/core/hooks.py index bf7672918..733716ae2 100644 --- a/instructor/core/hooks.py +++ b/instructor/core/hooks.py @@ -225,3 +225,123 @@ def clear( self._handlers.pop(hook_name, None) else: self._handlers.clear() + + def __add__(self, other: Hooks) -> Hooks: + """ + Combine two Hooks instances into a new one. + + This creates a new Hooks instance that contains all handlers from both + the current instance and the other instance. Handlers are combined by + appending the other's handlers after the current instance's handlers. + + Args: + other: Another Hooks instance to combine with this one. 
+ + Returns: + A new Hooks instance containing all handlers from both instances. + + Example: + >>> hooks1 = Hooks() + >>> hooks2 = Hooks() + >>> hooks1.on("completion:kwargs", lambda **kw: print("Hook 1")) + >>> hooks2.on("completion:kwargs", lambda **kw: print("Hook 2")) + >>> combined = hooks1 + hooks2 + >>> combined.emit_completion_arguments() # Prints both "Hook 1" and "Hook 2" + """ + if not isinstance(other, Hooks): + return NotImplemented + + combined = Hooks() + + # Copy handlers from self + for hook_name, handlers in self._handlers.items(): + combined._handlers[hook_name].extend(handlers.copy()) + + # Add handlers from other + for hook_name, handlers in other._handlers.items(): + combined._handlers[hook_name].extend(handlers.copy()) + + return combined + + def __iadd__(self, other: Hooks) -> Hooks: + """ + Add another Hooks instance to this one in-place. + + This modifies the current instance by adding all handlers from the other + instance. The other instance's handlers are appended after the current + instance's handlers for each event type. + + Args: + other: Another Hooks instance to add to this one. + + Returns: + This Hooks instance (for method chaining). + + Example: + >>> hooks1 = Hooks() + >>> hooks2 = Hooks() + >>> hooks1.on("completion:kwargs", lambda **kw: print("Hook 1")) + >>> hooks2.on("completion:kwargs", lambda **kw: print("Hook 2")) + >>> hooks1 += hooks2 + >>> hooks1.emit_completion_arguments() # Prints both "Hook 1" and "Hook 2" + """ + if not isinstance(other, Hooks): + return NotImplemented + + # Add handlers from other to self + for hook_name, handlers in other._handlers.items(): + self._handlers[hook_name].extend(handlers.copy()) + + return self + + @classmethod + def combine(cls, *hooks_instances: Hooks) -> Hooks: + """ + Combine multiple Hooks instances into a new one. + + This class method creates a new Hooks instance that contains all handlers + from all provided instances. 
Handlers are combined in the order of the + provided instances. + + Args: + *hooks_instances: Variable number of Hooks instances to combine. + + Returns: + A new Hooks instance containing all handlers from all instances. + + Example: + >>> hooks1 = Hooks() + >>> hooks2 = Hooks() + >>> hooks3 = Hooks() + >>> hooks1.on("completion:kwargs", lambda **kw: print("Hook 1")) + >>> hooks2.on("completion:kwargs", lambda **kw: print("Hook 2")) + >>> hooks3.on("completion:kwargs", lambda **kw: print("Hook 3")) + >>> combined = Hooks.combine(hooks1, hooks2, hooks3) + >>> combined.emit_completion_arguments() # Prints all three hooks + """ + combined = cls() + + for hooks_instance in hooks_instances: + if not isinstance(hooks_instance, cls): + raise TypeError(f"Expected Hooks instance, got {type(hooks_instance)}") + combined += hooks_instance + + return combined + + def copy(self) -> Hooks: + """ + Create a deep copy of this Hooks instance. + + Returns: + A new Hooks instance with all the same handlers. 
+ + Example: + >>> original = Hooks() + >>> original.on("completion:kwargs", lambda **kw: print("Hook")) + >>> copy = original.copy() + >>> copy.emit_completion_arguments() # Prints "Hook" + """ + new_hooks = Hooks() + for hook_name, handlers in self._handlers.items(): + new_hooks._handlers[hook_name].extend(handlers.copy()) + return new_hooks diff --git a/instructor/core/retry.py b/instructor/core/retry.py index c4e1394a0..5fc7ec52e 100644 --- a/instructor/core/retry.py +++ b/instructor/core/retry.py @@ -6,7 +6,7 @@ from json import JSONDecodeError from typing import Any, Callable, TypeVar -from .exceptions import InstructorRetryException, AsyncValidationError +from .exceptions import InstructorRetryException, AsyncValidationError, FailedAttempt from .hooks import Hooks from ..mode import Mode from ..processing.response import ( @@ -175,6 +175,9 @@ def retry_sync( # Pre-extract stream flag to avoid repeated lookup stream = kwargs.get("stream", False) + # Track all failed attempts + failed_attempts: list[FailedAttempt] = [] + try: response = None for attempt in max_retries: @@ -200,6 +203,15 @@ def retry_sync( logger.debug(f"Parse error: {e}") hooks.emit_parse_error(e) + # Track this failed attempt + failed_attempts.append( + FailedAttempt( + attempt_number=attempt.retry_state.attempt_number, + exception=e, + completion=response, + ) + ) + # Check if this is the last attempt if isinstance(max_retries, Retrying) and hasattr( max_retries, "stop" @@ -224,6 +236,7 @@ def retry_sync( mode=mode, response=response, exception=e, + failed_attempts=failed_attempts, ) raise e except Exception as e: @@ -231,6 +244,15 @@ def retry_sync( logger.debug(f"Completion error: {e}") hooks.emit_completion_error(e) + # Track this failed attempt + failed_attempts.append( + FailedAttempt( + attempt_number=attempt.retry_state.attempt_number, + exception=e, + completion=response, + ) + ) + # Check if this is the last attempt for completion errors if isinstance(max_retries, Retrying) and 
hasattr( max_retries, "stop" @@ -261,6 +283,7 @@ def retry_sync( ), # Use the optimized function instead of nested lookups create_kwargs=kwargs, total_usage=total_usage, + failed_attempts=failed_attempts, ) from e @@ -304,6 +327,9 @@ async def retry_async( # Pre-extract stream flag to avoid repeated lookup stream = kwargs.get("stream", False) + # Track all failed attempts + failed_attempts: list[FailedAttempt] = [] + try: response = None async for attempt in max_retries: @@ -333,6 +359,15 @@ async def retry_async( logger.debug(f"Parse error: {e}") hooks.emit_parse_error(e) + # Track this failed attempt + failed_attempts.append( + FailedAttempt( + attempt_number=attempt.retry_state.attempt_number, + exception=e, + completion=response, + ) + ) + # Check if this is the last attempt if isinstance(max_retries, AsyncRetrying) and hasattr( max_retries, "stop" @@ -357,6 +392,7 @@ async def retry_async( mode=mode, response=response, exception=e, + failed_attempts=failed_attempts, ) raise e except Exception as e: @@ -364,6 +400,15 @@ async def retry_async( logger.debug(f"Completion error: {e}") hooks.emit_completion_error(e) + # Track this failed attempt + failed_attempts.append( + FailedAttempt( + attempt_number=attempt.retry_state.attempt_number, + exception=e, + completion=response, + ) + ) + # Check if this is the last attempt for completion errors if isinstance(max_retries, AsyncRetrying) and hasattr( max_retries, "stop" @@ -394,4 +439,5 @@ async def retry_async( ), # Use the optimized function instead of nested lookups create_kwargs=kwargs, total_usage=total_usage, + failed_attempts=failed_attempts, ) from e diff --git a/instructor/dsl/__init__.py b/instructor/dsl/__init__.py index 04982fc54..838f4473c 100644 --- a/instructor/dsl/__init__.py +++ b/instructor/dsl/__init__.py @@ -3,6 +3,7 @@ from .partial import Partial from .citation import CitationMixin from .simple_type import is_simple_type, ModelAdapter +from . 
import validators # Backwards compatibility module __all__ = [ # noqa: F405 "CitationMixin", @@ -11,4 +12,5 @@ "Partial", "is_simple_type", "ModelAdapter", + "validators", ] diff --git a/instructor/dsl/validators.py b/instructor/dsl/validators.py new file mode 100644 index 000000000..9bf49f254 --- /dev/null +++ b/instructor/dsl/validators.py @@ -0,0 +1,20 @@ +"""Backwards compatibility module for instructor.dsl.validators. + +This module provides lazy imports to avoid circular import issues. +""" + + +def __getattr__(name: str): + """Lazy import to avoid circular dependencies.""" + from ..processing import validators as processing_validators + from .. import validation + + # Try processing.validators first + if hasattr(processing_validators, name): + return getattr(processing_validators, name) + + # Then try validation module + if hasattr(validation, name): + return getattr(validation, name) + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") diff --git a/instructor/function_calls.py b/instructor/function_calls.py new file mode 100644 index 000000000..a4100f720 --- /dev/null +++ b/instructor/function_calls.py @@ -0,0 +1,8 @@ +"""Backwards compatibility module for instructor.function_calls. + +This module re-exports everything from instructor.processing.function_calls +for backwards compatibility. +""" + +# Re-export everything from the actual function_calls module +from .processing.function_calls import * # noqa: F401, F403 diff --git a/instructor/hooks.py b/instructor/hooks.py new file mode 100644 index 000000000..0969dcb3f --- /dev/null +++ b/instructor/hooks.py @@ -0,0 +1,25 @@ +"""Backwards compatibility module for instructor.hooks. + +This module provides lazy imports to maintain backwards compatibility. +""" + +import warnings + + +def __getattr__(name: str): + """Lazy import to provide backward compatibility for hooks imports.""" + warnings.warn( + f"Importing from 'instructor.hooks' is deprecated and will be removed in v2.0.0. 
" + f"Please update your imports to use 'instructor.core.hooks.{name}' instead:\n" + " from instructor.core.hooks import Hooks, HookName", + DeprecationWarning, + stacklevel=2 + ) + + from .core import hooks as core_hooks + + # Try to get the attribute from the core.hooks module + if hasattr(core_hooks, name): + return getattr(core_hooks, name) + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") \ No newline at end of file diff --git a/instructor/multimodal.py b/instructor/multimodal.py new file mode 100644 index 000000000..6943c00d9 --- /dev/null +++ b/instructor/multimodal.py @@ -0,0 +1,26 @@ +"""Backwards compatibility module for instructor.multimodal. + +This module provides lazy imports to maintain backwards compatibility. +""" + +import warnings + + +def __getattr__(name: str): + """Lazy import to provide backward compatibility for multimodal imports.""" + # Issue deprecation warning when accessing multimodal imports + warnings.warn( + "Importing from 'instructor.multimodal' is deprecated and will be removed in v2.0.0. " + f"Please update your imports to use 'instructor.processing.multimodal.{name}' instead:\n" + " from instructor.processing.multimodal import PDF, Image, Audio", + DeprecationWarning, + stacklevel=2 + ) + + from .processing import multimodal as processing_multimodal + + # Try to get the attribute from the processing.multimodal module + if hasattr(processing_multimodal, name): + return getattr(processing_multimodal, name) + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") \ No newline at end of file diff --git a/instructor/patch.py b/instructor/patch.py new file mode 100644 index 000000000..cf03fac4b --- /dev/null +++ b/instructor/patch.py @@ -0,0 +1,25 @@ +"""Backwards compatibility module for instructor.patch. + +This module provides lazy imports to maintain backwards compatibility. 
+""" + +import warnings + + +def __getattr__(name: str): + """Lazy import to provide backward compatibility for patch imports.""" + warnings.warn( + f"Importing from 'instructor.patch' is deprecated and will be removed in v2.0.0. " + f"Please update your imports to use 'instructor.core.patch.{name}' instead:\n" + " from instructor.core.patch import patch, apatch", + DeprecationWarning, + stacklevel=2 + ) + + from .core import patch as core_patch + + # Try to get the attribute from the core.patch module + if hasattr(core_patch, name): + return getattr(core_patch, name) + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") \ No newline at end of file diff --git a/instructor/process_response.py b/instructor/process_response.py new file mode 100644 index 000000000..49201b13c --- /dev/null +++ b/instructor/process_response.py @@ -0,0 +1,25 @@ +"""Backwards compatibility module for instructor.process_response. + +This module provides lazy imports to maintain backwards compatibility. +""" + +import warnings + + +def __getattr__(name: str): + """Lazy import to provide backward compatibility for process_response imports.""" + warnings.warn( + f"Importing from 'instructor.process_response' is deprecated and will be removed in v2.0.0. 
" + f"Please update your imports to use 'instructor.processing.response.{name}' instead:\n" + " from instructor.processing.response import process_response", + DeprecationWarning, + stacklevel=2 + ) + + from .processing import response as processing_response + + # Try to get the attribute from the processing.response module + if hasattr(processing_response, name): + return getattr(processing_response, name) + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") \ No newline at end of file diff --git a/instructor/processing/response.py b/instructor/processing/response.py index dfbcc6d11..ee4fbae7f 100644 --- a/instructor/processing/response.py +++ b/instructor/processing/response.py @@ -43,6 +43,8 @@ class User(BaseModel): from pydantic import BaseModel from typing_extensions import ParamSpec +from instructor.core.exceptions import InstructorError + from ..dsl.iterable import IterableBase from ..dsl.parallel import ParallelBase from ..dsl.partial import PartialBase @@ -491,55 +493,113 @@ def handle_reask_kwargs( mode: Mode, response: Any, exception: Exception, + failed_attempts: list[Any] | None = None, ) -> dict[str, Any]: """Handle validation errors by reformatting the request for retry (reask). - When a response fails validation (e.g., missing required fields, wrong types), - this function prepares a new request that includes information about the error. - This allows the LLM to understand what went wrong and correct its response. + This function serves as the central dispatcher for handling validation failures + across all supported LLM providers. When a response fails validation, it prepares + a new request that includes detailed error information and retry context, allowing + the LLM to understand what went wrong and generate a corrected response. - The reask logic is provider-specific because each provider has different ways - of handling function/tool calls and different message formats. + The reask process involves: + 1. 
Analyzing the validation error and failed response + 2. Selecting the appropriate provider-specific reask handler + 3. Enriching the exception with retry history (failed_attempts) + 4. Formatting error feedback in the provider's expected message format + 5. Preserving original request parameters while adding retry context Args: kwargs (dict[str, Any]): The original request parameters that resulted in - a validation error. Includes messages, tools, temperature, etc. + a validation error. Contains all parameters passed to the LLM API: + - messages: conversation history + - tools/functions: available function definitions + - temperature, max_tokens: generation parameters + - model, provider-specific settings mode (Mode): The provider/format mode that determines which reask handler - to use. Each mode has a specific strategy for formatting error feedback. + to use. Each mode implements a specific strategy for formatting error + feedback and retry messages. Examples: + - Mode.TOOLS: OpenAI function calling + - Mode.ANTHROPIC_TOOLS: Anthropic tool use + - Mode.JSON: JSON-only responses response (Any): The raw response from the LLM that failed validation. - Type varies by provider: - - OpenAI: ChatCompletion with tool_calls - - Anthropic: Message with tool_use blocks + Type and structure varies by provider: + - OpenAI: ChatCompletion with tool_calls or content + - Anthropic: Message with tool_use blocks or text content - Google: GenerateContentResponse with function calls - exception (Exception): The validation error that occurred. Usually a - Pydantic ValidationError with details about which fields failed. + - Cohere: NonStreamedChatResponse with tool calls + exception (Exception): The validation error that occurred, typically: + - Pydantic ValidationError: field validation failures + - JSONDecodeError: malformed JSON responses + - Custom validation errors from response processors + The exception will be enriched with failed_attempts data. 
+ failed_attempts (list[FailedAttempt] | None): Historical record of previous + retry attempts for this request. Each FailedAttempt contains: + - attempt_number: sequential attempt counter + - exception: the validation error for that attempt + - completion: the raw LLM response that failed + Used to provide retry context and prevent repeated mistakes. Returns: - dict[str, Any]: Modified kwargs for the retry request, typically including: - - Updated messages with error context - - Same tool/function definitions - - Preserved generation parameters - - Provider-specific formatting - - Reask Strategies by Provider: - Each provider has a specific strategy for handling retries: - - **JSON Modes:** - - Adds assistant message with failed attempt - - Adds user message with error details - - **Tool Calls:** - - Preserves tool definitions - - Formats the errors as tool calls responses + dict[str, Any]: Modified kwargs for the retry request with: + - Updated messages including error feedback + - Original tool/function definitions preserved + - Generation parameters maintained (temperature, etc.) 
+ - Provider-specific error formatting applied + - Retry context embedded in appropriate message format + + Provider-Specific Reask Strategies: + **OpenAI Modes:** + - TOOLS/FUNCTIONS: Adds tool response messages with validation errors + - JSON modes: Appends user message with correction instructions + - Preserves function schemas and conversation context + + **Anthropic Modes:** + - TOOLS: Creates tool_result blocks with error details + - JSON: Adds user message with structured error feedback + - Maintains conversation flow with proper message roles + + **Google/Gemini Modes:** + - TOOLS: Formats as function response with error content + - JSON: Appends user message with validation feedback + + **Other Providers (Cohere, Mistral, etc.):** + - Provider-specific message formatting + - Consistent error reporting patterns + - Maintained conversation context + + Error Enrichment: + The exception parameter is enriched with retry metadata: + - exception.failed_attempts: list of previous failures + - exception.retry_attempt_number: current attempt number + This allows downstream handlers to access full retry context. + + Example: + ```python + # After a ValidationError occurs during retry attempt #2 + new_kwargs = handle_reask_kwargs( + kwargs=original_request, + mode=Mode.TOOLS, + response=failed_completion, + exception=validation_error, # Will be enriched with failed_attempts + failed_attempts=[attempt1, attempt2] # Previous failures + ) + # new_kwargs now contains retry messages with error context + ``` Note: - This function is typically called internally by the retry logic when - max_retries > 1. It ensures that each retry attempt includes context - about previous failures, helping the LLM learn from its mistakes. + This function is called internally by retry_sync() and retry_async() + when max_retries > 1. It ensures each retry includes progressively + more context about previous failures, helping the LLM learn from + mistakes and avoid repeating the same errors. 
""" # Create a shallow copy of kwargs to avoid modifying the original kwargs_copy = kwargs.copy() + exception = InstructorError.from_exception( + exception, failed_attempts=failed_attempts + ) + # Organized by provider (matching process_response.py structure) REASK_HANDLERS = { # OpenAI modes diff --git a/instructor/providers/openai/utils.py b/instructor/providers/openai/utils.py index a36c14951..a32a27946 100644 --- a/instructor/providers/openai/utils.py +++ b/instructor/providers/openai/utils.py @@ -23,6 +23,7 @@ def reask_tools( kwargs: dict[str, Any], response: Any, exception: Exception, + failed_attempts: list[Any] | None = None, # noqa: ARG001 ): """ Handle reask for OpenAI tools mode when validation fails. @@ -51,6 +52,7 @@ def reask_responses_tools( kwargs: dict[str, Any], response: Any, exception: Exception, + failed_attempts: list[Any] | None = None, # noqa: ARG001 ): """ Handle reask for OpenAI responses tools mode when validation fails. @@ -79,6 +81,7 @@ def reask_md_json( kwargs: dict[str, Any], response: Any, exception: Exception, + failed_attempts: list[Any] | None = None, # noqa: ARG001 ): """ Handle reask for OpenAI JSON modes when validation fails. @@ -88,6 +91,7 @@ def reask_md_json( """ kwargs = kwargs.copy() reask_msgs = [dump_message(response.choices[0].message)] + reask_msgs.append( { "role": "user", @@ -102,6 +106,7 @@ def reask_default( kwargs: dict[str, Any], response: Any, exception: Exception, + failed_attempts: list[Any] | None = None, # noqa: ARG001 ): """ Handle reask for OpenAI default mode when validation fails. @@ -111,6 +116,7 @@ def reask_default( """ kwargs = kwargs.copy() reask_msgs = [dump_message(response.choices[0].message)] + reask_msgs.append( { "role": "user", diff --git a/instructor/validators.py b/instructor/validators.py new file mode 100644 index 000000000..f4119494b --- /dev/null +++ b/instructor/validators.py @@ -0,0 +1,30 @@ +"""Backwards compatibility module for instructor.validators. 
+ +This module provides lazy imports to maintain backwards compatibility. +""" + +import warnings + + +def __getattr__(name: str): + """Lazy import to provide backward compatibility for validators imports.""" + warnings.warn( + f"Importing from 'instructor.validators' is deprecated and will be removed in v2.0.0. " + f"Please update your imports to use the new location:\n" + " from instructor.validation import llm_validator, openai_moderation", + DeprecationWarning, + stacklevel=2 + ) + + from . import validation + from .processing import validators as processing_validators + + # Try validation module first + if hasattr(validation, name): + return getattr(validation, name) + + # Then try processing.validators + if hasattr(processing_validators, name): + return getattr(processing_validators, name) + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 64b2bb50c..19e18027b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -340,6 +340,21 @@ nav: - Blog: - "blog/index.md" plugins: + - llmstxt: + markdown_description: > + Instructor is a Python library that makes it easy to work with structured outputs + from large language models (LLMs). Built on top of Pydantic, it provides a simple, + type-safe way to extract structured data from LLM responses across multiple providers + including OpenAI, Anthropic, Google, and many others. 
+ sections: + Getting Started: + - index.md: Introduction to structured outputs with LLMs + - getting-started.md: Quick start guide + - installation.md: Installation instructions + Core Concepts: + - concepts/*.md + Integrations: + - integrations/*.md - redirects: redirect_maps: jobs.md: https://jobs.applied-llms.org/ diff --git a/pyproject.toml b/pyproject.toml index 849c74efe..b6abf207b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ dependencies = [ "diskcache>=5.6.3", ] name = "instructor" -version = "1.11.2" +version = "1.11.3" description = "structured outputs for llm" readme = "README.md" @@ -123,6 +123,7 @@ dev = [ "python-dotenv>=1.0.1", "pytest-xdist>=3.8.0", "pre-commit>=4.2.0", + "mkdocs-llmstxt>=0.3.1", ] docs = [ "mkdocs<2.0.0,>=1.4.3", @@ -135,6 +136,7 @@ docs = [ "mkdocs-minify-plugin<1.0.0,>=0.8.0", "mkdocs-redirects<2.0.0,>=1.2.1", "mkdocs-material-extensions>=1.3.1", + "mkdocs-llmstxt>=0.3.1", "material>=0.1", "cairosvg>=2.7.1", "pillow>=10.4.0", diff --git a/requirements-doc.txt b/requirements-doc.txt index 0f373230f..519f48af9 100644 --- a/requirements-doc.txt +++ b/requirements-doc.txt @@ -5,4 +5,5 @@ mkdocs-minify-plugin mkdocstrings mkdocstrings-python mkdocs-jupyter -mkdocs-redirects \ No newline at end of file +mkdocs-redirects +mkdocs-llmstxt \ No newline at end of file diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py index f0f47cee7..6caa86851 100644 --- a/tests/test_exceptions.py +++ b/tests/test_exceptions.py @@ -1,6 +1,7 @@ """Test that all instructor exceptions can be imported and caught properly.""" import pytest +from json import JSONDecodeError from instructor.core.exceptions import ( InstructorError, IncompleteOutputException, @@ -10,6 +11,7 @@ ConfigurationError, ModeError, ClientError, + FailedAttempt, ) @@ -220,3 +222,369 @@ def test_exception_import_from_instructor(): raise ImportedError("test error") except InstructorError as e: assert str(e) == "test error" + + +def 
test_instructor_error_from_exception(): + """Test InstructorError.from_exception() class method.""" + # Test with basic exception + original_exception = ValueError("Original error message") + instructor_error = InstructorError.from_exception(original_exception) + + assert isinstance(instructor_error, InstructorError) + assert str(instructor_error) == "Original error message" + assert instructor_error.failed_attempts is None + + # Test with failed attempts + failed_attempts = [ + FailedAttempt(1, Exception("First failure"), "partial completion"), + FailedAttempt(2, Exception("Second failure"), None), + ] + instructor_error_with_attempts = InstructorError.from_exception( + original_exception, failed_attempts=failed_attempts + ) + + assert isinstance(instructor_error_with_attempts, InstructorError) + assert instructor_error_with_attempts.failed_attempts == failed_attempts + + # Test with different exception types + runtime_error = RuntimeError("Runtime issue") + instructor_error_runtime = InstructorError.from_exception(runtime_error) + assert str(instructor_error_runtime) == "Runtime issue" + + +def test_instructor_error_str_with_no_failed_attempts(): + """Test InstructorError.__str__() with no failed attempts.""" + error = InstructorError("Simple error message") + assert str(error) == "Simple error message" + + error_with_args = InstructorError("Error", "with", "multiple", "args") + assert "Error" in str(error_with_args) + + +def test_instructor_error_str_with_failed_attempts(): + """Test InstructorError.__str__() XML template rendering with failed attempts.""" + # Create failed attempts + failed_attempts = [ + FailedAttempt(1, ValueError("Validation failed"), "incomplete response"), + FailedAttempt(2, KeyError("Missing key"), {"partial": "data"}), + FailedAttempt(3, RuntimeError("Process failed"), None), + ] + + error = InstructorError("Final error message", failed_attempts=failed_attempts) + error_str = str(error) + + # Check that XML structure is present + assert 
"" in error_str + assert "" in error_str + assert "" in error_str + assert "" in error_str + + # Check that all attempts are included + assert 'number="1"' in error_str + assert 'number="2"' in error_str + assert 'number="3"' in error_str + + # Check that exceptions are included + assert "Validation failed" in error_str + assert "Missing key" in error_str + assert "Process failed" in error_str + + # Check that completions are included + assert "incomplete response" in error_str + assert "partial" in error_str + + # Check that final exception is included + assert "Final error message" in error_str + + +def test_instructor_error_str_xml_structure(): + """Test detailed XML structure of __str__() output.""" + failed_attempts = [FailedAttempt(1, Exception("Test error"), "test completion")] + + error = InstructorError("Last error", failed_attempts=failed_attempts) + error_str = str(error) + + # Check proper XML nesting + lines = error_str.strip().split("\n") + + # Find key XML elements + failed_attempts_start = next( + i for i, line in enumerate(lines) if "" in line + ) + generation_start = next( + i for i, line in enumerate(lines) if '' in line + ) + exception_start = next(i for i, line in enumerate(lines) if "" in line) + completion_start = next(i for i, line in enumerate(lines) if "" in line) + + # Verify proper nesting order + assert failed_attempts_start < generation_start < exception_start < completion_start + + +def test_failed_attempt_namedtuple(): + """Test FailedAttempt NamedTuple functionality.""" + # Test with all fields + attempt = FailedAttempt(1, Exception("Test error"), "completion data") + assert attempt.attempt_number == 1 + assert str(attempt.exception) == "Test error" + assert attempt.completion == "completion data" + + # Test with None completion (default) + attempt_no_completion = FailedAttempt(2, ValueError("Another error")) + assert attempt_no_completion.attempt_number == 2 + assert isinstance(attempt_no_completion.exception, ValueError) + assert 
attempt_no_completion.completion is None + + # Test immutability + with pytest.raises(AttributeError): + attempt.attempt_number = 5 + + +def test_instructor_error_failed_attempts_attribute(): + """Test that failed_attempts attribute is properly handled.""" + # Test default None + error = InstructorError("Test error") + assert error.failed_attempts is None + + # Test explicit None + error_explicit = InstructorError("Test error", failed_attempts=None) + assert error_explicit.failed_attempts is None + + # Test with actual failed attempts + attempts = [FailedAttempt(1, Exception("Error"), None)] + error_with_attempts = InstructorError("Test error", failed_attempts=attempts) + assert error_with_attempts.failed_attempts == attempts + + +def test_instructor_retry_exception_with_failed_attempts(): + """Test InstructorRetryException inherits failed_attempts functionality.""" + failed_attempts = [ + FailedAttempt(1, Exception("First error"), "first completion"), + FailedAttempt(2, Exception("Second error"), "second completion"), + ] + + retry_exception = InstructorRetryException( + "Retry exhausted", + n_attempts=3, + total_usage=100, + failed_attempts=failed_attempts, + ) + + # Check that it inherits the XML formatting + error_str = str(retry_exception) + assert "<failed_attempts>" in error_str + assert "First error" in error_str + assert "Second error" in error_str + assert "first completion" in error_str + assert "second completion" in error_str + + +def test_multiple_exception_types_with_failed_attempts(): + """Test that various exception types work with failed attempts.""" + failed_attempts = [FailedAttempt(1, Exception("Test"), None)] + + # Test various exception types can be created with failed attempts + validation_error = ValidationError( + "Validation failed", failed_attempts=failed_attempts + ) + assert validation_error.failed_attempts == failed_attempts + + provider_error = ProviderError( + "openai", "API error", failed_attempts=failed_attempts + ) + assert 
provider_error.failed_attempts == failed_attempts + + config_error = ConfigurationError("Config error", failed_attempts=failed_attempts) + assert config_error.failed_attempts == failed_attempts + + +def test_failed_attempts_propagation_through_retry_cycles(): + """Test that failed attempts accumulate and propagate correctly through retry cycles.""" + # Simulate multiple retry attempts with different exceptions + attempt1 = FailedAttempt(1, ValidationError("Invalid format"), "partial response 1") + attempt2 = FailedAttempt(2, KeyError("missing_field"), "partial response 2") + attempt3 = FailedAttempt(3, ValueError("invalid value"), "partial response 3") + + failed_attempts = [attempt1, attempt2, attempt3] + + # Create final retry exception with accumulated failed attempts + final_exception = InstructorRetryException( + "All retries exhausted", + n_attempts=3, + total_usage=250, + failed_attempts=failed_attempts, + ) + + # Verify failed attempts are properly stored + assert final_exception.failed_attempts == failed_attempts + assert len(final_exception.failed_attempts) == 3 + + # Verify attempt numbers are sequential + attempt_numbers = [ + attempt.attempt_number for attempt in final_exception.failed_attempts + ] + assert attempt_numbers == [1, 2, 3] + + # Verify each attempt has different exceptions + exception_types = [ + type(attempt.exception).__name__ for attempt in final_exception.failed_attempts + ] + assert exception_types == ["ValidationError", "KeyError", "ValueError"] + + # Verify completions are preserved + completions = [attempt.completion for attempt in final_exception.failed_attempts] + assert completions == [ + "partial response 1", + "partial response 2", + "partial response 3", + ] + + +def test_failed_attempts_propagation_in_exception_hierarchy(): + """Test that failed attempts propagate correctly through exception inheritance.""" + # Test base class propagation + base_failed_attempts = [FailedAttempt(1, Exception("Base error"), None)] + base_error 
= InstructorError("Base error", failed_attempts=base_failed_attempts) + + # Convert to more specific exception type using from_exception + specific_error = ValidationError.from_exception( + base_error, failed_attempts=base_failed_attempts + ) + assert isinstance(specific_error, ValidationError) + assert isinstance(specific_error, InstructorError) # Should still inherit from base + assert specific_error.failed_attempts == base_failed_attempts + + # Test that derived exceptions maintain failed attempts + retry_failed_attempts = [ + FailedAttempt(1, Exception("Retry 1"), "completion 1"), + FailedAttempt(2, Exception("Retry 2"), "completion 2"), + ] + retry_error = InstructorRetryException( + "Retries failed", + n_attempts=2, + total_usage=100, + failed_attempts=retry_failed_attempts, + ) + + # Convert to base type should preserve failed attempts + base_from_retry = InstructorError.from_exception( + retry_error, failed_attempts=retry_failed_attempts + ) + assert base_from_retry.failed_attempts == retry_failed_attempts + + +def test_failed_attempts_accumulation_simulation(): + """Test simulation of how failed attempts would accumulate in a real retry scenario.""" + # Simulate a retry scenario where attempts accumulate + attempts = [] + + # First attempt fails + attempts.append( + FailedAttempt( + 1, ValidationError("Schema validation failed"), {"invalid": "data"} + ) + ) + + # Second attempt fails differently + attempts.append( + FailedAttempt(2, JSONDecodeError("Invalid JSON", "", 0), "malformed json") + ) + + # Third attempt fails again + attempts.append( + FailedAttempt( + 3, ValidationError("Required field missing"), {"partial": "response"} + ) + ) + + # Final retry exception with all attempts + final_error = InstructorRetryException( + "Maximum retries exceeded", + n_attempts=3, + total_usage=500, + failed_attempts=attempts, + last_completion={"final": "attempt"}, + messages=[{"role": "user", "content": "test"}], + create_kwargs={"model": "gpt-3.5-turbo", 
"max_retries": 3}, + ) + + # Verify all data is preserved + assert final_error.n_attempts == 3 + assert final_error.total_usage == 500 + assert len(final_error.failed_attempts) == 3 + assert final_error.last_completion == {"final": "attempt"} + + # Test string representation includes all attempts + error_str = str(final_error) + assert "" in error_str + assert "Schema validation failed" in error_str + assert "Invalid JSON" in error_str + assert "Required field missing" in error_str + assert "Maximum retries exceeded" in error_str + + # Verify attempt sequence integrity + for i, attempt in enumerate(final_error.failed_attempts, 1): + assert attempt.attempt_number == i + + +def test_failed_attempts_with_empty_and_none_completions(): + """Test failed attempts handle various completion states correctly.""" + # Test with None completion + attempt_none = FailedAttempt(1, Exception("Error with None"), None) + assert attempt_none.completion is None + + # Test with empty string completion + attempt_empty = FailedAttempt(2, Exception("Error with empty"), "") + assert attempt_empty.completion == "" + + # Test with empty dict completion + attempt_empty_dict = FailedAttempt(3, Exception("Error with empty dict"), {}) + assert attempt_empty_dict.completion == {} + + # Test with complex completion + complex_completion = { + "choices": [{"message": {"content": "partial"}}], + "usage": {"total_tokens": 50}, + } + attempt_complex = FailedAttempt( + 4, Exception("Error with complex"), complex_completion + ) + assert attempt_complex.completion == complex_completion + + # Create error with mixed completion types + mixed_attempts = [attempt_none, attempt_empty, attempt_empty_dict, attempt_complex] + error = InstructorError("Mixed completions", failed_attempts=mixed_attempts) + + # Verify XML rendering handles all types + error_str = str(error) + assert "" in error_str + assert "" in error_str + # Should handle None, empty string, empty dict, and complex objects + assert 
error_str.count("") == 4 + + +def test_failed_attempts_exception_chaining(): + """Test that exception chaining works properly with failed attempts.""" + # Create original exception with failed attempts + original_attempts = [ + FailedAttempt(1, Exception("Original failure"), "original completion") + ] + original_error = InstructorError( + "Original error", failed_attempts=original_attempts + ) + + try: + raise original_error + except InstructorError as e: + # Create new exception from caught exception, preserving failed attempts + chained_error = InstructorRetryException( + "Chained error", + n_attempts=2, + total_usage=150, + failed_attempts=e.failed_attempts, + ) + + # Verify failed attempts are preserved through chaining + assert chained_error.failed_attempts == original_attempts + assert len(chained_error.failed_attempts) == 1 + assert chained_error.failed_attempts[0].exception.args[0] == "Original failure" diff --git a/uv.lock b/uv.lock index 59f7bf6ce..b2c240adf 100644 --- a/uv.lock +++ b/uv.lock @@ -1753,7 +1753,7 @@ wheels = [ [[package]] name = "instructor" -version = "1.11.2" +version = "1.11.3" source = { editable = "." 
} dependencies = [ { name = "aiohttp" }, @@ -1883,6 +1883,7 @@ cohere = [ dev = [ { name = "coverage" }, { name = "jsonref" }, + { name = "mkdocs-llmstxt" }, { name = "pre-commit" }, { name = "pyright" }, { name = "pytest" }, @@ -1896,6 +1897,7 @@ docs = [ { name = "material" }, { name = "mkdocs" }, { name = "mkdocs-jupyter" }, + { name = "mkdocs-llmstxt" }, { name = "mkdocs-material", extra = ["imaging"] }, { name = "mkdocs-material-extensions" }, { name = "mkdocs-minify-plugin" }, @@ -2042,6 +2044,7 @@ cohere = [{ name = "cohere", specifier = ">=5.1.8,<6.0.0" }] dev = [ { name = "coverage", specifier = ">=7.3.2,<8.0.0" }, { name = "jsonref", specifier = ">=1.1.0,<2.0.0" }, + { name = "mkdocs-llmstxt", specifier = ">=0.3.1" }, { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pyright", specifier = "<2.0.0" }, { name = "pytest", specifier = ">=8.3.3,<9.0.0" }, @@ -2055,6 +2058,7 @@ docs = [ { name = "material", specifier = ">=0.1" }, { name = "mkdocs", specifier = ">=1.4.3,<2.0.0" }, { name = "mkdocs-jupyter", specifier = ">=0.24.6,<0.26.0" }, + { name = "mkdocs-llmstxt", specifier = ">=0.3.1" }, { name = "mkdocs-material", extras = ["imaging"], specifier = ">=9.5.9,<10.0.0" }, { name = "mkdocs-material-extensions", specifier = ">=1.3.1" }, { name = "mkdocs-minify-plugin", specifier = ">=0.8.0,<1.0.0" }, @@ -2438,8 +2442,7 @@ name = "jupytext" version = "1.17.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "markdown-it-py", version = "3.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "markdown-it-py", version = "4.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "markdown-it-py" }, { name = "mdit-py-plugins", version = "0.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, { name = "mdit-py-plugins", version = "0.5.0", source = { registry = 
"https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "nbformat" }, @@ -2619,11 +2622,8 @@ wheels = [ name = "markdown-it-py" version = "3.0.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.10'", -] dependencies = [ - { name = "mdurl", marker = "python_full_version < '3.10'" }, + { name = "mdurl" }, ] sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } wheels = [ @@ -2631,21 +2631,16 @@ wheels = [ ] [[package]] -name = "markdown-it-py" -version = "4.0.0" +name = "markdownify" +version = "1.2.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.13'", - "python_full_version == '3.12.*'", - "python_full_version == '3.11.*'", - "python_full_version == '3.10.*'", -] dependencies = [ - { name = "mdurl", marker = "python_full_version >= '3.10'" }, + { name = "beautifulsoup4" }, + { name = "six" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +sdist = { url = "https://files.pythonhosted.org/packages/83/1b/6f2697b51eaca81f08852fd2734745af15718fea10222a1d40f8a239c4ea/markdownify-1.2.0.tar.gz", hash = "sha256:f6c367c54eb24ee953921804dfe6d6575c5e5b42c643955e7242034435de634c", size = 18771, upload-time = "2025-08-09T17:44:15.302Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = 
"sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, + { url = "https://files.pythonhosted.org/packages/6a/e2/7af643acb4cae0741dffffaa7f3f7c9e7ab4046724543ba1777c401d821c/markdownify-1.2.0-py3-none-any.whl", hash = "sha256:48e150a1c4993d4d50f282f725c0111bd9eb25645d41fa2f543708fd44161351", size = 15561, upload-time = "2025-08-09T17:44:14.074Z" }, ] [[package]] @@ -2734,6 +2729,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899, upload-time = "2024-04-15T13:44:43.265Z" }, ] +[[package]] +name = "mdformat" +version = "0.7.22" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata", marker = "python_full_version < '3.10'" }, + { name = "markdown-it-py" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/eb/b5cbf2484411af039a3d4aeb53a5160fae25dd8c84af6a4243bc2f3fedb3/mdformat-0.7.22.tar.gz", hash = "sha256:eef84fa8f233d3162734683c2a8a6222227a229b9206872e6139658d99acb1ea", size = 34610, upload-time = "2025-01-30T18:00:51.418Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/6f/94a7344f6d634fe3563bea8b33bccedee37f2726f7807e9a58440dc91627/mdformat-0.7.22-py3-none-any.whl", hash = "sha256:61122637c9e1d9be1329054f3fa216559f0d1f722b7919b060a8c2a4ae1850e5", size = 34447, upload-time = "2025-01-30T18:00:48.708Z" }, +] + +[[package]] +name = "mdformat-tables" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdformat" }, + { name = "wcwidth" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/64/fc/995ba209096bdebdeb8893d507c7b32b7e07d9a9f2cdc2ec07529947794b/mdformat_tables-1.0.0.tar.gz", hash = "sha256:a57db1ac17c4a125da794ef45539904bb8a9592e80557d525e1f169c96daa2c8", size = 6106, upload-time = "2024-08-23T23:41:33.413Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/37/d78e37d14323da3f607cd1af7daf262cb87fe614a245c15ad03bb03a2706/mdformat_tables-1.0.0-py3-none-any.whl", hash = "sha256:94cd86126141b2adc3b04c08d1441eb1272b36c39146bab078249a41c7240a9a", size = 5104, upload-time = "2024-08-23T23:41:31.863Z" }, +] + [[package]] name = "mdit-py-plugins" version = "0.4.2" @@ -2742,7 +2764,7 @@ resolution-markers = [ "python_full_version < '3.10'", ] dependencies = [ - { name = "markdown-it-py", version = "3.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "markdown-it-py", marker = "python_full_version < '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/19/03/a2ecab526543b152300717cf232bb4bb8605b6edb946c845016fa9c9c9fd/mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5", size = 43542, upload-time = "2024-09-09T20:27:49.564Z" } wheels = [ @@ -2760,7 +2782,7 @@ resolution-markers = [ "python_full_version == '3.10.*'", ] dependencies = [ - { name = "markdown-it-py", version = "4.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "markdown-it-py", marker = "python_full_version >= '3.10'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655, upload-time = "2025-08-11T07:25:49.083Z" } wheels = [ @@ -2887,6 +2909,21 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/08/37/5f1fd5c3f6954b3256f8126275e62af493b96fb6aef6c0dbc4ee326032ad/mkdocs_jupyter-0.25.1-py3-none-any.whl", hash = "sha256:3f679a857609885d322880e72533ef5255561bbfdb13cfee2a1e92ef4d4ad8d8", size = 1456197, upload-time = "2024-10-15T14:56:29.854Z" }, ] +[[package]] +name = "mkdocs-llmstxt" +version = "0.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "markdownify" }, + { name = "mdformat" }, + { name = "mdformat-tables" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/16/c2/a394c26eeb3e967877662748844f032c289688bac71d3b11e1f5e5d99dbb/mkdocs_llmstxt-0.3.1.tar.gz", hash = "sha256:123119d9b984c1d1224ed5af250bfbc49879ad83decdaff59d8b0ebb459ddc54", size = 31329, upload-time = "2025-08-05T13:42:41.412Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/77/52514b44c8e73e0a81883270ab6d3e64be7a6f62f54783719437e847a50e/mkdocs_llmstxt-0.3.1-py3-none-any.whl", hash = "sha256:31f5b6aaae6123c09a2b1c32912c3eb21ccb356b5db7abb867f105e8cc392653", size = 11175, upload-time = "2025-08-05T13:42:40.436Z" }, +] + [[package]] name = "mkdocs-material" version = "9.6.18" @@ -4628,8 +4665,7 @@ name = "rich" version = "14.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "markdown-it-py", version = "3.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "markdown-it-py", version = "4.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "markdown-it-py" }, { name = "pygments" }, ] sdist = { url = "https://files.pythonhosted.org/packages/fe/75/af448d8e52bf1d8fa6a9d089ca6c07ff4453d86c65c145d0a300bb073b9b/rich-14.1.0.tar.gz", hash = "sha256:e497a48b844b0320d45007cdebfeaeed8db2a4f4bcf49f15e455cfc4af11eaa8", size = 224441, upload-time = "2025-07-25T07:32:58.125Z" }