Skip to content

memoir.classifier package

memoir.classifier

Classifier layer for Memoir memory system.

This module contains classification implementations for semantic memory organization.

Submodules

memoir.classifier.intelligent module

memoir.classifier.intelligent

Intelligent Classifier with LLM-based classification and dynamic expansion. Handles memory-worthiness detection, confidence-based expansion, and classification decisions.

ClassificationAction

Bases: Enum

Action to take with classification.

Source code in src/memoir/classifier/intelligent.py
class ClassificationAction(Enum):
    """Outcome the classifier recommends for a piece of content."""

    # Content is not memory-worthy.
    SKIP = "skip"
    # Content is classified to an existing taxonomy path.
    CLASSIFY = "classify"
    # Taxonomy should be expanded for a better classification.
    EXPAND = "expand"
    # A more generic parent category should be used.
    USE_PARENT = "use_parent"

MemoryAction

Bases: Enum

Action to take with memory storage.

Source code in src/memoir/classifier/intelligent.py
class MemoryAction(Enum):
    """Storage operation applied to a memory after classification."""

    # Nothing is stored.
    SKIP = "skip"
    # Content is stored as a new memory.
    STORE = "store"
    # An existing memory is replaced.
    REPLACE = "replace"
    # Content is appended to an existing memory.
    APPEND = "append"
    # Content is merged with an existing memory.
    MERGE = "merge"

ClassificationConfidence

Bases: Enum

Confidence levels for classification.

Source code in src/memoir/classifier/intelligent.py
class ClassificationConfidence(Enum):
    """Confidence levels for classification.

    Buckets are assigned by ``IntelligentClassifier._get_confidence_level``
    using inclusive lower bounds read from the classifier's ``thresholds``
    mapping. The figures below are that mapping's defaults; custom
    thresholds passed to the classifier shift the boundaries.
    """

    HIGH = "high"  # score >= 0.8
    MEDIUM = "medium"  # 0.6 <= score < 0.8
    LOW = "low"  # score < 0.6

ClassificationResult dataclass

Result of LLM classification.

Source code in src/memoir/classifier/intelligent.py
@dataclass
class ClassificationResult:
    """Outcome of a single LLM classification call."""

    # Whether the content is considered worth storing as a memory.
    is_memory: bool
    # Numeric confidence score reported for the classification.
    confidence: float
    # Bucketed form of ``confidence``.
    confidence_level: ClassificationConfidence
    # Free-text explanation of the decision.
    reasoning: str
    # Action recommended by the classifier.
    suggested_action: ClassificationAction
    # Primary path (for backward compatibility).
    path: str | None = None
    # Multiple paths for multi-label classification.
    paths: list[str] | None = None
    # Suggested taxonomy expansion (for low confidence).
    suggested_expansion: str | None = None
    # Whether to fall back to the parent category (for low confidence).
    use_parent: bool = False
    # Profile updates detected in the content.
    profile_updates: list[dict[str, str]] | None = None
    # Timeline events detected in the content.
    timeline_events: list[dict[str, str]] | None = None
    # Location events detected in the content.
    location_events: list[dict[str, str]] | None = None
    # LLM prompt used (if return_prompt=True).
    llm_prompt: str | None = None

    @property
    def all_paths(self) -> list[str]:
        """Return every classification path carried by this result.

        Returns:
            ``paths`` itself when it is non-empty; otherwise a one-element
            list holding ``path`` when that is set; otherwise an empty list.
        """
        if self.paths:
            return self.paths
        return [self.path] if self.path else []

all_paths property

all_paths: list[str]

Get all classification paths (primary + additional).

MemoryProcessingResult dataclass

Result of complete memory processing including storage.

Source code in src/memoir/classifier/intelligent.py
@dataclass
class MemoryProcessingResult:
    """Result of complete memory processing including storage."""

    # Classification that drove this processing step.
    classification: ClassificationResult
    # Storage action that was chosen for the memory.
    memory_action: MemoryAction
    # Path associated with the memory, when there is one.
    memory_path: str | None = None
    # Content held before the action, when there was any.
    previous_content: str | None = None
    # Content held after the action, when there is any.
    new_content: str | None = None
    # Defaults to None (not an empty list), so the annotation must be optional.
    expanded_paths: list[str] | None = None
    # Whether processing succeeded.
    success: bool = True
    # Free-text explanation of the storage decision.
    storage_reasoning: str = ""

IntelligentClassifier

Intelligent classifier with LLM-based classification and dynamic taxonomy expansion. Handles memory-worthiness detection and confidence-based expansion decisions.

Source code in src/memoir/classifier/intelligent.py
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
class IntelligentClassifier:
    """
    Intelligent classifier with LLM-based classification and dynamic taxonomy expansion.
    Handles memory-worthiness detection, confidence-based expansion decisions.
    """

    def __init__(
        self,
        llm: Any,
        memory_store: Any | None = None,
        taxonomy_version: TaxonomyVersion = TaxonomyVersion.GENERAL,
        confidence_thresholds: dict | None = None,
        expansion_strategy: LLMExpansionStrategy = LLMExpansionStrategy.FOCUSED_SUBTREE,
        min_items_for_expansion: int = 3,
        profile_manager: Any | None = None,
        timeline_manager: Any | None = None,
        location_manager: Any | None = None,
        suppress_path_warnings: bool = True,
        enable_metadata_extraction: bool = False,
        taxonomy_loader: TaxonomyLoader | None = None,
    ):
        """
        Initialize the intelligent classifier.

        Args:
            llm: Language model for classification and decisions
            memory_store: Optional memory store for actual storage operations
            taxonomy_version: Taxonomy preset to use
            confidence_thresholds: Custom confidence thresholds. Keys that are
                                   supplied override the defaults ("high": 0.8,
                                   "medium": 0.6, "low": 0.0); missing keys keep
                                   their default value.
            expansion_strategy: Strategy for taxonomy expansion
            min_items_for_expansion: Minimum items before expansion
            profile_manager: Optional profile manager for handling profile updates
            timeline_manager: Optional timeline manager; stored on the instance
                              (presumably for timeline events - confirm against callers)
            location_manager: Optional location manager; stored on the instance
                              (presumably for location events - confirm against callers)
            suppress_path_warnings: Whether to suppress warnings for invalid LLM-suggested paths
            enable_metadata_extraction: Enable profile/timeline/location extraction (slower but richer)
            taxonomy_loader: Optional TaxonomyLoader for loading taxonomy from store.
                             When provided, taxonomy data is loaded from the store's taxonomy namespace.
                             When None, falls back to hardcoded TaxonomyPresets.
        """
        self.llm = llm
        self.memory_store = memory_store
        self.profile_manager = profile_manager
        self.timeline_manager = timeline_manager
        self.location_manager = location_manager
        self.taxonomy_version = taxonomy_version
        self.suppress_path_warnings = suppress_path_warnings
        self.enable_metadata_extraction = enable_metadata_extraction
        # Must be set before _load_taxonomy_paths(), which reads it.
        self._taxonomy_loader = taxonomy_loader

        # Initialize taxonomy - prefer store-based loading if taxonomy_loader provided
        preset_paths = self._load_taxonomy_paths()

        # Create a simple taxonomy object that provides get_all_paths() method
        class PresetTaxonomy:
            def __init__(self, preset_paths):
                self.preset_paths = preset_paths
                self._top_level_categories = set(preset_paths.keys())
                # Do NOT add single-level categories to valid paths
                # Only add multi-level paths (2+ levels minimum)
                self._all_paths = [
                    f"{category}.{path}"
                    for category, paths in preset_paths.items()
                    for path in paths
                ]

            def get_all_paths(self):
                return sorted(self._all_paths)

            def is_valid_path(self, path):
                return path in self._all_paths

            def get_top_level_categories(self):
                return self._top_level_categories

        self.taxonomy = PresetTaxonomy(preset_paths)

        # Also keep iterative taxonomy for expansion capabilities if needed
        self.iterative_taxonomy = LLMIterativeTaxonomy(
            taxonomy_version=taxonomy_version,
            llm=llm,
            expansion_strategy=expansion_strategy,
            min_items_threshold=min_items_for_expansion,
        )

        # Confidence thresholds: overlay caller overrides on the defaults so a
        # partial mapping (e.g. only "high") cannot cause a KeyError later in
        # _get_confidence_level, which classify_input would swallow and turn
        # into a SKIP result.
        default_thresholds = {
            "high": 0.8,
            "medium": 0.6,
            "low": 0.0,
        }
        self.thresholds = {**default_thresholds, **(confidence_thresholds or {})}

        # Track pending expansions
        self.pending_expansions = {}

        # Cache for taxonomy data (loaded once from store)
        self._examples_cache: list[tuple[str, str, str]] | None = None
        self._descriptions_cache: dict[str, str] | None = None

    def _get_confidence_level(self, confidence: float) -> ClassificationConfidence:
        """Map a numeric confidence score onto a confidence bucket.

        The score is compared against ``self.thresholds["high"]`` and then
        ``self.thresholds["medium"]`` (inclusive lower bounds); anything
        below both is reported as LOW.
        """
        buckets = (
            ("high", ClassificationConfidence.HIGH),
            ("medium", ClassificationConfidence.MEDIUM),
        )
        for threshold_key, level in buckets:
            if confidence >= self.thresholds[threshold_key]:
                return level
        return ClassificationConfidence.LOW

    def _load_taxonomy_paths(self) -> dict[str, list[str]]:
        """Return the taxonomy paths, preferring the store over presets.

        If a taxonomy loader is configured and yields a non-empty result,
        that result is returned. If the loader is missing, returns nothing,
        or raises, the hardcoded SIMPLIFIED preset from TaxonomyPresets is
        returned instead.

        Returns:
            Dict mapping category to list of paths.
        """
        loader = self._taxonomy_loader
        if not loader:
            logger.debug("[Classifier] No taxonomy_loader provided, using fallback")
        else:
            # Any failure while reading or summarising the store data is
            # treated as best-effort and routed to the fallback below.
            try:
                from_store = loader.get_preset_paths_from_store()
                if from_store:
                    path_count = sum(map(len, from_store.values()))
                    logger.info(
                        f"[Classifier] Loaded taxonomy paths FROM STORE: "
                        f"{len(from_store)} categories, {path_count} total paths"
                    )
                    return from_store
            except Exception as exc:
                logger.warning(
                    f"[Classifier] Failed to load taxonomy from store, using fallback: {exc}"
                )

        # Fallback to hardcoded TaxonomyPresets
        logger.info("[Classifier] Using FALLBACK hardcoded TaxonomyPresets")
        return TaxonomyPresets().PRESETS[TaxonomyVersion.SIMPLIFIED]

    def _get_classification_examples(
        self, limit: int = 8
    ) -> list[tuple[str, str, str]]:
        """Return up to ``limit`` classification examples.

        Examples come from the taxonomy loader when one is configured and it
        yields a non-empty result; otherwise the hardcoded
        ``TaxonomyPresets.CLASSIFICATION_EXAMPLES`` are used. The full list
        is cached on first load and later calls only slice the cache.

        Args:
            limit: Maximum number of examples to return.

        Returns:
            List of (input_text, path, reasoning) tuples.
        """
        if self._examples_cache is None:
            loader = self._taxonomy_loader
            if loader:
                try:
                    # Fetch everything once; the slice happens per call.
                    loaded = loader.get_examples_from_store()
                    if loaded:
                        logger.info(
                            f"[Classifier] Loaded {len(loaded)} examples FROM STORE"
                        )
                        self._examples_cache = loaded
                        return loaded[:limit]
                except Exception as exc:
                    logger.warning(
                        f"[Classifier] Failed to load examples from store: {exc}"
                    )

            # Loader absent, empty, or failed: use the hardcoded examples.
            logger.info("[Classifier] Using FALLBACK examples")
            self._examples_cache = TaxonomyPresets.CLASSIFICATION_EXAMPLES

        return self._examples_cache[:limit]

    def _get_category_descriptions(self) -> dict[str, str]:
        """Return the category descriptions, loading them on first use.

        Descriptions come from the taxonomy loader when one is configured and
        it yields a non-empty result; otherwise the hardcoded
        ``TaxonomyPresets.CATEGORY_DESCRIPTIONS`` are used. The result is
        cached, and the cached object itself is returned on later calls.

        Returns:
            Dict mapping category to description.
        """
        if self._descriptions_cache is None:
            loader = self._taxonomy_loader
            if loader:
                try:
                    loaded = loader.get_descriptions_from_store()
                    if loaded:
                        logger.info(
                            f"[Classifier] Loaded {len(loaded)} descriptions FROM STORE"
                        )
                        self._descriptions_cache = loaded
                        return loaded
                except Exception as exc:
                    logger.warning(
                        f"[Classifier] Failed to load descriptions from store: {exc}"
                    )

            # Loader absent, empty, or failed: use the hardcoded descriptions.
            logger.info("[Classifier] Using FALLBACK category descriptions")
            self._descriptions_cache = TaxonomyPresets.CATEGORY_DESCRIPTIONS

        return self._descriptions_cache

    async def classify_input(
        self,
        content: str,
        metadata: dict | None = None,
        conversation_context: list[str] | None = None,
        return_prompt: bool = False,
    ) -> ClassificationResult:
        """
        Classify input using LLM to determine if it's memory-worthy and where to store it.

        Args:
            content: The content to classify
            metadata: Optional metadata about the content
            conversation_context: Optional list of previous conversation exchanges for context
            return_prompt: When True, the prompt sent to the LLM is attached to the
                           result as ``llm_prompt`` (on both success and failure)

        Returns:
            ClassificationResult with classification details. If the LLM call or
            response handling raises, a result with ``is_memory=False`` and
            ``suggested_action=ClassificationAction.SKIP`` is returned instead of
            propagating the exception.
        """
        # Get current taxonomy paths
        all_paths = self.taxonomy.get_all_paths()

        # Build classification prompt (outside the try: failures here propagate)
        prompt = self._build_classification_prompt(
            content, all_paths, metadata, conversation_context
        )

        try:
            # Call LLM for classification
            response = await self.llm.ainvoke(prompt)

            # Parse response
            result = self._parse_classification_response(response)

            # Add confidence level
            result.confidence_level = self._get_confidence_level(result.confidence)

            # Add prompt if requested
            if return_prompt:
                result.llm_prompt = prompt

            return result

        except Exception as e:
            logger.error(f"Classification failed: {e}")
            # Return skip action on error. The prompt was still built and used,
            # so honour return_prompt here too.
            return ClassificationResult(
                is_memory=False,
                path=None,
                confidence=0.0,
                confidence_level=ClassificationConfidence.LOW,
                reasoning=f"Classification failed: {e!s}",
                suggested_action=ClassificationAction.SKIP,
                llm_prompt=prompt if return_prompt else None,
            )

    def _build_classification_prompt(
        self,
        content: str,
        paths: list[str],
        metadata: dict | None,
        conversation_context: list[str] | None = None,
    ) -> str:
        """
        Build prompt for LLM classification.

        PROMPT STRUCTURE (optimized for prompt caching):
        1. STATIC SECTION (cached): Taxonomy + all guidelines
        2. DYNAMIC SECTION (not cached): User content + context

        The static section is marked with [STATIC_SECTION_START] and [STATIC_SECTION_END]
        to help with prompt caching detection.

        When enable_metadata_extraction=False, uses a minimal prompt for faster inference.
        """
        # Use minimal prompt for fast mode (no metadata extraction)
        if not self.enable_metadata_extraction:
            return self._build_fast_classification_prompt(
                content, paths, metadata, conversation_context
            )

        # Get first-level categories for context
        first_level = [p for p in paths if "." not in p and p != "other"]

        # Get session date for timeline calculations (needed in static section)
        session_date = (
            metadata.get("session_date", "unknown") if metadata else "unknown"
        )

        # =================================================================
        # STATIC SECTION START - This part is cached across requests
        # =================================================================
        prompt_parts = [
            "[STATIC_SECTION_START]",
            "",
            "You are a memory classification system. You will analyze user content and determine:",
            "1. Is this information worth storing as a memory?",
            "2. If yes, which taxonomy path best fits this content?",
            "3. What is your confidence in this classification (0.0 to 1.0)?",
            "",
            "Memory storage guidelines:",
            "   - Skip transient information (greetings, current time, weather forecasts)",
            "   - Skip very general conversations without specific personal details",
            "   - Store personal preferences, facts, skills, relationships, goals, experiences",
            "",
            "Confidence scoring guidelines:",
            "   - 0.9-1.0: Perfect fit, exact match to taxonomy path and clear content",
            "   - 0.7-0.8: Good fit, clearly belongs in this category",
            "   - 0.5-0.6: Moderate fit, somewhat belongs but could fit elsewhere",
            "   - 0.3-0.4: Poor fit, content is vague or path is not ideal",
            "   - 0.0-0.2: Very poor fit, should probably not be stored",
            "",
            "Available top-level categories:",
        ]

        for category in sorted(first_level):
            prompt_parts.append(f"  - {category}")

        # Show ALL available paths to LLM for complete taxonomy coverage
        # NOTE: This approach works for current taxonomy size (~1000 paths)
        # Future scaling: May need chunking/filtering if taxonomy grows to 5K+ paths or hits LLM context limits
        all_non_other_paths = [p for p in paths if not p.endswith(".other")]
        if all_non_other_paths:
            prompt_parts.extend(
                [
                    "",
                    f"Complete taxonomy hierarchy ({len(all_non_other_paths)} available paths):",
                    "",
                ]
            )

            # Show ALL paths - no sampling to avoid missing critical paths like routine.morning or tools.ides
            for path in sorted(all_non_other_paths):
                prompt_parts.append(f"  {path}")

        # Build classification examples (from store or fallback to presets)
        examples_lines = []
        for input_text, path, _reason in self._get_classification_examples(8):
            examples_lines.append(f"  * '{input_text}' → {path}")

        prompt_parts.extend(
            [
                "",
                "CLASSIFICATION RULES:",
                "- EXACTLY 3 levels required: category.subcategory.type (e.g., profile.personal.identity)",
                "- NEVER use 2 levels (profile.personal) or 4+ levels (profile.personal.identity.name)",
                "- CREATE new categories/subcategories/types as needed if existing ones don't fit well",
                "- You can invent new top-level categories (e.g., routine, tools, settings)",
                "- You can invent new subcategories under existing categories",
                "- You can invent new types under existing subcategories",
                "- Examples of GOOD 3-level classifications:",
                *examples_lines,
                "",
                "MEMORY RULES:",
                "- is_memory=false for greetings, weather, transient chat",
                "- is_memory=true for personal facts, preferences, relationships, goals, context, workflows",
                "",
                "CONTEXT USAGE:",
                "- CLASSIFY ONLY the main content (what the user actually said)",
                "- Context is for understanding only - DO NOT extract information from it",
                "",
                "MULTI-LABEL (USE SPARINGLY):",
                "- ONLY use multiple paths when content belongs to DIFFERENT TOP-LEVEL CATEGORIES",
                "- Maximum 2 paths, when in doubt use SINGLE path",
                "",
            ]
        )

        # Conditionally add metadata extraction guidelines (profile/timeline/location)
        if self.enable_metadata_extraction:
            prompt_parts.extend(
                [
                    "",
                    "PROFILE UPDATE DETECTION:",
                    "- Check if the content contains information that would UPDATE a user's PROFILE",
                    "- Profile updates are DEFINITIVE facts about the user (3-level paths)",
                    "- Examples of profile updates (use EXACTLY 3 levels):",
                    "  * 'I'm 25 years old' → profile.personal.demographics",
                    "  * 'I work at Google' → profile.professional.occupation",
                    "  * 'I live in San Francisco' → profile.personal.location",
                    "  * 'I graduated from Stanford' → profile.professional.education",
                    "  * 'My name is John' → profile.personal.identity",
                    "  * 'I'm married to Sarah' → relationships.family.spouse",
                    "- If NO profile updates: return 'no_profile_update'",
                    "- If profile updates exist: list them with 3-level path and new value",
                    "",
                    "",
                    "TIMELINE EVENT DETECTION:",
                    "- Use the session date provided in the dynamic section below for calculating relative dates",
                    "- ALWAYS check if the content describes a PAST or PRESENT EVENT with temporal information",
                    "- Timeline events are specific occurrences that happened at a particular time",
                    "- Examples of timeline events with ACTUAL date calculation:",
                    "  * 'Yesterday was my first day at the new job' (session: 15 March 2023) → date: '20230314'",
                    "  * 'Last week I went to a conference' (session: 20 June 2023) → date: '20230613' (7 days before)",
                    "  * 'I graduated from college in May 2020' → date: '20200501' (first of month)",
                    "  * 'On March 15th, I came out to my parents' (session: 2023) → date: '20230315' (assume current year)",
                    "  * 'Two months ago I started therapy' (session: 10 July 2023) → date: '20230510' (2 months before)",
                    "- CRITICAL: Always provide ACTUAL 8-digit dates in YYYYMMDD format, NOT placeholders",
                    "- Calculate relative dates precisely from the session date:",
                    "  * 'yesterday' = session date minus 1 day",
                    "  * 'last week' = session date minus 7 days",
                    "  * 'last month' = session date minus ~30 days",
                    "  * 'two days ago' = session date minus 2 days",
                    "- Double-check your date arithmetic: if session is July 10, 2025 and content says 'yesterday', result should be July 9, 2025 → '20250709'",
                    "- CRITICAL: If content contains multiple time references, ALWAYS prioritize the more recent/specific one:",
                    "  * SPECIFICITY ORDER (most to least specific): 'yesterday' > 'two days ago' > 'last week' > 'last month'",
                    "  * 'yesterday' is MORE SPECIFIC than 'last week' - use yesterday",
                    "  * 'two days ago' is MORE SPECIFIC than 'last week' - use two days ago",
                    "  * When in doubt, use the time reference that gives the most recent date",
                    "- If only year/month given, use first day: 'May 2020' → '20200501'",
                    "- If NO timeline events: return 'no_timeline_events'",
                    "- If timeline events exist: list them with date and description",
                    "",
                    "",
                    "LOCATION EVENT DETECTION:",
                    "- CRITICAL: ALWAYS check if the content mentions ANY specific PLACES, LOCATIONS, or geographic references",
                    "- Location events are activities, experiences, or events that happened at specific places",
                    "- IMPORTANT: Look for location indicators like 'in', 'at', 'from', 'to' followed by place names",
                    "- Examples of location events (MUST detect these patterns):",
                    "  * 'The support group in Los Angeles has made me feel accepted' → location: 'Los Angeles', description: 'support group attendance'",
                    "  * 'I went to a LGBTQ support group in San Francisco' → location: 'San Francisco', description: 'attended LGBTQ support group'",
                    "  * 'We moved from New York to California last year' → location: 'New York', description: 'lived here previously' + location: 'California', description: 'moved here'",
                    "  * 'I work at the downtown office' → location: 'downtown office', description: 'workplace'",
                    "  * 'The conference was held at the convention center' → location: 'convention center', description: 'attended conference'",
                    "  * 'I love visiting the beach on weekends' → location: 'beach', description: 'recreational visits'",
                    "  * 'I want to visit Canada next year' → location: 'Canada', description: 'planned travel destination'",
                    "  * 'Planning to go to Paris for vacation' → location: 'Paris', description: 'vacation destination'",
                    "  * 'Would love to travel to Japan someday' → location: 'Japan', description: 'desired travel destination'",
                    "- KEY PHRASES to detect: 'in [City]', 'at [Place]', 'from [Location]', 'to [Location]', 'want to visit [Place]', 'plan to go to [Place]', 'travel to [Place]'",
                    "- Extract both specific locations (Los Angeles, San Francisco, New York) and local places (offices, centers, venues)",
                    "- Normalize location names: 'NYC' → 'New York City', 'SF' → 'San Francisco', 'LA' → 'Los Angeles'",
                    "- If NO location events: return 'no_location_events'",
                    "- If location events exist: list them with location name and description",
                    "",
                ]
            )

        # JSON response format (varies based on metadata extraction setting)
        if self.enable_metadata_extraction:
            prompt_parts.extend(
                [
                    "JSON RESPONSE FORMAT:",
                    "{",
                    '  "is_memory": true/false,',
                    '  "paths": ["primary.path.here", "secondary.path.here"] or ["single.path"] or null,',
                    '  "confidence": 0.0-1.0,',
                    '  "reasoning": "explanation of decision and path choices",',
                    '  "profile_updates": "no_profile_update" or [{"path": "profile.path.here", "value": "new value"}],',
                    '  "timeline_events": "no_timeline_events" or [{"date": "YYYYMMDD", "description": "event description"}],',
                    '  "location_events": "no_location_events" or [{"location": "location name", "description": "activity/event description"}]',
                    "}",
                    "",
                    "[STATIC_SECTION_END]",
                ]
            )
        else:
            prompt_parts.extend(
                [
                    "JSON RESPONSE FORMAT:",
                    "{",
                    '  "is_memory": true/false,',
                    '  "paths": ["primary.path.here", "secondary.path.here"] or ["single.path"] or null,',
                    '  "confidence": 0.0-1.0,',
                    '  "reasoning": "explanation of decision and path choices"',
                    "}",
                    "",
                    "[STATIC_SECTION_END]",
                ]
            )

        # =================================================================
        # DYNAMIC SECTION - This part changes with each request
        # =================================================================
        prompt_parts.extend(
            [
                "",
                "[DYNAMIC_SECTION_START]",
                "",
            ]
        )

        # Add threshold guidance (semi-dynamic based on user config)
        threshold_guidance = []
        if self.thresholds["low"] > 0.0:
            threshold_guidance.append(
                f"- IMPORTANT: User requires minimum {self.thresholds['low']:.1f} confidence to store ANY memory"
            )
        if self.thresholds["low"] >= 0.5:
            threshold_guidance.append(
                f"- BE SELECTIVE: Only store memories you're at least {self.thresholds['low']:.1f} confident about"
            )
        if self.thresholds["low"] >= 0.7:
            threshold_guidance.append(
                "- VERY CONSERVATIVE: User wants only high-quality, well-classified memories"
            )

        if threshold_guidance:
            prompt_parts.append("User's confidence thresholds for this session:")
            prompt_parts.extend(threshold_guidance)
            prompt_parts.append("")

        prompt_parts.extend(
            [
                f"User's minimum threshold: {self.thresholds['low']:.1f} (below this = not stored)",
                f"Medium confidence threshold: {self.thresholds['medium']:.1f}",
                f"High confidence threshold: {self.thresholds['high']:.1f}",
                "",
                f"Current session date: {session_date} (use for calculating relative dates)",
                "",
            ]
        )

        # Add conversation context if provided
        if conversation_context:
            prompt_parts.extend(
                [
                    "Previous conversation context (ONLY for understanding, DO NOT classify based on this):",
                    "Speaker Attribution Guide:",
                    "  [SELF] = The person whose memory you're classifying speaking",
                    "  [OTHER] = Someone else speaking to them",
                    "",
                ]
            )
            for i, prev_exchange in enumerate(conversation_context, 1):
                prompt_parts.append(f"  {i}. {prev_exchange}")
            prompt_parts.append("")

        # Add metadata if provided
        if metadata:
            # Filter out session_date as it's already shown above
            display_metadata = {
                k: v for k, v in metadata.items() if k != "session_date"
            }
            if display_metadata:
                prompt_parts.append(
                    f"Additional metadata: {json.dumps(display_metadata)}"
                )
                prompt_parts.append("")

        # Add the actual content to analyze (THE KEY DYNAMIC PART)
        prompt_parts.extend(
            [
                "CONTENT TO ANALYZE:",
                f"(from [SELF]): {content}",
                "",
                "IMPORTANT: The content above is from [SELF] - classify based on their personal perspective/experience.",
                "",
                "Now analyze the content and provide your JSON response:",
            ]
        )

        return "\n".join(prompt_parts)

    def _build_fast_classification_prompt(
        self,
        content: str,
        paths: list[str],
        metadata: dict | None,
        conversation_context: list[str] | None = None,
    ) -> str:
        """
        Build a minimal prompt for fast classification (no metadata extraction).

        This prompt is optimized for speed over features:
        - Classification examples showing the pattern
        - Category descriptions for guidance
        - Simple JSON output

        ~600 tokens vs ~4500 tokens for full prompt = faster inference.
        """
        # Build examples section (from store or fallback to presets)
        # Use all examples for fast classification (no limit)
        examples_lines = []
        for input_text, path, _reason in self._get_classification_examples(limit=500):
            examples_lines.append(f'  "{input_text}" → {path}')

        examples_str = "\n".join(examples_lines)

        # Build categories section (from store or fallback to presets)
        categories_lines = []
        for cat, desc in self._get_category_descriptions().items():
            categories_lines.append(f"  {cat}: {desc}")

        categories_str = "\n".join(categories_lines)

        # Minimal static section
        prompt = f"""[STATIC_SECTION_START]
Classify user content into taxonomy paths. Return JSON only.

CATEGORIES:
{categories_str}

EXAMPLES (MUST be exactly 3 levels: category.subcategory.type):
{examples_str}

RULES:
- EXACTLY 3 levels required: category.subcategory.type (e.g., profile.personal.identity)
- NEVER use 2 levels (profile.personal) or 4+ levels (profile.personal.identity.name)
- CREATE new categories/subcategories/types as needed if existing ones don't fit well
- You can invent new top-level categories (e.g., routine, tools, settings)
- You can invent new subcategories under existing categories
- You can invent new types under existing subcategories
- is_memory=false for greetings, weather, transient chat
- is_memory=true for personal facts, preferences, relationships, goals, context, workflows

FORMAT: {{"is_memory":bool,"paths":["path"],"confidence":float,"reasoning":"brief"}}
[STATIC_SECTION_END]

[DYNAMIC_SECTION_START]
CONTENT: {content}
[DYNAMIC_SECTION_END]"""

        return prompt

    def _fix_common_json_issues(self, json_str: str) -> str:
        """Fix common JSON formatting issues from LLM responses."""
        import re

        # First remove JSON comments specifically - be more aggressive
        json_str = re.sub(r"//.*", "", json_str)  # Remove // comments to end of line
        json_str = re.sub(
            r"/\*.*?\*/", "", json_str, flags=re.DOTALL
        )  # Remove /* */ comments

        # Also remove any trailing content after closing braces that might be comments
        json_str = re.sub(r"}\s*//.*", "}", json_str)

        # Common fixes for LLM JSON issues
        fixes = [
            # Fix trailing commas before closing braces/brackets
            (r",(\s*[}\]])", r"\1"),
            # Fix missing commas between array elements
            (r'"\s*\n\s*"', '",\n"'),
            # Fix missing commas between objects in arrays
            (r"}\s*\n\s*{", "},\n{"),
            # Fix unescaped quotes in strings (basic attempt)
            (r':\s*"([^"]*)"([^",}\]]*)"', r': "\1\2"'),
            # Fix missing quotes around field names
            (r"([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:", r'\1"\2":'),
            # Fix missing commas after string values before next field
            (r'"\s*\n\s*"([a-zA-Z_][a-zA-Z0-9_]*)":', r'",\n"\1":'),
            # Fix array formatting issues
            (r'\[\s*"([^"]+)"\s*"([^"]+)"\s*\]', r'["\1", "\2"]'),
        ]

        original = json_str
        for pattern, replacement in fixes:
            json_str = re.sub(pattern, replacement, json_str)

        if json_str != original:
            logger.info(
                f"Applied JSON repairs: {len([f for f in fixes if re.search(f[0], original)])} fixes"
            )

        return json_str

    def _parse_classification_response(self, response: Any) -> ClassificationResult:
        """
        Parse an LLM classification response into a ClassificationResult.

        The first JSON object found in the response text is decoded, with one
        repair attempt through ``_fix_common_json_issues`` if decoding fails.
        Its path suggestions (``"paths"``, or the legacy ``"path"``) are
        validated against the taxonomy, the multi-label rule is enforced, the
        user's minimum confidence threshold is applied, and optional
        profile/timeline/location metadata is collected.

        Args:
            response: LLM response. ``response.content`` is used when that
                attribute exists, otherwise ``str(response)``.

        Returns:
            The parsed ClassificationResult. ``confidence_level`` is set to
            LOW here as a placeholder. If anything raises while parsing, the
            error is logged and a non-memory SKIP result is returned instead.
        """
        try:
            # Handle different response types
            if hasattr(response, "content"):
                content = response.content
            else:
                content = str(response)

            def find_closing_brace(text, start_idx, string_aware):
                """Return the index of the brace closing text[start_idx], or -1."""
                depth = 0
                in_string = False
                escaped = False
                for i in range(start_idx, len(text)):
                    ch = text[i]
                    if in_string:
                        if escaped:
                            escaped = False
                        elif ch == "\\":
                            escaped = True
                        elif ch == '"':
                            in_string = False
                    elif string_aware and ch == '"':
                        in_string = True
                    elif ch == "{":
                        depth += 1
                    elif ch == "}":
                        depth -= 1
                        if depth == 0:
                            return i
                return -1

            def extract_json(text):
                """Return the first brace-balanced object in text, or None."""
                start_idx = text.find("{")
                if start_idx == -1:
                    return None
                # Prefer string-aware matching so braces inside string values
                # (e.g. in "reasoning") do not end the object early. If the
                # quotes are unbalanced, fall back to plain brace counting.
                for string_aware in (True, False):
                    end_idx = find_closing_brace(text, start_idx, string_aware)
                    if end_idx != -1:
                        return text[start_idx : end_idx + 1]
                return None

            json_str = extract_json(content)
            if json_str:
                try:
                    data = json.loads(json_str)
                    # Debug logging to see what we're getting
                    logger.debug(f"Parsed LLM response: {data}")
                except json.JSONDecodeError as json_error:
                    # Log the malformed JSON for debugging
                    logger.error(f"Malformed JSON from LLM: {json_str}")
                    logger.error(f"JSON parsing error: {json_error}")
                    # Try to fix common JSON issues and retry
                    json_str = self._fix_common_json_issues(json_str)
                    try:
                        data = json.loads(json_str)
                        logger.debug(f"Successfully parsed after JSON repair: {data}")
                    except json.JSONDecodeError:
                        logger.error("JSON repair failed, using fallback")
                        raise  # Re-raise to trigger fallback handling
            else:
                # Fallback parsing - log the content that failed to parse
                logger.warning(
                    f"Failed to parse JSON from LLM response: {content[:200]}..."
                )
                data = {
                    "is_memory": False,
                    "path": None,
                    "confidence": 0.0,
                    "reasoning": "Failed to parse response",
                }

            # Handle both single path (backward compatibility) and multiple paths
            suggested_paths = data.get("paths")
            suggested_path = data.get("path")  # For backward compatibility

            # A bare string must be wrapped; iterating it would yield single
            # characters, and "." would then pass as a new-category path.
            if isinstance(suggested_paths, str):
                suggested_paths = [suggested_paths] if suggested_paths else []

            # Normalize to list format
            if isinstance(suggested_paths, (list, tuple)) and suggested_paths:
                candidate_paths = list(suggested_paths)
            elif suggested_path:
                candidate_paths = [suggested_path]
            else:
                candidate_paths = []

            # Only non-empty strings can be taxonomy paths; anything else
            # would fail on .split() below and abort the whole parse.
            paths_to_validate = [
                p for p in candidate_paths if isinstance(p, str) and p
            ]
            if len(paths_to_validate) != len(candidate_paths):
                logger.debug(
                    f"Ignored non-string or empty path suggestions in: {candidate_paths}"
                )

            # Validate that all suggested paths exist in taxonomy
            all_paths = self.taxonomy.get_all_paths()
            known_paths = set(all_paths)  # O(1) membership tests
            validated_paths = []

            # Extract top-level categories dynamically from taxonomy
            existing_top_level = {p.split(".")[0] for p in known_paths if "." in p}

            for path in paths_to_validate:
                if path in known_paths:
                    # Existing path - use as is
                    validated_paths.append(path)
                    continue

                path_parts = path.split(".")
                top_level_category = path_parts[0]
                known_top_level = top_level_category in existing_top_level

                if known_top_level and len(path_parts) == 3:
                    # Well-formed new path under a known top-level category
                    logger.debug(f"Accepting well-formed 3-level path: {path}")
                    validated_paths.append(path)
                elif known_top_level and len(path_parts) > 3:
                    # Truncate to 3 levels
                    truncated_path = ".".join(path_parts[:3])
                    logger.debug(f"Truncating {path} to 3 levels: {truncated_path}")
                    validated_paths.append(truncated_path)
                elif not known_top_level and len(path_parts) >= 2:
                    # This appears to be a new top-level category suggestion
                    logger.debug(f"LLM suggested new top-level category: {path}")
                    validated_paths.append(path)  # Accept new category paths
                else:
                    # Remaining cases: a 1- or 2-level path under a known
                    # category, or a single-level unknown name.
                    if not self.suppress_path_warnings:
                        keywords = [word for word in path_parts if word != "other"]
                        related = [
                            p for p in all_paths if any(word in p for word in keywords)
                        ][:5]
                        logger.warning(
                            f"LLM suggested invalid path '{path}'. "
                            f"Available paths that contain relevant keywords: "
                            f"{related}"
                        )

                    # Try to find a close match or fall back to a more general path
                    found_valid = False
                    for i in range(len(path_parts), 0, -1):
                        partial_path = ".".join(path_parts[:i])
                        if partial_path in known_paths:
                            logger.debug(f"Using valid parent path: {partial_path}")
                            validated_paths.append(partial_path)
                            found_valid = True
                            break

                    if found_valid:
                        continue

                    # Reject paths that are too shallow (single-level)
                    if len(path_parts) < 2:
                        logger.warning(
                            f"Rejecting single-level path: {path}. Minimum 2 levels required."
                        )
                        continue

                    # Try to find a valid path in the same domain. Anything
                    # starting with "<domain>." has at least two levels.
                    domain = path_parts[0]
                    valid_domain_paths = [
                        p for p in all_paths if p.startswith(f"{domain}.")
                    ]

                    if valid_domain_paths:
                        # Prefer the first path with 3+ levels as the default,
                        # otherwise use the first path available in the domain.
                        fallback_path = next(
                            (
                                candidate
                                for candidate in valid_domain_paths
                                if len(candidate.split(".")) >= 3
                            ),
                            valid_domain_paths[0],
                        )
                        logger.debug(
                            f"Single-level '{domain}' converted to specific path: {fallback_path}"
                        )
                        validated_paths.append(fallback_path)
                    else:
                        logger.warning(
                            f"No valid paths found for domain {domain}, skipping classification"
                        )

            # Enforce top-level category rule for multi-label classification
            if len(validated_paths) > 1:
                top_level_categories = [p.split(".")[0] for p in validated_paths]
                if len(set(top_level_categories)) == 1:
                    # All paths are from the same top-level category, keep only the first (most relevant)
                    logger.debug(
                        f"Multiple paths from same top-level category {top_level_categories[0]}, keeping only primary path: {validated_paths[0]}"
                    )
                    validated_paths = [validated_paths[0]]
                elif len(set(top_level_categories)) > 2:
                    # More than 2 different top-level categories, keep only first 2
                    unique_categories = []
                    filtered_paths = []
                    for candidate in validated_paths:
                        category = candidate.split(".")[0]
                        if category not in unique_categories:
                            unique_categories.append(category)
                            filtered_paths.append(candidate)
                            if len(filtered_paths) == 2:
                                break
                    logger.debug(
                        f"More than 2 top-level categories, keeping first 2: {filtered_paths}"
                    )
                    validated_paths = filtered_paths

            # Set primary path for backward compatibility
            primary_path = validated_paths[0] if validated_paths else None

            # Parse confidence and apply user threshold filtering
            confidence = float(data.get("confidence", 0.0))
            is_memory = data.get("is_memory", False)

            # Override is_memory if confidence doesn't meet user's threshold
            if is_memory and confidence < self.thresholds["low"]:
                is_memory = False
                reasoning_override = f"{data.get('reasoning', '')} | OVERRIDDEN: Confidence {confidence:.2f} < user threshold {self.thresholds['low']:.1f}"
            else:
                reasoning_override = data.get("reasoning", "")

            # Parse profile updates (only a list is accepted)
            profile_updates = None
            profile_data = data.get("profile_updates")
            if (
                profile_data
                and profile_data != "no_profile_update"
                and isinstance(profile_data, list)
            ):
                profile_updates = profile_data
                logger.debug(f"Detected profile updates: {profile_updates}")

            def as_event_list(raw, sentinel):
                """Normalize a dict-or-list event payload to a list, else None."""
                if not raw or raw == sentinel:
                    return None
                if isinstance(raw, dict):
                    return [raw]
                if isinstance(raw, list):
                    return raw
                return None

            # Parse timeline events
            timeline_events = as_event_list(
                data.get("timeline_events"), "no_timeline_events"
            )
            if timeline_events is not None:
                logger.debug(f"Detected timeline events: {timeline_events}")

            # Parse location events
            location_events = as_event_list(
                data.get("location_events"), "no_location_events"
            )
            if location_events is not None:
                logger.debug(f"Detected location events: {location_events}")

            return ClassificationResult(
                is_memory=is_memory,
                path=primary_path if is_memory else None,
                paths=(
                    validated_paths if is_memory and len(validated_paths) > 0 else None
                ),
                confidence=confidence,
                confidence_level=ClassificationConfidence.LOW,  # Will be set later
                reasoning=reasoning_override,
                suggested_action=(
                    ClassificationAction.CLASSIFY
                    if is_memory
                    else ClassificationAction.SKIP
                ),
                profile_updates=profile_updates,
                timeline_events=timeline_events,
                location_events=location_events,
            )

        except Exception as e:
            logger.error(f"Failed to parse classification response: {e}")
            return ClassificationResult(
                is_memory=False,
                path=None,
                confidence=0.0,
                confidence_level=ClassificationConfidence.LOW,
                reasoning=f"Parse error: {e!s}",
                suggested_action=ClassificationAction.SKIP,
            )

    async def handle_low_confidence_classification(
        self,
        content: str,
        classification: ClassificationResult,
        metadata: dict | None = None,
    ) -> ClassificationResult:
        """
        Ask the LLM how to treat a low-confidence classification.

        The LLM decides between expanding the taxonomy below the current path,
        falling back to the parent path, or keeping the result. The passed-in
        ``classification`` object is mutated in place and returned.

        Args:
            content: Original content that was classified.
            classification: Initial classification result; returned untouched
                when it has no path.
            metadata: Optional metadata forwarded to the decision prompt and
                stored with any queued expansion item.

        Returns:
            The same ``classification`` object. Errors raised while obtaining
            or applying the decision are logged and otherwise ignored.
        """
        current_path = classification.path
        if not current_path:
            return classification

        # Build prompt for expansion decision
        prompt = self._build_expansion_decision_prompt(
            content, current_path, classification.confidence, metadata
        )

        try:
            llm_reply = await self.llm.ainvoke(prompt)
            decision = self._parse_expansion_decision(llm_reply)
            # A missing "action" key raises KeyError, handled by the except below.
            action = decision["action"]

            if action == "expand":
                # NOTE(review): the decision prompt asks for
                # "suggested_categories", not "suggested_path" - confirm that
                # this key is the intended one.
                classification.suggested_expansion = decision.get("suggested_path")

                # Queue the item under its current path until enough accumulate.
                queue = self.pending_expansions.setdefault(current_path, [])
                queue.append(
                    {
                        "content": content,
                        "metadata": metadata,
                        "suggested_expansion": decision.get("suggested_categories", []),
                    }
                )

                # If we have enough items, trigger expansion
                enough_items = (
                    len(self.pending_expansions[current_path])
                    >= self.taxonomy.min_items_threshold
                )
                if enough_items:
                    await self._trigger_expansion(current_path)

            elif action == "use_parent":
                # Use more generic category: drop the last path segment.
                # NOTE(review): only ``path`` is changed here; ``paths`` (if
                # set) keeps the child path - verify against callers.
                classification.use_parent = True
                parent, dot, _leaf = classification.path.rpartition(".")
                if dot:
                    classification.path = parent
                    classification.confidence = decision.get("parent_confidence", 0.7)

            classification.reasoning += (
                f" | Expansion decision: {decision.get('reasoning', '')}"
            )

        except Exception as e:
            logger.error(f"Expansion decision failed: {e}")

        return classification

    def _build_expansion_decision_prompt(
        self, content: str, path: str, confidence: float, metadata: dict | None
    ) -> str:
        """Build prompt for expansion decision."""
        prompt_parts = [
            f"The following content was classified to '{path}' with low confidence ({confidence:.2f}):",
            f"Content: {content}",
        ]

        if metadata:
            prompt_parts.append(f"Metadata: {json.dumps(metadata)}")

        prompt_parts.extend(
            [
                "",
                "Should we:",
                "1. EXPAND to more specific subcategories (if content is very detailed/specialized)",
                "   - Use when content has specific technical details, rare skills, or unique activities",
                "   - IMPORTANT: Follow proper hierarchical depth progression:",
                "     * Add ONE intermediate level at a time (don't jump from 'knowledge' to 'knowledge.quantum.entanglement.protocols')",
                "     * Use general-to-specific progression: domain → area → specialty → technique",
                "     * Examples: 'knowledge.music' → 'knowledge.music.piano' → 'knowledge.music.piano.improvisation'",
                "   - Suggest 2-3 subcategory names that follow natural conceptual hierarchies",
                "",
                "2. USE_PARENT category (if content is too vague/general for current specificity)",
                "   - Use when content is general/broad and current path is too specific",
                "   - Move up one level in the taxonomy hierarchy for better fit",
                "",
                "3. KEEP current classification (if confidence is acceptable as-is)",
                "",
                f"Current path depth: {len(path.split('.')) if path else 0} levels",
                "Recommended max depth: 4 levels for most concepts",
                "",
                "Respond in JSON format:",
                "{",
                '  "action": "expand" | "use_parent" | "keep",',
                '  "reasoning": "explanation with depth justification",',
                '  "suggested_categories": ["intermediate_category", "specific_category"] (if expanding),',
                '  "parent_confidence": 0.0-1.0 (if using parent)',
                "}",
            ]
        )

        return "\n".join(prompt_parts)

    def _parse_expansion_decision(self, response: Any) -> dict:
        """Parse expansion decision response."""
        try:
            if hasattr(response, "content"):
                content = response.content
            else:
                content = str(response)

            import re

            json_match = re.search(r"\{[^{}]*\}", content, re.DOTALL)
            if json_match:
                return json.loads(json_match.group())

        except Exception as e:
            logger.error(f"Failed to parse expansion decision: {e}")

        return {"action": "keep", "reasoning": "Parse error, keeping original"}

    async def _trigger_expansion(self, parent_path: str):
        """Trigger taxonomy expansion for a path."""
        try:
            # Use the iterative taxonomy's expansion
            result = await self.taxonomy.expand_subtree_with_llm(parent_path)

            if result.new_paths:
                logger.info(
                    f"Expanded {parent_path} with {len(result.new_paths)} new categories"
                )

                # Reclassify pending items
                if parent_path in self.pending_expansions:
                    for item in self.pending_expansions[parent_path]:
                        # Re-classify with new categories
                        await self.process_classification(
                            item["content"], item["metadata"]
                        )

                    # Clear pending items
                    del self.pending_expansions[parent_path]

        except Exception as e:
            logger.error(f"Expansion failed for {parent_path}: {e}")

    async def process_classification(
        self,
        content: str,
        metadata: dict | None = None,
        conversation_context: list[str] | None = None,
    ) -> ClassificationResult:
        """
        Classify content and decide what should happen to it.

        Args:
            content: Content to classify
            metadata: Optional metadata
            conversation_context: Optional list of previous conversation exchanges for context

        Returns:
            ClassificationResult with classification details and recommended action
        """
        # Classify first; non-memory content is returned untouched
        outcome = await self.classify_input(content, metadata, conversation_context)
        if not outcome.is_memory:
            return outcome

        # A suggested path that the taxonomy does not know caps an
        # over-confident score and is recorded in the reasoning
        if outcome.path and outcome.path not in self.taxonomy.get_all_paths():
            if outcome.confidence > 0.8:
                outcome.confidence = min(outcome.confidence, 0.75)
            outcome.confidence_level = self._get_confidence_level(outcome.confidence)
            outcome.reasoning += (
                f" | Suggested path '{outcome.path}' doesn't exist in taxonomy"
            )

        # Only low-confidence results need the expansion handling below
        if outcome.confidence_level != ClassificationConfidence.LOW:
            return outcome

        outcome = await self.handle_low_confidence_classification(
            content, outcome, metadata
        )

        # Translate the expansion decision into a suggested action
        if outcome.suggested_expansion:
            outcome.suggested_action = ClassificationAction.EXPAND
        elif outcome.use_parent:
            outcome.suggested_action = ClassificationAction.USE_PARENT
        elif outcome.path and outcome.path not in self.taxonomy.get_all_paths():
            # Unknown path: suggest growing the taxonomy
            outcome.suggested_action = ClassificationAction.EXPAND
            outcome.reasoning += " | Invalid path suggests need for expansion"
        elif outcome.confidence < self.thresholds["medium"]:
            outcome.suggested_action = ClassificationAction.SKIP
            outcome.reasoning += " | Confidence too low after expansion handling"

        return outcome

    async def _generate_entity_storage_key(
        self, path: str, content: str, memory_data: dict
    ) -> str:
        """
        Generate entity-specific storage keys to avoid duplicate records for the same entities.

        For entity paths, we'll ask the LLM to identify the specific entity mentioned.
        For non-entity paths, return the original path.
        """
        if not path.startswith("entity."):
            return path

        # For entity paths, we'll use the LLM to identify the specific entity
        # This is more accurate than regex-based extraction
        try:
            entity_name = await self._get_entity_name_from_llm(content, path)
            if entity_name:
                # Clean and format the entity name for storage key
                clean_name = entity_name.lower().replace(" ", "_").replace("-", "_")
                return f"{path}#{clean_name}"
        except Exception as e:
            logger.warning(f"Failed to get entity name from LLM: {e}")

        # If no specific entity found or LLM failed, use original path
        return path

    async def _get_entity_name_from_llm(self, content: str, path: str) -> str:
        """
        Use the LLM to identify the specific entity mentioned in the content for the given path.
        This is more accurate than regex-based extraction.
        """
        # Determine what type of entity we're looking for based on the path
        entity_type = "entity"
        if "people.mentioned" in path:
            entity_type = "person name"
        elif "places." in path:
            entity_type = "place or location"
        elif "organizations." in path:
            entity_type = "organization or company name"
        elif "time." in path:
            entity_type = "time or date reference"
        elif "objects." in path:
            entity_type = "object or item"

        prompt = f"""Extract the most important {entity_type} mentioned in this text. Return only the name/identifier, nothing else.

Text: {content}
Path: {path}

Requirements:
- Return only the most relevant {entity_type} mentioned
- Use the exact name/phrase as it appears in the text
- If multiple {entity_type}s are mentioned, return the most prominent one
- Return just the name without quotes or explanation
- If no clear {entity_type} is found, return "none"

Examples:
- Text: "I went with my friend Sarah" → Sarah
- Text: "We visited New York City" → New York City
- Text: "I work at Google Inc" → Google Inc
- Text: "Yesterday was great" → yesterday"""

        try:
            response = await self.llm.ainvoke(prompt)
            entity_name = response.content.strip()

            # Clean up the response
            if entity_name.lower() in ["none", "null", "n/a", ""]:
                return None

            # Remove quotes if present
            entity_name = entity_name.strip("\"'")

            return entity_name if entity_name else None

        except Exception as e:
            logger.error(f"LLM entity extraction failed: {e}")
            return None

    async def process_memory_with_storage(
        self,
        content: str,
        metadata: dict | None = None,
        conversation_context: list[str] | None = None,
    ) -> MemoryProcessingResult:
        """
        Complete memory processing including classification and storage.

        Flow: classify the content, bail out early (action SKIP) when it is not
        memory-worthy, is below the "low" confidence threshold, has no paths, or
        no memory store is configured; otherwise forward any detected profile,
        timeline and location events to their managers (failures there are
        logged, not raised) and write the memory under at most two of the
        classified paths, merging with an existing record when one is found.

        Args:
            content: Content to process
            metadata: Optional metadata
            conversation_context: Optional list of previous conversation exchanges for context

        Returns:
            MemoryProcessingResult with classification and storage details.
            ``success`` is set to False when no store is configured or the
            storage step raises outside the per-path handling.
        """
        # Step 1: Classify the content
        classification = await self.process_classification(
            content, metadata, conversation_context
        )

        # Initialize result; SKIP is the default until something is stored
        result = MemoryProcessingResult(
            classification=classification,
            memory_action=MemoryAction.SKIP,
            expanded_paths=[],
        )

        # Step 2: Handle based on classification
        if not classification.is_memory:
            result.storage_reasoning = "Content not memory-worthy"
            return result

        # Step 2.1: Check confidence threshold - CRITICAL for user-controlled aggressiveness
        if classification.confidence < self.thresholds["low"]:
            result.storage_reasoning = f"Confidence {classification.confidence:.2f} below threshold {self.thresholds['low']}"
            result.memory_action = MemoryAction.SKIP
            return result

        # Check if we have any paths to store under
        paths_to_store = classification.all_paths
        if not paths_to_store:
            result.storage_reasoning = "No classification paths provided"
            return result

        if not self.memory_store:
            result.storage_reasoning = "No memory store available"
            result.success = False
            return result

        # Step 2.5: Apply profile updates if detected
        # Best-effort: a failure here is logged and storage still proceeds
        if classification.profile_updates and self.profile_manager:
            try:
                await self.profile_manager.apply_profile_updates(
                    classification.profile_updates, metadata
                )
                logger.debug(
                    f"Applied {len(classification.profile_updates)} profile updates"
                )
            except Exception as e:
                logger.error(f"Failed to apply profile updates: {e}")

        # Step 2.6: Apply timeline events if detected (same best-effort handling)
        if classification.timeline_events and self.timeline_manager:
            try:
                await self.timeline_manager.apply_timeline_events(
                    classification.timeline_events, metadata
                )
                logger.debug(
                    f"Applied {len(classification.timeline_events)} timeline events"
                )
            except Exception as e:
                logger.error(f"Failed to apply timeline events: {e}")

        # Step 2.7: Apply location events if detected
        # Unlike profile/timeline, a missing manager is reported with a warning
        if classification.location_events:
            logger.debug(f"Detected location events: {classification.location_events}")
            if self.location_manager:
                try:
                    await self.location_manager.apply_location_events(
                        classification.location_events, metadata
                    )
                    logger.debug(
                        f"Applied {len(classification.location_events)} location events"
                    )
                except Exception as e:
                    logger.error(f"Failed to apply location events: {e}")
            else:
                logger.warning(
                    "Location manager not configured, skipping location events"
                )

        # Step 3: Handle memory storage under multiple paths
        # Records are namespaced by the active taxonomy version
        namespace = ("memory", self.taxonomy_version.value)
        stored_paths = []
        storage_errors = []

        try:
            # Store simplified memory structure with only essential fields
            from datetime import datetime

            # Prepare memory data with conversation context embedded in raw_text
            formatted_content = content
            if conversation_context:
                # Include context directly in raw_text for clear association
                context_lines = []
                for ctx in conversation_context:
                    context_lines.append(f"Context: {ctx}")
                context_section = "\n".join(context_lines) + "\n"
                formatted_content = f"{context_section}{content}"

            # NOTE(review): "classification_paths" is captured before the
            # two-path cap applied below, so it can list paths the record is
            # not actually stored under - confirm this is intended.
            memory_data = {
                "raw_text": formatted_content,  # Store raw conversation text with context
                "session_date": (
                    metadata.get("session_date", datetime.now().isoformat())
                    if metadata
                    else datetime.now().isoformat()
                ),  # Use actual session date from JSON
                "confidence": classification.confidence,
                "classification_paths": paths_to_store,  # Store all paths this content was classified under
            }

            # Keep conversation context in metadata for search/retrieval purposes
            if conversation_context:
                memory_data["conversation_context"] = conversation_context

            # Limit to maximum 2 paths for conservative multi-labeling
            paths_to_store = paths_to_store[:2]

            # Store under each classified path; one failing path does not
            # prevent the others from being written
            for path in paths_to_store:
                try:
                    # For entity paths, create more specific storage keys to avoid duplication
                    storage_key = await self._generate_entity_storage_key(
                        path, content, memory_data
                    )

                    # Check for existing content at this storage key
                    existing = self.memory_store.get(namespace, storage_key)

                    if existing is None:
                        # Store new memory (the same dict object is written for every new key)
                        self.memory_store.put(namespace, storage_key, memory_data)
                        stored_paths.append(storage_key)
                    else:
                        # Handle existing memory - merge with new content
                        # _merge_memories returns None on conflict or on error
                        merged_memory = await self._merge_memories(
                            existing, memory_data, content, conversation_context
                        )
                        if merged_memory:
                            self.memory_store.put(namespace, storage_key, merged_memory)
                            stored_paths.append(storage_key)
                            logger.debug(
                                f"Merged new content with existing memory at storage key {storage_key}"
                            )
                        else:
                            logger.debug(
                                f"Conflict detected, skipping merge at storage key {storage_key}"
                            )

                except Exception as e:
                    storage_errors.append(f"Failed to store at {path}: {e}")
                    logger.error(f"Storage error for path {path}: {e}")

            if stored_paths:
                # NOTE(review): STORE is reported for merged records as well as
                # new ones; MemoryAction.MERGE is not used here.
                result.memory_action = MemoryAction.STORE
                result.memory_path = stored_paths[
                    0
                ]  # Primary path for backward compatibility
                result.new_content = content
                if len(stored_paths) == 1:
                    result.storage_reasoning = (
                        f"Stored raw memory at {stored_paths[0]} ({len(content)} chars)"
                    )
                else:
                    result.storage_reasoning = f"Stored raw memory at {len(stored_paths)} paths: {', '.join(stored_paths)} ({len(content)} chars)"
            else:
                # NOTE(review): also reached when every path was skipped by a
                # merge conflict, in which case storage_errors is empty and the
                # message ends with "Errors: ".
                result.memory_action = MemoryAction.SKIP
                result.storage_reasoning = (
                    f"Failed to store at any path. Errors: {'; '.join(storage_errors)}"
                )

        except Exception as e:
            # Anything raised outside the per-path handling marks the run as failed
            result.success = False
            result.storage_reasoning = f"Storage failed: {e}"

        return result

    async def _merge_memories(
        self,
        existing_memory: dict,
        new_memory: dict,
        new_content: str,
        conversation_context: list[str] | None = None,
    ) -> dict | None:
        """
        Intelligently merge new memory content with existing memory.

        Args:
            existing_memory: The existing memory data
            new_memory: The new memory data to merge
            new_content: The new raw content text
            conversation_context: Optional conversation context

        Returns:
            Merged memory dict if successful, None if conflict detected
        """
        try:
            # Get existing content
            existing_content = existing_memory.get("raw_text", "")

            # Check for conflicts using LLM
            # Extract the primary subject from metadata if available
            primary_subject = new_memory.get("metadata", {}).get("speaker")
            conflict_check = await self._check_for_conflicts(
                existing_content, new_content, primary_subject
            )

            if conflict_check.get("has_conflict", False):
                logger.warning(
                    f"Conflict detected between existing and new content: {conflict_check.get('reasoning', 'Unknown conflict')}"
                )
                return None

            # No conflict - proceed with merge
            merged_memory = existing_memory.copy()

            # Append new content to existing with clear separation
            if existing_content and new_content:
                # Use the session_date directly - it's already formatted from the JSON
                timestamp = new_memory.get("session_date", "unknown time")

                # Create clear separation with context included in each entry
                new_entry_header = f"--- NEW ENTRY ({timestamp}) ---"

                # Include the conversation context for this new entry if available
                context_section = ""
                if conversation_context:
                    context_lines = []
                    for ctx in conversation_context:
                        context_lines.append(f"  Context: {ctx}")
                    context_section = "\n".join(context_lines) + "\n"

                merged_memory["raw_text"] = (
                    f"{existing_content}\n\n{new_entry_header}\n{context_section}{new_content}"
                )
            elif new_content:
                # For first entry, include context if available
                if conversation_context:
                    context_lines = []
                    for ctx in conversation_context:
                        context_lines.append(f"Context: {ctx}")
                    context_section = "\n".join(context_lines) + "\n"
                    merged_memory["raw_text"] = f"{context_section}{new_content}"
                else:
                    merged_memory["raw_text"] = new_content

            # Update session date to most recent
            merged_memory["session_date"] = new_memory.get(
                "session_date", merged_memory.get("session_date")
            )

            # Update confidence to higher of the two
            existing_confidence = existing_memory.get("confidence", 0.0)
            new_confidence = new_memory.get("confidence", 0.0)
            merged_memory["confidence"] = max(existing_confidence, new_confidence)

            # Keep conversation context for metadata but avoid duplication since it's now in raw_text
            # Store the most recent conversation context for search/retrieval purposes
            if conversation_context:
                merged_memory["conversation_context"] = conversation_context
            else:
                # Keep existing context if no new context provided
                merged_memory["conversation_context"] = existing_memory.get(
                    "conversation_context", []
                )

            # Update classification paths (union of both sets)
            existing_paths = set(existing_memory.get("classification_paths", []))
            new_paths = set(new_memory.get("classification_paths", []))
            merged_memory["classification_paths"] = list(existing_paths | new_paths)

            # Add merge metadata
            merged_memory["merge_count"] = existing_memory.get("merge_count", 0) + 1
            merged_memory["last_merged"] = new_memory.get("session_date")

            return merged_memory

        except Exception as e:
            logger.error(f"Error merging memories: {e}")
            return None

    async def _check_for_conflicts(
        self,
        existing_content: str,
        new_content: str,
        primary_subject: str | None = None,
    ) -> dict:
        """
        Check if new content conflicts with existing content using LLM.

        Args:
            existing_content: The existing memory content
            new_content: The new content to check for conflicts
            primary_subject: The primary person this memory is about (optional)

        Returns:
            Dict with conflict analysis
        """
        if not existing_content or not new_content:
            return {"has_conflict": False, "reasoning": "No content to compare"}

        subject_guidance = ""
        if primary_subject:
            subject_guidance = f"""
IMPORTANT: This memory is about {primary_subject}. Only check for conflicts related to {primary_subject}.
IGNORE information about other people mentioned in the conversation - they are not relevant for conflict detection.
"""

        prompt = f"""Analyze if these two pieces of information conflict with each other.
{subject_guidance}
Existing information: {existing_content}
New information: {new_content}

Look for factual contradictions such as:
- Different values for the same attribute (age, location, job, etc.)
- Contradictory statements about preferences or facts
- Mutually exclusive statements

Do NOT consider these as conflicts:
- Additional details that expand on existing information
- Related but different aspects of the same topic
- Temporal progression (things changing over time)
- Information about other people mentioned in conversations

Respond in JSON format:
{{
  "has_conflict": true/false,
  "reasoning": "explanation of the conflict or why no conflict exists"
}}"""

        try:
            response = await self.llm.ainvoke(prompt)
            content = (
                response.content if hasattr(response, "content") else str(response)
            )

            # Parse JSON response
            import re

            json_match = re.search(r"\{[^{}]*\}", content, re.DOTALL)
            if json_match:
                result = json.loads(json_match.group())
                return result
            else:
                return {
                    "has_conflict": False,
                    "reasoning": "Failed to parse conflict check",
                }

        except Exception as e:
            logger.error(f"Conflict check failed: {e}")
            return {"has_conflict": False, "reasoning": f"Conflict check error: {e}"}

    async def _create_semantic_summary(
        self, content: str, path: str, metadata: dict | None = None
    ) -> dict:
        """
        Create a concise, structured summary of the content based on taxonomy path.

        Args:
            content: Original content to summarize
            path: Taxonomy path for context-aware summarization
            metadata: Optional metadata for additional context

        Returns:
            Dict with 'summary' and 'structured_data' keys
        """
        # Build context-aware summarization prompt
        prompt_parts = [
            "Create a concise, structured summary of the following content.",
            "Extract key information and create both a brief summary and structured data.",
            "",
            f"Content: {content}",
            f"Classification path: {path}",
        ]

        if metadata:
            prompt_parts.append(f"Context: {json.dumps(metadata)}")

        # Add dynamic path-based context instead of hard-coded guidance
        prompt_parts.extend(
            [
                "",
                f"Context: This content is classified under '{path}' in our taxonomy.",
                "Extract the most relevant information for this classification category.",
                "Focus on specific, actionable details rather than general statements.",
                "",
                "Respond in JSON format:",
                "{",
                '  "summary": "1-2 sentence concise summary capturing the essence",',
                '  "structured_data": {',
                '    "key_field_1": "extracted_value_1",',
                '    "key_field_2": "extracted_value_2"',
                "    // Add relevant fields based on the taxonomy path",
                "  }",
                "}",
            ]
        )

        try:
            response = await self.llm.ainvoke("\n".join(prompt_parts))

            # Parse response
            if hasattr(response, "content"):
                content_str = response.content
            else:
                content_str = str(response)

            # Extract JSON from response
            import re

            json_match = re.search(r"\{.*\}", content_str, re.DOTALL)
            if json_match:
                result = json.loads(json_match.group())

                # Validate and clean up the result
                summary = result.get(
                    "summary", content[:200] + "..." if len(content) > 200 else content
                )
                structured_data = result.get("structured_data", {})

                return {"summary": summary, "structured_data": structured_data}
            else:
                logger.warning(
                    f"Failed to parse summarization response: {content_str[:200]}..."
                )
                # Fallback to simple truncation
                return {
                    "summary": content[:200] + "..." if len(content) > 200 else content,
                    "structured_data": {},
                }

        except Exception as e:
            logger.error(f"Summarization failed: {e}")
            # Fallback to simple truncation
            return {
                "summary": content[:200] + "..." if len(content) > 200 else content,
                "structured_data": {},
            }

    async def _handle_memory_update(
        self,
        new_content: str,
        existing_data: dict,
        path: str,
        namespace: tuple,
        metadata: dict | None,
    ) -> dict:
        """Ask the LLM how to reconcile new content with a stored memory and apply it.

        The model chooses between replace, append, merge and skip. Every
        action except skip writes a new record (``raw_text``, ``session_date``,
        ``confidence``) to ``self.memory_store`` at ``path``. If anything in the
        decision or write step raises, the new content is appended to the
        existing text and stored with a lower confidence.

        Args:
            new_content: Text that arrived for this path.
            existing_data: Record currently stored at ``path``.
            path: Storage key of the memory being updated.
            namespace: Store namespace the record lives in.
            metadata: Optional metadata included in the prompt.

        Returns:
            Dict with ``action`` (a ``MemoryAction``), ``reasoning`` and
            ``new_content`` (the text now held for this path).
        """
        from datetime import datetime

        existing_raw = existing_data.get("raw_text", "")
        # Records written by this class keep their text under "raw_text"; show
        # that to the model, falling back to a "content" key only when
        # "raw_text" is absent or empty.
        existing_content = existing_raw or existing_data.get("content", "")

        def _store(text: str, confidence: float) -> None:
            # Single place that writes the simplified memory structure
            self.memory_store.put(
                namespace,
                path,
                {
                    "raw_text": text,
                    "session_date": datetime.now().isoformat(),
                    "confidence": confidence,
                },
            )

        # Ask LLM how to handle the update
        prompt = [
            f"Memory path: {path}",
            "",
            f"Existing content: {existing_content}",
            "",
            f"New content: {new_content}",
        ]

        if metadata:
            # default=str keeps non-JSON metadata values from raising here
            prompt.append(f"New metadata: {json.dumps(metadata, default=str)}")

        prompt.extend(
            [
                "",
                "How should we update this memory?",
                "Options:",
                "- replace: Replace old content with new",
                "- append: Add new content to existing",
                "- merge: Intelligently combine both",
                "- skip: Keep existing, ignore new",
                "",
                "Respond in JSON:",
                "{",
                '  "action": "replace" | "append" | "merge" | "skip",',
                '  "reasoning": "explanation",',
                '  "merged_content": "..." (if merge)',
                "}",
            ]
        )

        try:
            response = await self.llm.ainvoke("\n".join(prompt))
            content = (
                response.content if hasattr(response, "content") else str(response)
            )

            # Decode from each "{" in turn so braces inside merged_content or
            # reasoning do not break extraction of the decision object
            decision = None
            decoder = json.JSONDecoder()
            start = content.find("{")
            while start != -1 and decision is None:
                try:
                    candidate, _ = decoder.raw_decode(content[start:])
                except json.JSONDecodeError:
                    candidate = None
                if isinstance(candidate, dict):
                    decision = candidate
                else:
                    start = content.find("{", start + 1)

            if decision is None:
                decision = {
                    "action": "append",
                    "reasoning": "Parse error, defaulting to append",
                }

            action = decision.get("action", "append")
            reasoning = decision.get("reasoning", "")

            if action == "replace":
                _store(new_content, 0.8)
                return {
                    "action": MemoryAction.REPLACE,
                    "reasoning": reasoning,
                    "new_content": new_content,
                }

            if action == "append":
                # Combine existing raw text with new content
                combined_text = f"{existing_raw}\n\n{new_content}"
                _store(combined_text, 0.8)
                return {
                    "action": MemoryAction.APPEND,
                    "reasoning": reasoning,
                    "new_content": combined_text,
                }

            if action == "merge":
                # Use the model's merged text, or a plain join if it gave none
                merged_content = decision.get(
                    "merged_content", f"{existing_raw}\n{new_content}"
                )
                _store(merged_content, 0.8)
                return {
                    "action": MemoryAction.MERGE,
                    "reasoning": reasoning,
                    "new_content": merged_content,
                }

            # "skip" or any unrecognized action: leave the stored record alone
            return {
                "action": MemoryAction.SKIP,
                "reasoning": reasoning,
                "new_content": existing_raw,
            }

        except Exception as e:
            # Fallback to append on error, stored with reduced confidence
            combined = f"{existing_raw}\n\n{new_content}"
            _store(combined, 0.7)
            return {
                "action": MemoryAction.APPEND,
                "reasoning": f"Error in LLM decision, defaulted to append: {e}",
                "new_content": combined,
            }

    def get_stored_memories(self, limit: int = 10) -> list[dict]:
        """Return up to ``limit`` stored memories as path/content/timestamp dicts.

        Search results may be ``(namespace, key, value)`` tuples or objects
        exposing ``key`` and ``value``; anything else is ignored. Returns an
        empty list when no store is configured or the search fails.
        """
        if not self.memory_store:
            return []

        store_namespace = ("memory", self.taxonomy_version.value)
        try:
            hits = self.memory_store.search(store_namespace, filter={}, limit=limit)
            collected = []
            for hit in hits:
                if isinstance(hit, tuple) and len(hit) == 3:
                    # Tuple form: timestamp lives inside a dict payload, if any
                    path, payload = hit[1], hit[2]
                    stamp = (
                        payload.get("timestamp") if isinstance(payload, dict) else None
                    )
                elif hasattr(hit, "key") and hasattr(hit, "value"):
                    # Object form: timestamp is an optional attribute
                    path, payload = hit.key, hit.value
                    stamp = getattr(hit, "timestamp", None)
                else:
                    continue
                collected.append(
                    {"path": path, "content": payload, "timestamp": stamp}
                )
            return collected
        except Exception as exc:
            logger.error(f"Failed to retrieve memories: {exc}")
            return []

    async def get_classification_statistics(self) -> dict:
        """Get statistics about the classification system.

        Returns:
            Dict with the keys ``taxonomy_info``, ``expansion_stats``,
            ``pending_expansions`` (number of queued items per parent path)
            and ``confidence_thresholds``.
        """

        def _taxonomy_report(method_name: str) -> dict:
            # The preset taxonomy wrapper built in __init__ only exposes path
            # lookups, so calling these reporters on it unconditionally raised
            # AttributeError. Prefer self.taxonomy when it provides the
            # reporter, otherwise fall back to the iterative taxonomy, and
            # report an empty dict when neither provides it.
            for provider in (self.taxonomy, getattr(self, "iterative_taxonomy", None)):
                reporter = getattr(provider, method_name, None)
                if callable(reporter):
                    return reporter()
            return {}

        return {
            "taxonomy_info": _taxonomy_report("get_taxonomy_info"),
            "expansion_stats": _taxonomy_report("get_expansion_statistics"),
            "pending_expansions": {
                path: len(items) for path, items in self.pending_expansions.items()
            },
            "confidence_thresholds": self.thresholds,
        }

    def get_category_structure(self) -> dict:
        """Get the current category structure for passing to LLM context.

        Returns:
            Dict with the taxonomy ``version``, every known path
            (``all_paths``), the dot-free paths other than ``"other"``
            (``first_level_categories``), an LLM-oriented export
            (``structure_snapshot``, ``None`` when no taxonomy object can
            produce one), per-depth path counts for depths 1-5
            (``depth_analysis``) and ``total_paths``.
        """
        all_paths = self.taxonomy.get_all_paths()

        # Split each path once, then count how many sit at each depth 1..5.
        depths = [len(p.split(".")) for p in all_paths]
        depth_analysis = {f"depth_{depth}": depths.count(depth) for depth in range(1, 6)}

        # The preset taxonomy wrapper built in __init__ has no export_for_llm(),
        # so calling it unconditionally raised AttributeError. Use whichever
        # taxonomy object provides the export, preferring self.taxonomy.
        structure_snapshot = None
        for provider in (self.taxonomy, getattr(self, "iterative_taxonomy", None)):
            exporter = getattr(provider, "export_for_llm", None)
            if callable(exporter):
                structure_snapshot = exporter()
                break

        return {
            "version": self.taxonomy_version.value,
            "all_paths": all_paths,
            "first_level_categories": [
                p for p in all_paths if "." not in p and p != "other"
            ],
            "structure_snapshot": structure_snapshot,
            "depth_analysis": depth_analysis,
            "total_paths": len(all_paths),
        }

    async def evaluate_semantic_appropriateness(
        self, content: str, path: str, context_paths: list[str] | None = None
    ) -> dict:
        """
        Use LLM to evaluate if content semantically belongs in the assigned path.

        Args:
            content: The memory content to evaluate
            path: The taxonomy path where content is stored
            context_paths: Other similar paths for comparison context

        Returns:
            Dict with appropriateness score, reasoning, and suggestions
        """
        # Build evaluation prompt
        prompt_parts = [
            "You are a taxonomy evaluation expert. Analyze whether the given content semantically belongs in the assigned taxonomy path.",
            "",
            f'Content: "{content}"',
            f"Assigned Path: {path}",
            "",
            "Path Components Analysis:",
        ]

        # Break down path components for LLM understanding
        path_parts = path.split(".")
        for i, part in enumerate(path_parts):
            level_name = ["Domain", "Area", "Category", "Subcategory", "Detail"][
                min(i, 4)
            ]
            prompt_parts.append(f"  {level_name}: {part.replace('_', ' ').title()}")

        # Add context of similar paths if available
        if context_paths:
            prompt_parts.extend(
                [
                    "",
                    "Similar paths in taxonomy for comparison:",
                ]
            )
            for similar_path in context_paths[:5]:
                prompt_parts.append(f"  - {similar_path}")

        prompt_parts.extend(
            [
                "",
                "Evaluate:",
                "1. Does the content conceptually belong in this taxonomy path?",
                "2. Is each level of the hierarchy appropriate for this content?",
                "3. Are there better alternative paths in the taxonomy?",
                "4. Is the path depth appropriate for the content's specificity?",
                "",
                "Consider:",
                "- Semantic meaning and conceptual relationships",
                "- Logical hierarchical progression",
                "- Domain appropriateness",
                "- Content specificity vs path granularity",
                "",
                "Respond in JSON format:",
                "{",
                '  "appropriate": true/false,',
                '  "confidence": 0.0-1.0,',
                '  "score": 0-100,',
                '  "reasoning": "detailed explanation of why this classification is good/bad",',
                '  "issues": ["list", "of", "specific", "problems"] or [],',
                '  "suggested_path": "better.path.if.needed" or null,',
                '  "path_quality": "excellent" | "good" | "acceptable" | "poor" | "completely_wrong"',
                "}",
            ]
        )

        try:
            response = await self.llm.ainvoke("\n".join(prompt_parts))

            # Parse LLM response
            if hasattr(response, "content"):
                response_content = response.content
            else:
                response_content = str(response)

            # Extract JSON from response
            import re

            json_match = re.search(r"\{[^{}]*\}", response_content, re.DOTALL)
            if json_match:
                result = json.loads(json_match.group())
            else:
                # Fallback if JSON parsing fails
                result = {
                    "appropriate": True,
                    "confidence": 0.5,
                    "score": 50,
                    "reasoning": "Could not parse LLM response",
                    "issues": ["Parse error"],
                    "suggested_path": None,
                    "path_quality": "acceptable",
                }

            # Ensure required fields exist
            result.setdefault("appropriate", result.get("score", 50) >= 70)
            result.setdefault("confidence", 0.5)
            result.setdefault("score", 50)
            result.setdefault("reasoning", "No reasoning provided")
            result.setdefault("issues", [])
            result.setdefault("suggested_path", None)
            result.setdefault("path_quality", "acceptable")

            return result

        except Exception as e:
            logger.error(f"Semantic appropriateness evaluation failed: {e}")
            return {
                "appropriate": True,
                "confidence": 0.0,
                "score": 0,
                "reasoning": f"Evaluation failed: {e}",
                "issues": ["Evaluation error"],
                "suggested_path": None,
                "path_quality": "unknown",
            }

    async def batch_evaluate_semantic_appropriateness(
        self, memory_items: list[dict]
    ) -> list[dict]:
        """
        Run the semantic appropriateness check over a batch of memory items.

        Items are evaluated one after another, in input order. Each evaluation
        receives, as comparison context, the paths of the other items that
        share its first path component and have a different path.

        Args:
            memory_items: List of dicts with 'path' and 'content' keys

        Returns:
            One evaluation dict per input item, each extended with an
            ``item`` key that references the evaluated item
        """
        # Bucket the items by the first component of their path.
        by_domain = {}
        for entry in memory_items:
            by_domain.setdefault(entry["path"].split(".")[0], []).append(entry)

        evaluations = []
        for entry in memory_items:
            same_domain = by_domain.get(entry["path"].split(".")[0], [])
            sibling_paths = [
                other["path"] for other in same_domain if other["path"] != entry["path"]
            ]

            verdict = await self.evaluate_semantic_appropriateness(
                entry["content"], entry["path"], sibling_paths
            )
            verdict["item"] = entry
            evaluations.append(verdict)

        return evaluations

    async def classify_async(
        self, content: str, metadata: dict | None = None
    ) -> ClassificationResult:
        """Alias of ``classify_input`` kept for the SemanticClassifier interface."""
        outcome = await self.classify_input(content, metadata)
        return outcome

__init__

__init__(llm: Any, memory_store: Any | None = None, taxonomy_version: TaxonomyVersion = TaxonomyVersion.GENERAL, confidence_thresholds: dict | None = None, expansion_strategy: LLMExpansionStrategy = LLMExpansionStrategy.FOCUSED_SUBTREE, min_items_for_expansion: int = 3, profile_manager: Any | None = None, timeline_manager: Any | None = None, location_manager: Any | None = None, suppress_path_warnings: bool = True, enable_metadata_extraction: bool = False, taxonomy_loader: TaxonomyLoader | None = None)

Initialize the intelligent classifier.

Parameters:

Name Type Description Default
llm Any

Language model for classification and decisions

required
memory_store Any | None

Optional memory store for actual storage operations

None
taxonomy_version TaxonomyVersion

Taxonomy preset to use

GENERAL
confidence_thresholds dict | None

Custom confidence thresholds

None
expansion_strategy LLMExpansionStrategy

Strategy for taxonomy expansion

FOCUSED_SUBTREE
min_items_for_expansion int

Minimum items before expansion

3
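profile_manager Any | None

Optional profile manager for handling profile updates

None
timeline_manager Any | None

Optional timeline manager; detected timeline events are applied through it

None
location_manager Any | None

Optional location manager; detected location events are applied through it

None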
suppress_path_warnings bool

Whether to suppress warnings for invalid LLM-suggested paths

True
enable_metadata_extraction bool

Enable profile/timeline/location extraction (slower but richer)

False
taxonomy_loader TaxonomyLoader | None

Optional TaxonomyLoader for loading taxonomy from store. When provided, taxonomy data is loaded from the store's taxonomy namespace. When None, falls back to hardcoded TaxonomyPresets.

None
Source code in src/memoir/classifier/intelligent.py
def __init__(
    self,
    llm: Any,
    memory_store: Any | None = None,
    taxonomy_version: TaxonomyVersion = TaxonomyVersion.GENERAL,
    confidence_thresholds: dict | None = None,
    expansion_strategy: LLMExpansionStrategy = LLMExpansionStrategy.FOCUSED_SUBTREE,
    min_items_for_expansion: int = 3,
    profile_manager: Any | None = None,
    timeline_manager: Any | None = None,
    location_manager: Any | None = None,
    suppress_path_warnings: bool = True,
    enable_metadata_extraction: bool = False,
    taxonomy_loader: TaxonomyLoader | None = None,
):
    """
    Initialize the intelligent classifier.

    Args:
        llm: Language model for classification and decisions
        memory_store: Optional memory store for actual storage operations
        taxonomy_version: Taxonomy preset to use
        confidence_thresholds: Custom confidence thresholds. Keys that are
                               not supplied keep their defaults
                               (high 0.8, medium 0.6, low 0.0).
        expansion_strategy: Strategy for taxonomy expansion
        min_items_for_expansion: Minimum items before expansion
        profile_manager: Optional profile manager for handling profile updates
        timeline_manager: Optional timeline manager; detected timeline events
                          are applied through it
        location_manager: Optional location manager; detected location events
                          are applied through it
        suppress_path_warnings: Whether to suppress warnings for invalid LLM-suggested paths
        enable_metadata_extraction: Enable profile/timeline/location extraction (slower but richer)
        taxonomy_loader: Optional TaxonomyLoader for loading taxonomy from store.
                         When provided, taxonomy data is loaded from the store's taxonomy namespace.
                         When None, falls back to hardcoded TaxonomyPresets.
    """
    self.llm = llm
    self.memory_store = memory_store
    self.profile_manager = profile_manager
    self.timeline_manager = timeline_manager
    self.location_manager = location_manager
    self.taxonomy_version = taxonomy_version
    self.suppress_path_warnings = suppress_path_warnings
    self.enable_metadata_extraction = enable_metadata_extraction
    self._taxonomy_loader = taxonomy_loader

    # Initialize taxonomy - prefer store-based loading if taxonomy_loader provided
    preset_paths = self._load_taxonomy_paths()

    # Create a simple taxonomy object that provides get_all_paths() method
    class PresetTaxonomy:
        def __init__(self, preset_paths):
            self.preset_paths = preset_paths
            self._top_level_categories = set(preset_paths.keys())
            # Do NOT add single-level categories to valid paths
            # Only add multi-level paths (2+ levels minimum)
            self._all_paths = [
                f"{category}.{path}"
                for category, paths in preset_paths.items()
                for path in paths
            ]

        def get_all_paths(self):
            return sorted(self._all_paths)

        def is_valid_path(self, path):
            return path in self._all_paths

        def get_top_level_categories(self):
            return self._top_level_categories

    self.taxonomy = PresetTaxonomy(preset_paths)

    # Also keep iterative taxonomy for expansion capabilities if needed
    self.iterative_taxonomy = LLMIterativeTaxonomy(
        taxonomy_version=taxonomy_version,
        llm=llm,
        expansion_strategy=expansion_strategy,
        min_items_threshold=min_items_for_expansion,
    )

    # Confidence thresholds. Overrides are layered on top of the defaults so
    # that a partial dict (e.g. only "low") cannot cause a KeyError when the
    # "medium" or "low" threshold is looked up later.
    default_thresholds = {
        "high": 0.8,
        "medium": 0.6,
        "low": 0.0,
    }
    self.thresholds = {**default_thresholds, **(confidence_thresholds or {})}

    # Track pending expansions
    self.pending_expansions = {}

    # Cache for taxonomy data (loaded once from store)
    self._examples_cache: list[tuple[str, str, str]] | None = None
    self._descriptions_cache: dict[str, str] | None = None

classify_input async

classify_input(content: str, metadata: dict | None = None, conversation_context: list[str] | None = None, return_prompt: bool = False) -> ClassificationResult

Classify input using LLM to determine if it's memory-worthy and where to store it.

Parameters:

Name Type Description Default
content str

The content to classify

required
metadata dict | None

Optional metadata about the content

None
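conversation_context list[str] | None

Optional list of previous conversation exchanges for context

None
return_prompt bool

When True, the prompt sent to the LLM is attached to the result as llm_prompt

False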

Returns:

Type Description
ClassificationResult

ClassificationResult with classification details

Source code in src/memoir/classifier/intelligent.py
async def classify_input(
    self,
    content: str,
    metadata: dict | None = None,
    conversation_context: list[str] | None = None,
    return_prompt: bool = False,
) -> ClassificationResult:
    """
    Ask the LLM whether the content is memory-worthy and where it belongs.

    Args:
        content: The content to classify
        metadata: Optional metadata about the content
        conversation_context: Optional list of previous conversation exchanges for context
        return_prompt: When True, the prompt sent to the LLM is attached to
            the result as ``llm_prompt``

    Returns:
        The parsed ClassificationResult with its confidence level filled in.
        If the LLM call or the parsing raises, a non-memory result with the
        SKIP action and zero confidence is returned instead.
    """
    # The prompt is built from the content and the taxonomy's current paths.
    known_paths = self.taxonomy.get_all_paths()
    llm_prompt = self._build_classification_prompt(
        content, known_paths, metadata, conversation_context
    )

    try:
        raw_reply = await self.llm.ainvoke(llm_prompt)
        parsed = self._parse_classification_response(raw_reply)

        # Derive the confidence level from the numeric confidence
        parsed.confidence_level = self._get_confidence_level(parsed.confidence)

        if return_prompt:
            parsed.llm_prompt = llm_prompt
    except Exception as e:
        logger.error(f"Classification failed: {e}")
        # Any failure degrades to "skip this content"
        return ClassificationResult(
            is_memory=False,
            path=None,
            confidence=0.0,
            confidence_level=ClassificationConfidence.LOW,
            reasoning=f"Classification failed: {e!s}",
            suggested_action=ClassificationAction.SKIP,
        )

    return parsed

handle_low_confidence_classification async

handle_low_confidence_classification(content: str, classification: ClassificationResult, metadata: dict | None = None) -> ClassificationResult

Handle low confidence classification by asking LLM to expand or use parent.

Parameters:

Name Type Description Default
content str

Original content

required
classification ClassificationResult

Initial classification result

required
metadata dict | None

Optional metadata

None

Returns:

Type Description
ClassificationResult

Updated classification result with expansion decision

Source code in src/memoir/classifier/intelligent.py
async def handle_low_confidence_classification(
    self,
    content: str,
    classification: ClassificationResult,
    metadata: dict | None = None,
) -> ClassificationResult:
    """
    Handle low confidence classification by asking LLM to expand or use parent.

    On an "expand" decision the content is queued under the classified path
    and, once enough items are queued, taxonomy expansion is triggered for
    that path. On a "use_parent" decision the path is shortened by one level
    (when it has more than one) and the confidence is replaced by the
    decision's ``parent_confidence`` (0.7 when absent).

    Args:
        content: Original content
        classification: Initial classification result (mutated in place)
        metadata: Optional metadata

    Returns:
        Updated classification result with expansion decision. If the LLM
        call or decision handling raises, the error is logged and the
        classification is returned as it stands at that point.
    """
    if not classification.path:
        return classification

    # Build prompt for expansion decision
    prompt = self._build_expansion_decision_prompt(
        content, classification.path, classification.confidence, metadata
    )

    try:
        response = await self.llm.ainvoke(prompt)
        decision = self._parse_expansion_decision(response)

        if decision["action"] == "expand":
            # Trigger expansion for more specific categorization
            classification.suggested_expansion = decision.get("suggested_path")

            # Add to pending expansions
            parent_path = classification.path
            queued = self.pending_expansions.setdefault(parent_path, [])
            queued.append(
                {
                    "content": content,
                    "metadata": metadata,
                    "suggested_expansion": decision.get("suggested_categories", []),
                }
            )

            # The preset taxonomy wrapper built in __init__ has no
            # min_items_threshold attribute; reading it unconditionally
            # raised AttributeError, which the except below swallowed, so
            # expansion was never triggered. Fall back to the iterative
            # taxonomy, which __init__ constructs with that threshold.
            # NOTE(review): assumes LLMIterativeTaxonomy exposes the value
            # as `min_items_threshold` - confirm.
            threshold = getattr(self.taxonomy, "min_items_threshold", None)
            if threshold is None:
                threshold = getattr(
                    getattr(self, "iterative_taxonomy", None),
                    "min_items_threshold",
                    None,
                )

            # If we have enough items, trigger expansion
            if threshold is not None and len(queued) >= threshold:
                await self._trigger_expansion(parent_path)

        elif decision["action"] == "use_parent":
            # Use more generic category
            classification.use_parent = True
            parts = classification.path.split(".")
            if len(parts) > 1:
                classification.path = ".".join(parts[:-1])
                classification.confidence = decision.get("parent_confidence", 0.7)

        classification.reasoning += (
            f" | Expansion decision: {decision.get('reasoning', '')}"
        )

    except Exception as e:
        logger.error(f"Expansion decision failed: {e}")

    return classification

process_classification async

process_classification(content: str, metadata: dict | None = None, conversation_context: list[str] | None = None) -> ClassificationResult

Main entry point for processing classification.

Parameters:

Name Type Description Default
content str

Content to classify

required
metadata dict | None

Optional metadata

None
conversation_context list[str] | None

Optional list of previous conversation exchanges for context

None

Returns:

Type Description
ClassificationResult

ClassificationResult with classification details and recommended action

Source code in src/memoir/classifier/intelligent.py
async def process_classification(
    self,
    content: str,
    metadata: dict | None = None,
    conversation_context: list[str] | None = None,
) -> ClassificationResult:
    """
    Classify content and settle on the recommended action.

    Content that is not memory-worthy is returned straight from the initial
    classification. A suggested path that is missing from the taxonomy is
    noted in the reasoning (confidence above 0.8 is capped at 0.75), and
    low-confidence results go through the expand / use-parent decision.

    Args:
        content: Content to classify
        metadata: Optional metadata
        conversation_context: Optional list of previous conversation exchanges for context

    Returns:
        ClassificationResult with classification details and recommended action
    """
    outcome = await self.classify_input(content, metadata, conversation_context)

    # Nothing more to decide for content that is not worth remembering.
    if not outcome.is_memory:
        return outcome

    # A path the taxonomy does not know keeps its confidence unless that
    # confidence was very high (likely overconfident).
    if outcome.path and outcome.path not in self.taxonomy.get_all_paths():
        if outcome.confidence > 0.8:
            outcome.confidence = min(outcome.confidence, 0.75)
        outcome.confidence_level = self._get_confidence_level(outcome.confidence)
        outcome.reasoning += (
            f" | Suggested path '{outcome.path}' doesn't exist in taxonomy"
        )

    # Only low-confidence results need the expansion decision.
    if outcome.confidence_level != ClassificationConfidence.LOW:
        return outcome

    outcome = await self.handle_low_confidence_classification(
        content, outcome, metadata
    )

    # Translate the expansion decision into the suggested action.
    if outcome.suggested_expansion:
        outcome.suggested_action = ClassificationAction.EXPAND
    elif outcome.use_parent:
        outcome.suggested_action = ClassificationAction.USE_PARENT
    elif outcome.path and outcome.path not in self.taxonomy.get_all_paths():
        # No decision was recorded but the path is still unknown
        outcome.suggested_action = ClassificationAction.EXPAND
        outcome.reasoning += " | Invalid path suggests need for expansion"
    elif outcome.confidence < self.thresholds["medium"]:
        outcome.suggested_action = ClassificationAction.SKIP
        outcome.reasoning += " | Confidence too low after expansion handling"

    return outcome

process_memory_with_storage async

process_memory_with_storage(content: str, metadata: dict | None = None, conversation_context: list[str] | None = None) -> MemoryProcessingResult

Complete memory processing including classification and storage.

Parameters:

Name Type Description Default
content str

Content to process

required
metadata dict | None

Optional metadata

None
conversation_context list[str] | None

Optional list of previous conversation exchanges for context

None

Returns:

Type Description
MemoryProcessingResult

MemoryProcessingResult with classification and storage details

Source code in src/memoir/classifier/intelligent.py
async def process_memory_with_storage(
    self,
    content: str,
    metadata: dict | None = None,
    conversation_context: list[str] | None = None,
) -> MemoryProcessingResult:
    """
    Complete memory processing including classification and storage.

    The content is classified first. Storage is skipped when the content is
    not memory-worthy, when its confidence is below the "low" threshold,
    when the classification carries no paths, or when no memory store is
    configured. Otherwise detected profile/timeline/location updates are
    handed to their managers (failures are logged and do not stop storage)
    and the content is written under at most two classification paths,
    merging with whatever is already stored at the same storage key.

    Args:
        content: Content to process
        metadata: Optional metadata; its ``session_date`` entry, when
            present, is stored instead of the current time
        conversation_context: Optional list of previous conversation exchanges for context

    Returns:
        MemoryProcessingResult with classification and storage details.
        ``memory_action`` is STORE when at least one path was written and
        SKIP otherwise; ``success`` is set to False when no memory store is
        configured or when the storage step raises unexpectedly.
    """
    # Step 1: Classify the content
    classification = await self.process_classification(
        content, metadata, conversation_context
    )

    # Initialize result
    result = MemoryProcessingResult(
        classification=classification,
        memory_action=MemoryAction.SKIP,
        expanded_paths=[],
    )

    # Step 2: Handle based on classification
    if not classification.is_memory:
        result.storage_reasoning = "Content not memory-worthy"
        return result

    # Step 2.1: Check confidence threshold - CRITICAL for user-controlled aggressiveness
    # NOTE: with the default "low" threshold of 0.0 set in __init__ this gate
    # only fires for negative confidences; it matters when callers raise it.
    if classification.confidence < self.thresholds["low"]:
        result.storage_reasoning = f"Confidence {classification.confidence:.2f} below threshold {self.thresholds['low']}"
        result.memory_action = MemoryAction.SKIP
        return result

    # Check if we have any paths to store under
    paths_to_store = classification.all_paths
    if not paths_to_store:
        result.storage_reasoning = "No classification paths provided"
        return result

    if not self.memory_store:
        result.storage_reasoning = "No memory store available"
        result.success = False
        return result

    # Step 2.5: Apply profile updates if detected
    if classification.profile_updates and self.profile_manager:
        try:
            await self.profile_manager.apply_profile_updates(
                classification.profile_updates, metadata
            )
            logger.debug(
                f"Applied {len(classification.profile_updates)} profile updates"
            )
        except Exception as e:
            # Best effort: a failing profile update must not block storage
            logger.error(f"Failed to apply profile updates: {e}")

    # Step 2.6: Apply timeline events if detected
    if classification.timeline_events and self.timeline_manager:
        try:
            await self.timeline_manager.apply_timeline_events(
                classification.timeline_events, metadata
            )
            logger.debug(
                f"Applied {len(classification.timeline_events)} timeline events"
            )
        except Exception as e:
            # Best effort: a failing timeline update must not block storage
            logger.error(f"Failed to apply timeline events: {e}")

    # Step 2.7: Apply location events if detected
    # Unlike profile/timeline handling, a missing location manager is
    # reported with a warning instead of being silently ignored.
    if classification.location_events:
        logger.debug(f"Detected location events: {classification.location_events}")
        if self.location_manager:
            try:
                await self.location_manager.apply_location_events(
                    classification.location_events, metadata
                )
                logger.debug(
                    f"Applied {len(classification.location_events)} location events"
                )
            except Exception as e:
                logger.error(f"Failed to apply location events: {e}")
        else:
            logger.warning(
                "Location manager not configured, skipping location events"
            )

    # Step 3: Handle memory storage under multiple paths
    namespace = ("memory", self.taxonomy_version.value)
    stored_paths = []
    storage_errors = []

    try:
        # Store simplified memory structure with only essential fields
        from datetime import datetime

        # Prepare memory data with conversation context embedded in raw_text
        formatted_content = content
        if conversation_context:
            # Include context directly in raw_text for clear association
            context_lines = []
            for ctx in conversation_context:
                context_lines.append(f"Context: {ctx}")
            context_section = "\n".join(context_lines) + "\n"
            formatted_content = f"{context_section}{content}"

        memory_data = {
            "raw_text": formatted_content,  # Store raw conversation text with context
            "session_date": (
                metadata.get("session_date", datetime.now().isoformat())
                if metadata
                else datetime.now().isoformat()
            ),  # Use actual session date from JSON
            "confidence": classification.confidence,
            "classification_paths": paths_to_store,  # Store all paths this content was classified under
        }

        # Keep conversation context in metadata for search/retrieval purposes
        if conversation_context:
            memory_data["conversation_context"] = conversation_context

        # Limit to maximum 2 paths for conservative multi-labeling
        # (memory_data["classification_paths"] above still references the
        # full, untruncated list).
        paths_to_store = paths_to_store[:2]

        # Store under each classified path
        # The same memory_data dict object is handed to put() for every path.
        for path in paths_to_store:
            try:
                # For entity paths, create more specific storage keys to avoid duplication
                storage_key = await self._generate_entity_storage_key(
                    path, content, memory_data
                )

                # Check for existing content at this storage key
                existing = self.memory_store.get(namespace, storage_key)

                if existing is None:
                    # Store new memory
                    self.memory_store.put(namespace, storage_key, memory_data)
                    stored_paths.append(storage_key)
                else:
                    # Handle existing memory - merge with new content
                    merged_memory = await self._merge_memories(
                        existing, memory_data, content, conversation_context
                    )
                    if merged_memory:
                        self.memory_store.put(namespace, storage_key, merged_memory)
                        stored_paths.append(storage_key)
                        logger.debug(
                            f"Merged new content with existing memory at storage key {storage_key}"
                        )
                    else:
                        # A falsy merge result leaves the existing entry untouched
                        logger.debug(
                            f"Conflict detected, skipping merge at storage key {storage_key}"
                        )

            except Exception as e:
                # A failure on one path is recorded; remaining paths are still tried
                storage_errors.append(f"Failed to store at {path}: {e}")
                logger.error(f"Storage error for path {path}: {e}")

        if stored_paths:
            result.memory_action = MemoryAction.STORE
            result.memory_path = stored_paths[
                0
            ]  # Primary path for backward compatibility
            result.new_content = content
            if len(stored_paths) == 1:
                result.storage_reasoning = (
                    f"Stored raw memory at {stored_paths[0]} ({len(content)} chars)"
                )
            else:
                result.storage_reasoning = f"Stored raw memory at {len(stored_paths)} paths: {', '.join(stored_paths)} ({len(content)} chars)"
        else:
            # Reached both when every path raised and when every merge was
            # skipped; in the latter case the error list is empty.
            result.memory_action = MemoryAction.SKIP
            result.storage_reasoning = (
                f"Failed to store at any path. Errors: {'; '.join(storage_errors)}"
            )

    except Exception as e:
        # Anything raised outside the per-path handling marks the run as failed
        result.success = False
        result.storage_reasoning = f"Storage failed: {e}"

    return result

get_stored_memories

get_stored_memories(limit: int = 10) -> list[dict]

Get stored memories from the memory store.

Source code in src/memoir/classifier/intelligent.py
def get_stored_memories(self, limit: int = 10) -> list[dict]:
    """Return up to ``limit`` entries from the memory store as plain dicts.

    Each returned dict has the keys ``path``, ``content`` and ``timestamp``.
    Search hits may be ``(namespace, key, value)`` tuples or objects that
    expose ``key`` and ``value`` attributes; hits of any other shape are
    ignored. An empty list is returned when no store is configured or when
    the lookup raises.
    """
    store = self.memory_store
    if not store:
        return []

    ns = ("memory", self.taxonomy_version.value)
    try:
        collected = []
        for hit in store.search(ns, filter={}, limit=limit):
            if isinstance(hit, tuple) and len(hit) == 3:
                # Tuple layout: (namespace, key, value)
                _, key, value = hit
                stamp = value.get("timestamp") if isinstance(value, dict) else None
                collected.append({"path": key, "content": value, "timestamp": stamp})
                continue

            if hasattr(hit, "key") and hasattr(hit, "value"):
                # Item-style object exposing key/value attributes
                collected.append(
                    {
                        "path": hit.key,
                        "content": hit.value,
                        "timestamp": getattr(hit, "timestamp", None),
                    }
                )
        return collected
    except Exception as e:
        logger.error(f"Failed to retrieve memories: {e}")
        return []

get_classification_statistics async

get_classification_statistics() -> dict

Get statistics about the classification system.

Source code in src/memoir/classifier/intelligent.py
async def get_classification_statistics(self) -> dict:
    """Collect a snapshot of classifier state.

    Returns:
        A dict with the taxonomy's info and expansion statistics, the number
        of queued items per pending-expansion path, and the configured
        confidence thresholds.
    """
    stats = {}
    stats["taxonomy_info"] = self.taxonomy.get_taxonomy_info()
    stats["expansion_stats"] = self.taxonomy.get_expansion_statistics()

    # Report only how many items wait under each path, not the items themselves.
    queued_counts = {}
    for expansion_path, queued in self.pending_expansions.items():
        queued_counts[expansion_path] = len(queued)
    stats["pending_expansions"] = queued_counts

    stats["confidence_thresholds"] = self.thresholds
    return stats

get_category_structure

get_category_structure() -> dict

Get the current category structure for passing to LLM context.

Source code in src/memoir/classifier/intelligent.py
def get_category_structure(self) -> dict:
    """Summarise the taxonomy layout for inclusion in LLM context.

    Returns:
        A dict holding the taxonomy version, every path, the top-level
        categories (paths without a dot, excluding ``"other"``), the
        taxonomy's LLM export, a per-depth path count for depths 1-5 and the
        total number of paths.
    """
    all_paths = self.taxonomy.get_all_paths()

    # Tally paths per depth in one pass; depths outside 1-5 are not reported.
    depth_analysis = {f"depth_{level}": 0 for level in range(1, 6)}
    for taxonomy_path in all_paths:
        bucket = f"depth_{len(taxonomy_path.split('.'))}"
        if bucket in depth_analysis:
            depth_analysis[bucket] += 1

    structure = {"version": self.taxonomy_version.value, "all_paths": all_paths}
    structure["first_level_categories"] = [
        taxonomy_path
        for taxonomy_path in all_paths
        if taxonomy_path != "other" and "." not in taxonomy_path
    ]
    structure["structure_snapshot"] = self.taxonomy.export_for_llm()
    structure["depth_analysis"] = depth_analysis
    structure["total_paths"] = len(all_paths)
    return structure

evaluate_semantic_appropriateness async

evaluate_semantic_appropriateness(content: str, path: str, context_paths: list[str] | None = None) -> dict

Use LLM to evaluate if content semantically belongs in the assigned path.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| content | str | The memory content to evaluate | required |
| path | str | The taxonomy path where content is stored | required |
| context_paths | list[str] \| None | Other similar paths for comparison context | None |

Returns:

| Type | Description |
| --- | --- |
| dict | Dict with appropriateness score, reasoning, and suggestions |

Source code in src/memoir/classifier/intelligent.py
async def evaluate_semantic_appropriateness(
    self, content: str, path: str, context_paths: list[str] | None = None
) -> dict:
    """
    Use LLM to evaluate if content semantically belongs in the assigned path.

    Args:
        content: The memory content to evaluate
        path: The taxonomy path where content is stored
        context_paths: Other similar paths for comparison context

    Returns:
        Dict with appropriateness score, reasoning, and suggestions
    """
    # Build evaluation prompt
    prompt_parts = [
        "You are a taxonomy evaluation expert. Analyze whether the given content semantically belongs in the assigned taxonomy path.",
        "",
        f'Content: "{content}"',
        f"Assigned Path: {path}",
        "",
        "Path Components Analysis:",
    ]

    # Break down path components for LLM understanding
    path_parts = path.split(".")
    for i, part in enumerate(path_parts):
        level_name = ["Domain", "Area", "Category", "Subcategory", "Detail"][
            min(i, 4)
        ]
        prompt_parts.append(f"  {level_name}: {part.replace('_', ' ').title()}")

    # Add context of similar paths if available
    if context_paths:
        prompt_parts.extend(
            [
                "",
                "Similar paths in taxonomy for comparison:",
            ]
        )
        for similar_path in context_paths[:5]:
            prompt_parts.append(f"  - {similar_path}")

    prompt_parts.extend(
        [
            "",
            "Evaluate:",
            "1. Does the content conceptually belong in this taxonomy path?",
            "2. Is each level of the hierarchy appropriate for this content?",
            "3. Are there better alternative paths in the taxonomy?",
            "4. Is the path depth appropriate for the content's specificity?",
            "",
            "Consider:",
            "- Semantic meaning and conceptual relationships",
            "- Logical hierarchical progression",
            "- Domain appropriateness",
            "- Content specificity vs path granularity",
            "",
            "Respond in JSON format:",
            "{",
            '  "appropriate": true/false,',
            '  "confidence": 0.0-1.0,',
            '  "score": 0-100,',
            '  "reasoning": "detailed explanation of why this classification is good/bad",',
            '  "issues": ["list", "of", "specific", "problems"] or [],',
            '  "suggested_path": "better.path.if.needed" or null,',
            '  "path_quality": "excellent" | "good" | "acceptable" | "poor" | "completely_wrong"',
            "}",
        ]
    )

    try:
        response = await self.llm.ainvoke("\n".join(prompt_parts))

        # Parse LLM response
        if hasattr(response, "content"):
            response_content = response.content
        else:
            response_content = str(response)

        # Extract JSON from response
        import re

        json_match = re.search(r"\{[^{}]*\}", response_content, re.DOTALL)
        if json_match:
            result = json.loads(json_match.group())
        else:
            # Fallback if JSON parsing fails
            result = {
                "appropriate": True,
                "confidence": 0.5,
                "score": 50,
                "reasoning": "Could not parse LLM response",
                "issues": ["Parse error"],
                "suggested_path": None,
                "path_quality": "acceptable",
            }

        # Ensure required fields exist
        result.setdefault("appropriate", result.get("score", 50) >= 70)
        result.setdefault("confidence", 0.5)
        result.setdefault("score", 50)
        result.setdefault("reasoning", "No reasoning provided")
        result.setdefault("issues", [])
        result.setdefault("suggested_path", None)
        result.setdefault("path_quality", "acceptable")

        return result

    except Exception as e:
        logger.error(f"Semantic appropriateness evaluation failed: {e}")
        return {
            "appropriate": True,
            "confidence": 0.0,
            "score": 0,
            "reasoning": f"Evaluation failed: {e}",
            "issues": ["Evaluation error"],
            "suggested_path": None,
            "path_quality": "unknown",
        }

batch_evaluate_semantic_appropriateness async

batch_evaluate_semantic_appropriateness(memory_items: list[dict]) -> list[dict]

Evaluate semantic appropriateness for multiple memory items.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| memory_items | list[dict] | List of dicts with 'path' and 'content' keys | required |

Returns:

| Type | Description |
| --- | --- |
| list[dict] | List of evaluation results |

Source code in src/memoir/classifier/intelligent.py
async def batch_evaluate_semantic_appropriateness(
    self, memory_items: list[dict]
) -> list[dict]:
    """
    Run the semantic appropriateness check over several memory items.

    Items are evaluated one after another, in input order. For each item the
    paths of the other items sharing its first path segment (and having a
    different path) are passed along as comparison context.

    Args:
        memory_items: List of dicts with 'path' and 'content' keys

    Returns:
        One evaluation dict per input item, each extended with an ``item``
        key pointing at the item it describes
    """

    def _domain_of(entry: dict) -> str:
        """Return the first dot-separated segment of the entry's path."""
        return entry["path"].split(".")[0]

    # Index every path under its domain so siblings can be looked up quickly.
    paths_by_domain = {}
    for entry in memory_items:
        paths_by_domain.setdefault(_domain_of(entry), []).append(entry["path"])

    evaluations = []
    for entry in memory_items:
        own_path = entry["path"]
        sibling_paths = [
            candidate
            for candidate in paths_by_domain.get(_domain_of(entry), [])
            if candidate != own_path
        ]

        verdict = await self.evaluate_semantic_appropriateness(
            entry["content"], own_path, sibling_paths
        )
        verdict["item"] = entry
        evaluations.append(verdict)

    return evaluations

classify_async async

classify_async(content: str, metadata: dict | None = None) -> ClassificationResult

Compatibility method for SemanticClassifier interface.

Source code in src/memoir/classifier/intelligent.py
async def classify_async(
    self, content: str, metadata: dict | None = None
) -> ClassificationResult:
    """Classify ``content`` via ``classify_input``.

    Exists so this classifier can be called through the same method name as
    SemanticClassifier; both arguments are forwarded unchanged.
    """
    outcome = await self.classify_input(content, metadata)
    return outcome

memoir.classifier.semantic module

memoir.classifier.semantic

Semantic classifier for mapping memories to taxonomy paths. Uses LLM-based classification with caching and optimization.

ClassificationResult

Bases: BaseModel

Result of semantic classification.

Source code in src/memoir/classifier/semantic.py
class ClassificationResult(BaseModel):
    """Result of semantic classification.

    Each field is declared through ``Field`` with a description only; no
    default value is supplied for any of them here.
    """

    # Taxonomy path chosen as the best match for the memory.
    primary_path: str = Field(description="Primary taxonomy path for the memory")
    # Described as a 0-1 score; no range constraint is declared on the field.
    confidence: float = Field(description="Confidence score (0-1)")
    # Further candidate paths besides the primary one.
    alternative_paths: list[str] = Field(description="Alternative relevant paths")
    # Short explanation of why the primary path was chosen.
    reasoning: str = Field(description="Brief reasoning for classification")

SemanticClassifier

Classifies memories into semantic taxonomy paths. Optimized for low-latency classification with caching.

Source code in src/memoir/classifier/semantic.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
class SemanticClassifier:
    """
    Classifies memories into semantic taxonomy paths.
    Optimized for low-latency classification with caching.
    """

    def __init__(
        self,
        llm: Any | None = None,
        taxonomy: TaxonomyInterface | None = None,
        cache_size: int = DEFAULT_CACHE_SIZE,
        use_examples: bool = True,
        fallback_path: str | None = None,
    ):
        """
        Set up the classifier's taxonomy, LLM handle, cache and prompt template.

        Args:
            llm: Language model used for classification; stored as given
            taxonomy: Taxonomy instance implementing TaxonomyInterface.
                     When None, the result of get_taxonomy() is used
            cache_size: Requested size of the classification cache.
                     NOTE(review): this value is not referenced anywhere in
                     this constructor; the cache is created as a plain dict
            use_examples: Whether to include examples in prompts
            fallback_path: Path to use when classification fails. When empty
                     or None, one is derived from the taxonomy
        """
        if taxonomy is None:
            taxonomy = get_taxonomy()
        self.taxonomy = taxonomy
        self.llm = llm
        self.use_examples = use_examples

        # A caller-supplied fallback wins; deriving one needs self.taxonomy,
        # which is why the taxonomy is assigned first.
        if fallback_path:
            self.fallback_path = fallback_path
        else:
            self.fallback_path = self._determine_fallback_path()

        self._cache = {}
        self._setup_classification_prompt()

    def _determine_fallback_path(self) -> str:
        """Determine appropriate fallback path based on available taxonomy.

        Candidates are tried in this order:

        1. ``DEFAULT_FALLBACK_PATH`` itself, if the taxonomy contains it.
        2. A path starting with ``"context."``: the shortest one that is 4-5
           levels deep, otherwise the longest one of any depth.
        3. The shortest path ending in ``".other"``.
        4. The first path the taxonomy reports.

        Returns:
            The selected path, or ``DEFAULT_FALLBACK_PATH`` when the taxonomy
            has no paths or querying it raises (the error is logged as a
            warning rather than propagated).
        """
        try:
            all_paths = self.taxonomy.get_all_paths()

            # First, try to find the exact default fallback path for backwards compatibility
            if DEFAULT_FALLBACK_PATH in all_paths:
                return DEFAULT_FALLBACK_PATH

            # Try to find a context-related path that's reasonably specific
            context_paths = [path for path in all_paths if path.startswith("context.")]
            if context_paths:
                # Prefer paths with depth similar to the default (4-5 levels)
                preferred_paths = [
                    p for p in context_paths if 4 <= len(p.split(".")) <= 5
                ]
                if preferred_paths:
                    # min/max return the first of several equally long paths,
                    # the same choice a stable sort followed by [0] makes.
                    return min(preferred_paths, key=len)

                # Fallback to any context path (prefer longer ones for backwards compatibility)
                return max(context_paths, key=len)

            # Try to find any 'other' category, preferring shorter paths
            other_paths = [path for path in all_paths if path.endswith(".other")]
            if other_paths:
                return min(other_paths, key=len)

            # Use the first available path as last resort
            if all_paths:
                return all_paths[0]

        except Exception as e:
            # Best effort: keep going with the default, but leave a trace
            # instead of hiding a broken taxonomy.
            logger.warning(f"Could not determine fallback path from taxonomy: {e}")

        # Ultimate fallback to the default path
        return DEFAULT_FALLBACK_PATH

    def _get_taxonomy_structure_info(self) -> str:
        """Render the taxonomy as a text listing for the classification prompt.

        Every path that does not end in ``.other`` is listed, grouped under
        its top-level category. Listing all of them keeps the static prompt
        section long enough for prompt caching (2048 tokens minimum for
        Haiku, per the original author).

        Returns:
            The listing, or a short generic sentence when the taxonomy
            reports no paths or rendering fails.
        """
        try:
            every_path = self.taxonomy.get_all_paths()
            if not every_path:
                return "The taxonomy structure is available but paths could not be enumerated."

            # 'other' paths are implied, so they are left out of the listing.
            listed_paths = [p for p in every_path if not p.endswith(".other")]

            by_category: dict[str, list[str]] = {}
            for listed in listed_paths:
                by_category.setdefault(listed.split(".")[0], []).append(listed)

            lines = [
                f"Complete taxonomy hierarchy ({len(listed_paths)} available paths):",
                "",
            ]
            for top_level in sorted(by_category):
                lines.append(f"## {top_level.upper()}")
                lines.extend(f"  - {member}" for member in sorted(by_category[top_level]))
                lines.append("")

            # Advanced taxonomies get an extra reminder about 'other' buckets.
            if isinstance(self.taxonomy, AdvancedTaxonomyInterface):
                lines.extend(
                    [
                        "NOTE: Each category also has 'other' subcategories for unclassified content.",
                        "Use 'other' categories when content doesn't fit existing specific paths.",
                    ]
                )

            return "\n".join(lines)

        except Exception as e:
            logger.warning(f"Could not generate taxonomy structure info: {e}")
            return "Taxonomy structure is available. Please classify using the most appropriate path."

    def _is_valid_path(self, path: str) -> bool:
        """Ask the taxonomy whether ``path`` is valid.

        Returns:
            Whatever ``self.taxonomy.is_valid_path`` returns, or False (with
            a logged warning) when that call raises.
        """
        try:
            verdict = self.taxonomy.is_valid_path(path)
        except Exception as e:
            logger.warning(f"Error validating path {path}: {e}")
            return False
        return verdict

    def _setup_classification_prompt(self):
        """Store the classification prompt template on the instance.

        The template is assigned to ``self.classification_template`` and
        contains five ``str.format`` placeholders: ``taxonomy_structure``,
        ``examples``, ``context_info``, ``classification_hints`` and
        ``memory_content``.

        The text is laid out with the instruction block first (between the
        STATIC_SECTION markers) and the per-request part last (between the
        DYNAMIC_SECTION markers). The intent is to let LLM providers that
        cache prompt prefixes reuse the leading part across calls. Note that
        the static section embeds ``taxonomy_structure`` and ``examples``,
        so it only stays identical while those two render to the same text.
        """
        # Static content first, dynamic content last for optimal prompt caching
        self.classification_template = """[STATIC_SECTION_START]
You are a semantic memory classifier. Your task is to classify the given memory content into the most appropriate path(s) from the provided taxonomy.

AVAILABLE TAXONOMY STRUCTURE:
{taxonomy_structure}

{examples}

CLASSIFICATION GUIDELINES:
1. Match content to the MOST SPECIFIC appropriate path from the available taxonomy
2. Consider the semantic meaning and context of the content
3. AVOID generic paths like 'context.current' unless content is truly about the current conversation
4. Consider confidence level:
   - High confidence (0.8-1.0): Very specific and accurate path match
   - Medium confidence (0.5-0.7): Reasonable fit but could be broader
   - Low confidence (0.0-0.4): Content is unclear or doesn't fit well
5. When unsure, use the most specific relevant category available in the taxonomy
6. Use 'other' categories when content doesn't fit existing specific paths - this helps the system learn and expand

IMPORTANT:
- Only use paths that exist in the provided taxonomy
- Prefer accuracy over specificity
- Return a valid JSON response with the required fields
- 'Other' categories help the system learn and expand over time

Return your classification as pure JSON (no markdown, no code blocks, just JSON) with:
- primary_path: The best matching taxonomy path (can be an 'other' path)
- confidence: Confidence score from 0 to 1
- alternative_paths: List of other relevant paths (max 3)
- reasoning: Brief explanation of your choice (1-2 sentences)

Think step by step:
1. Can this be clearly categorized into existing paths?
2. If uncertain, what's the closest parent category?
3. Should this go to a specific path or an 'other' category?

CRITICAL: Return ONLY the JSON object, no explanations, no markdown formatting.
[STATIC_SECTION_END]

[DYNAMIC_SECTION_START]
{context_info}

{classification_hints}

MEMORY CONTENT TO CLASSIFY:
{memory_content}
[DYNAMIC_SECTION_END]"""

    def _get_classification_examples(self) -> str:
        """Render the few-shot examples block for the prompt.

        Returns:
            An empty string when examples are disabled; otherwise the
            ``EXAMPLES:`` header followed by one four-line entry per example
            produced by ``_generate_dynamic_examples``.
        """
        if not self.use_examples:
            return ""

        # Collect the pieces and join once instead of growing a string.
        rendered = ["EXAMPLES:\n"]
        for sample in self._generate_dynamic_examples():
            rendered.append(
                f"\nMemory: {sample['memory']}\n"
                f"Classification: {sample['path']}\n"
                f"Confidence: {sample['confidence']}\n"
                f"Reasoning: {sample['reasoning']}\n"
            )
        return "".join(rendered)

    def _generate_dynamic_examples(self) -> list[dict]:
        """Build few-shot examples whose paths exist in the current taxonomy.

        Each built-in blueprint names a path pattern; it is turned into an
        example only if ``_find_example_path`` resolves that pattern to a
        path of the taxonomy.

        Returns:
            A list of dicts with ``memory``, ``path``, ``confidence`` and
            ``reasoning`` keys. Empty when the taxonomy reports no paths; a
            single hard-coded example when generation raises.
        """
        try:
            available_paths = self.taxonomy.get_all_paths()
            if not available_paths:
                return []

            # Values substituted into the blueprint sentences.
            placeholder_values = {
                "example_name": "John Smith",
                "example_tool": "VS Code",
                "example_skill": "machine learning",
                "example_preference": "coffee",
            }

            # (memory sentence, path pattern, confidence, reasoning)
            blueprints = (
                (
                    "My name is {example_name} and I'm 28 years old",
                    "profile.personal.identity",
                    0.95,
                    "Personal identity information - name and age",
                ),
                (
                    "I work as a software engineer at Google",
                    "profile.professional.current",
                    0.90,
                    "Current professional role and company",
                ),
                (
                    "I graduated from MIT with a CS degree",
                    "profile.professional.education.formal",
                    0.90,
                    "Formal education history",
                ),
                (
                    "My favorite IDE is {example_tool}",
                    "preferences.technology.programming.tools",
                    0.85,
                    "Tool/IDE preference",
                ),
                (
                    "I have 5 years of experience in {example_skill}",
                    "profile.professional.skills.technical",
                    0.85,
                    "Professional skill with experience duration",
                ),
                (
                    "I prefer {example_preference} for my morning routine",
                    "preferences.personal.lifestyle",
                    0.80,
                    "Personal lifestyle preference",
                ),
            )

            generated = []
            for sentence, pattern, confidence, reasoning in blueprints:
                resolved = self._find_example_path(available_paths, pattern)
                if not resolved:
                    # No usable path for this blueprint in the taxonomy.
                    continue
                generated.append(
                    {
                        "memory": sentence.format(**placeholder_values),
                        "path": resolved,
                        "confidence": confidence,
                        "reasoning": reasoning,
                    }
                )
            return generated

        except Exception as e:
            logger.warning(f"Could not generate dynamic examples: {e}")
            # Return minimal fallback examples if dynamic generation fails
            return [
                {
                    "memory": "User's name is John Smith",
                    "path": "profile.personal.identity",
                    "confidence": 0.9,
                    "reasoning": "Personal identity information",
                }
            ]

    def _find_example_path(self, all_paths: list[str], pattern: str) -> str | None:
        """Find a suitable taxonomy path for example generation."""
        # Look for paths that contain the pattern
        candidates = [path for path in all_paths if pattern.lower() in path.lower()]

        if candidates:
            # Prefer paths that are not too deep (3-4 levels) and not 'other' categories
            good_candidates = [
                path
                for path in candidates
                if 3 <= len(path.split(".")) <= 4 and "other" not in path
            ]
            if good_candidates:
                return good_candidates[0]
            return candidates[0]

        # Fallback: find any path with appropriate top-level category
        if "identity" in pattern:
            candidates = [path for path in all_paths if path.startswith("profile.")]
        elif "preferences" in pattern:
            candidates = [path for path in all_paths if path.startswith("preferences.")]
        elif "skills" in pattern:
            candidates = [path for path in all_paths if "skill" in path.lower()]
        else:
            # For 'other' pattern, find any 'other' category
            candidates = [path for path in all_paths if path.endswith(".other")]

        return candidates[0] if candidates else None

    def _get_context_info(self, context: dict | None = None) -> str:
        """Format context information for classification."""
        if not context:
            return ""

        context_parts = []
        if "user_id" in context:
            context_parts.append(f"User: {context['user_id']}")
        if "session_id" in context:
            context_parts.append(f"Session: {context['session_id']}")
        if "timestamp" in context:
            context_parts.append(f"Time: {context['timestamp']}")
        if "conversation_topic" in context:
            context_parts.append(f"Topic: {context['conversation_topic']}")
        if "available_memory_paths" in context:
            paths = context["available_memory_paths"]
            if paths:
                context_parts.append("AVAILABLE STORED MEMORY PATHS:")
                context_parts.append(
                    "You should prioritize matching to these existing paths:"
                )
                for path in sorted(paths):
                    context_parts.append(f"  - {path}")
                context_parts.append(
                    "If the query relates to stored memories, try to match one of these paths."
                )

        if context_parts:
            return "CONTEXT:\n" + "\n".join(context_parts)
        return ""

    def _compute_cache_key(
        self, memory_content: str, context: dict | None = None
    ) -> str:
        """Compute a cache key for the classification."""
        content_hash = hashlib.sha256(memory_content.encode()).hexdigest()
        context_str = json.dumps(context, sort_keys=True) if context else ""
        context_hash = hashlib.sha256(context_str.encode()).hexdigest()
        return f"{content_hash}:{context_hash}"

    async def classify_async(
        self,
        memory_content: str,
        context: dict | None = None,
        use_cache: bool = True,
    ) -> ClassificationResult:
        """
        Classify memory content into taxonomy path asynchronously.

        Steps, in order: cache lookup, optional taxonomy hints for the
        prompt, prompt rendering, LLM call, JSON extraction, hint-based
        adjustment, path selection/validation, optional tracking, caching.

        Args:
            memory_content: The memory content to classify
            context: Optional context information. Its ``metadata`` entry, if
                any, is forwarded to advanced taxonomies
            use_cache: Whether to look up and store results in the
                in-memory cache

        Returns:
            ClassificationResult with path and metadata. Any exception raised
            inside the classification ``try`` block (including the
            ``ValueError`` for a missing LLM and JSON/validation errors) is
            logged and replaced by ``self._fallback_classification(...)``;
            that fallback result is not stored in the cache.

        Note:
            The cache-key computation, the first hints lookup and the prompt
            variable preparation run before the ``try`` block, so exceptions
            from those steps propagate to the caller.
        """
        # Check cache
        if use_cache:
            cache_key = self._compute_cache_key(memory_content, context)
            if cache_key in self._cache:
                # logger.debug(f"Cache hit for classification: {cache_key}")
                pass
                return self._cache[cache_key]

        # Get iterative taxonomy hints to include in prompt
        classification_hints = ""
        if hasattr(self.taxonomy, "get_classification_hints"):
            hints = self.taxonomy.get_classification_hints(memory_content)
            if hints.get("suggested_paths") or hints.get("expansion_candidates"):
                classification_hints = "\nCLASSIFICATION HINTS:\n"
                if hints.get("suggested_paths"):
                    # Only the first three suggestions are shown to the model.
                    classification_hints += f"Similar content previously found in: {', '.join(hints['suggested_paths'][:3])}\n"
                if hints.get("expansion_candidates"):
                    candidates = [
                        f"{item['path']} ({item['item_count']} items)"
                        for item in hints["expansion_candidates"][:3]
                    ]
                    classification_hints += (
                        f"Paths ready for expansion: {', '.join(candidates)}\n"
                    )
                classification_hints += (
                    "Consider these hints when choosing the most appropriate path.\n"
                )

        # Prepare prompt
        # These values are computed even when no LLM is configured, because
        # the LLM check only happens inside the try block below.
        prompt_vars = {
            "memory_content": memory_content,
            "context_info": self._get_context_info(context),
            "taxonomy_structure": self._get_taxonomy_structure_info(),
            "examples": self._get_classification_examples(),
            "classification_hints": classification_hints,
        }

        # Run classification
        try:
            if self.llm:
                # Use provided LLM
                prompt_text = self.classification_template.format(**prompt_vars)
                response = await self.llm.ainvoke(prompt_text)

                # Extract content from response
                if hasattr(response, "content"):
                    content = response.content
                elif isinstance(response, str):
                    content = response
                else:
                    content = str(response)

                # Clean up the response - handle markdown code blocks
                content = content.strip()
                if "```json" in content:
                    # Extract JSON from markdown code block
                    # (7 == len("```json"): skip past the opening fence)
                    start = content.find("```json") + 7
                    end = content.find("```", start)
                    if end > start:
                        content = content[start:end].strip()
                elif "```" in content:
                    # Extract from generic code block
                    start = content.find("```") + 3
                    end = content.find("```", start)
                    if end > start:
                        content = content[start:end].strip()

                # Parse JSON
                result_dict = json.loads(content)
            else:
                # No LLM provided - must have one for production use
                # (raised inside the try, so the caller receives the fallback
                # classification rather than this exception)
                raise ValueError(
                    "No LLM provided for classification. Cannot classify without language model."
                )

            result = ClassificationResult(**result_dict)

            # Get classification hints from iterative taxonomy before processing
            # NOTE(review): this is a second get_classification_hints call for
            # the same content as the one made for the prompt above - confirm
            # whether the repeated lookup is intentional.
            hints = None
            if hasattr(self.taxonomy, "get_classification_hints"):
                hints = self.taxonomy.get_classification_hints(memory_content)

                # Apply hints to improve classification
                if hints.get("suggested_paths"):
                    # If LLM suggested a path that matches a hint, boost confidence
                    if result.primary_path in hints["suggested_paths"]:
                        result.confidence = min(1.0, result.confidence + 0.1)

                    # If no good match but we have suggestions, consider the best suggestion
                    elif result.confidence < 0.6 and hints["suggested_paths"]:
                        best_suggestion = hints["suggested_paths"][0]
                        if self._is_valid_path(best_suggestion):
                            # The suggestion is added as an alternative only;
                            # the primary path is left unchanged here.
                            result.alternative_paths.insert(0, best_suggestion)
                            result.reasoning += (
                                f" (Hint: similar content found in {best_suggestion})"
                            )

            # Use advanced taxonomy logic if available
            if isinstance(self.taxonomy, AdvancedTaxonomyInterface):
                # Advanced taxonomy (e.g., DynamicTaxonomy) - use smart path selection
                selected_path, final_confidence = (
                    self.taxonomy.select_path_with_fallback(
                        classification_result=result,
                        memory_content=memory_content,
                        metadata=context.get("metadata") if context else None,
                    )
                )

                # Update result with advanced taxonomy's selection
                result.primary_path = selected_path
                result.confidence = final_confidence

            else:
                # Standard taxonomy - just validate paths
                if not self._is_valid_path(result.primary_path):
                    # Find closest valid path
                    result.primary_path = self._find_closest_valid_path(
                        result.primary_path
                    )

            # Track the classification in iterative taxonomy for learning
            if hasattr(self.taxonomy, "track_classification"):
                expansion_triggered = self.taxonomy.track_classification(
                    result.primary_path,
                    memory_content,
                    {
                        "confidence": result.confidence,
                        "reasoning": result.reasoning,
                        "alternatives": result.alternative_paths,
                        "hints_used": hints is not None,
                    },
                )

                if expansion_triggered:
                    # logger.info(
                    #     f"Triggered taxonomy expansion for path: {result.primary_path}"
                    # )
                    pass

            # Cache result
            # NOTE(review): entries are added to a plain dict and nothing in
            # this method evicts them - confirm whether a size bound is
            # enforced elsewhere.
            if use_cache:
                self._cache[cache_key] = result

            return result

        except Exception as e:
            logger.error(f"Classification failed: {e}")
            # Return fallback classification
            return self._fallback_classification(memory_content)

    def classify(
        self,
        memory_content: str,
        context: dict | None = None,
        use_cache: bool = True,
    ) -> ClassificationResult:
        """
        Blocking counterpart of classify_async.

        Creates the classify_async coroutine for the given arguments and drives
        it to completion with asyncio.run, returning whatever it produces.

        Note: asyncio.run cannot be used while another event loop is running in
        the same thread, so call classify_async directly from async code.

        Args:
            memory_content: The memory content to classify
            context: Optional context information
            use_cache: Whether to use cached results

        Returns:
            The ClassificationResult produced by classify_async
        """
        import asyncio

        pending = self.classify_async(memory_content, context, use_cache)
        return asyncio.run(pending)

    def _find_closest_valid_path(self, invalid_path: str) -> str:
        """Find the closest valid path in the taxonomy."""
        parts = invalid_path.split(".")

        # Try progressively shorter paths
        for i in range(len(parts), 0, -1):
            test_path = ".".join(parts[:i])
            if self._is_valid_path(test_path):
                return test_path

        # Fallback to configured fallback path, but validate it exists first
        if self._is_valid_path(self.fallback_path):
            return self.fallback_path

        # Ultimate fallback: find any valid path from the first category
        all_paths = self.taxonomy.get_all_paths()
        if all_paths:
            return all_paths[0]

        # Should never reach here if taxonomy is properly initialized
        raise RuntimeError("No valid paths found in taxonomy")

    def _fallback_classification(self, memory_content: str) -> ClassificationResult:
        """
        Build the result returned when normal classification fails.

        The path is ``self.fallback_path`` resolved through
        ``_find_closest_valid_path``; confidence is fixed at 0.5 and there are
        no alternative paths.

        Args:
            memory_content: The content that failed to classify (accepted for
                interface symmetry; not consulted here)

        Returns:
            A ClassificationResult pointing at the resolved fallback path
        """
        return ClassificationResult(
            primary_path=self._find_closest_valid_path(self.fallback_path),
            confidence=0.5,
            alternative_paths=[],
            reasoning="Fallback classification due to processing error",
        )

    def batch_classify(
        self, memories: list[str], context: dict | None = None
    ) -> list[ClassificationResult]:
        """
        Classify several memories, one after another.

        Each entry is passed to ``self.classify`` together with the shared
        context; results are returned in the same order as the input.

        Args:
            memories: List of memory contents to classify
            context: Optional shared context

        Returns:
            List of ClassificationResults, one per input memory
        """
        return [self.classify(entry, context) for entry in memories]

    def get_statistics(self) -> dict:
        """
        Report basic counters about this classifier.

        Returns:
            Dict with the number of cached results, the number of taxonomy
            paths (0 if the taxonomy fails to list them), the taxonomy's class
            name and the number of TaxonomyCategory members
        """
        # Count paths through the taxonomy interface; a failing taxonomy counts as 0
        try:
            known_paths = self.taxonomy.get_all_paths()
            path_count = len(known_paths)
        except Exception:
            path_count = 0

        cached_entries = len(self._cache)
        taxonomy_name = type(self.taxonomy).__name__
        category_count = len(list(TaxonomyCategory))

        return {
            "cache_size": cached_entries,
            "taxonomy_paths": path_count,
            "taxonomy_type": taxonomy_name,
            "categories": category_count,
        }

__init__

__init__(llm: Any | None = None, taxonomy: TaxonomyInterface | None = None, cache_size: int = DEFAULT_CACHE_SIZE, use_examples: bool = True, fallback_path: str | None = None)

Initialize the semantic classifier.

Parameters:

Name Type Description Default
llm Any | None

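Language model for classification. Optional at construction time, but no default model is substituted: without one, classification logs an error and returns the fallback result.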

None
taxonomy TaxonomyInterface | None

Taxonomy instance implementing TaxonomyInterface. If None, uses the default SemanticTaxonomy.

None
cache_size int

Size of the classification cache (note: the code shown on this page keeps results in a plain dict and does not enforce this limit)

DEFAULT_CACHE_SIZE
use_examples bool

Whether to include examples in prompts

True
fallback_path str | None

Custom fallback path when classification fails

None
Source code in src/memoir/classifier/semantic.py
def __init__(
    self,
    llm: Any | None = None,
    taxonomy: TaxonomyInterface | None = None,
    cache_size: int = DEFAULT_CACHE_SIZE,
    use_examples: bool = True,
    fallback_path: str | None = None,
):
    """
    Initialize the semantic classifier.

    Args:
        llm: Language model for classification. May be omitted here, but no
             default model is created: it is stored as given.
        taxonomy: Taxonomy instance implementing TaxonomyInterface.
                 If None, the result of get_taxonomy() is used.
        cache_size: Configured size of the classification cache. Stored as
                 ``self.cache_size``.
        use_examples: Whether to include examples in prompts
        fallback_path: Custom fallback path when classification fails. If
                 omitted (or empty), one is chosen by
                 ``_determine_fallback_path()``.
    """
    self.taxonomy = taxonomy if taxonomy is not None else get_taxonomy()
    self.llm = llm
    self.use_examples = use_examples
    # Keep the configured limit instead of silently discarding the argument.
    # NOTE(review): the cache below is still a plain dict, so nothing in this
    # method evicts entries once the limit is reached.
    self.cache_size = cache_size
    # Resolved after self.taxonomy is set; presumably the helper inspects the
    # taxonomy to pick a default - TODO confirm.
    self.fallback_path = fallback_path or self._determine_fallback_path()
    self._cache = {}
    self._setup_classification_prompt()

classify_async async

classify_async(memory_content: str, context: dict | None = None, use_cache: bool = True) -> ClassificationResult

Classify memory content into taxonomy path asynchronously.

Parameters:

Name Type Description Default
memory_content str

The memory content to classify

required
context dict | None

Optional context information

None
use_cache bool

Whether to use cached results

True

Returns:

Type Description
ClassificationResult

ClassificationResult with path and metadata

Source code in src/memoir/classifier/semantic.py
async def classify_async(
    self,
    memory_content: str,
    context: dict | None = None,
    use_cache: bool = True,
) -> ClassificationResult:
    """
    Classify memory content into taxonomy path asynchronously.

    Flow: return a cached result when allowed; otherwise build the prompt
    (including any taxonomy hints), await the LLM, parse its JSON reply into
    a ClassificationResult, adjust it using hints and the taxonomy, report it
    to the taxonomy when that is supported, cache it and return it.

    Any exception raised inside the classification step (no LLM configured,
    unparsable JSON, errors building the result, taxonomy errors) is logged
    and replaced by ``self._fallback_classification(memory_content)``; such
    fallback results are not cached. Errors raised earlier, while computing
    the cache key or building the prompt variables, are not caught here.

    Args:
        memory_content: The memory content to classify
        context: Optional context information; its "metadata" entry, if any,
            is forwarded to an advanced taxonomy's path selection
        use_cache: Whether to look up and store results in the cache

    Returns:
        ClassificationResult with path and metadata
    """
    # Check cache
    # NOTE: cache_key is only bound when use_cache is true; the store at the
    # end of the method is guarded by the same flag.
    if use_cache:
        cache_key = self._compute_cache_key(memory_content, context)
        if cache_key in self._cache:
            # logger.debug(f"Cache hit for classification: {cache_key}")
            pass
            return self._cache[cache_key]

    # Get iterative taxonomy hints to include in prompt
    # Hints are an optional capability, detected by attribute presence.
    classification_hints = ""
    if hasattr(self.taxonomy, "get_classification_hints"):
        hints = self.taxonomy.get_classification_hints(memory_content)
        if hints.get("suggested_paths") or hints.get("expansion_candidates"):
            classification_hints = "\nCLASSIFICATION HINTS:\n"
            if hints.get("suggested_paths"):
                # Only the first three suggestions are put in the prompt
                classification_hints += f"Similar content previously found in: {', '.join(hints['suggested_paths'][:3])}\n"
            if hints.get("expansion_candidates"):
                # Each candidate is expected to carry 'path' and 'item_count' keys
                candidates = [
                    f"{item['path']} ({item['item_count']} items)"
                    for item in hints["expansion_candidates"][:3]
                ]
                classification_hints += (
                    f"Paths ready for expansion: {', '.join(candidates)}\n"
                )
            classification_hints += (
                "Consider these hints when choosing the most appropriate path.\n"
            )

    # Prepare prompt
    prompt_vars = {
        "memory_content": memory_content,
        "context_info": self._get_context_info(context),
        "taxonomy_structure": self._get_taxonomy_structure_info(),
        "examples": self._get_classification_examples(),
        "classification_hints": classification_hints,
    }

    # Run classification
    try:
        if self.llm:
            # Use provided LLM
            prompt_text = self.classification_template.format(**prompt_vars)
            response = await self.llm.ainvoke(prompt_text)

            # Extract content from response
            # Accepts objects exposing .content, plain strings, or anything
            # else via str()
            if hasattr(response, "content"):
                content = response.content
            elif isinstance(response, str):
                content = response
            else:
                content = str(response)

            # Clean up the response - handle markdown code blocks
            # If a closing fence is not found after the opening one, the
            # content is left untouched and handed to json.loads as-is.
            content = content.strip()
            if "```json" in content:
                # Extract JSON from markdown code block
                start = content.find("```json") + 7
                end = content.find("```", start)
                if end > start:
                    content = content[start:end].strip()
            elif "```" in content:
                # Extract from generic code block
                start = content.find("```") + 3
                end = content.find("```", start)
                if end > start:
                    content = content[start:end].strip()

            # Parse JSON
            result_dict = json.loads(content)
        else:
            # No LLM provided - must have one for production use
            # NOTE: raised inside the try block, so the handler below logs it
            # and the caller receives the fallback classification instead.
            raise ValueError(
                "No LLM provided for classification. Cannot classify without language model."
            )

        # The parsed JSON keys are passed straight through as constructor
        # keyword arguments
        result = ClassificationResult(**result_dict)

        # Get classification hints from iterative taxonomy before processing
        # NOTE(review): this is a second get_classification_hints call for
        # the same content; the prompt-building step above already made one.
        hints = None
        if hasattr(self.taxonomy, "get_classification_hints"):
            hints = self.taxonomy.get_classification_hints(memory_content)

            # Apply hints to improve classification
            if hints.get("suggested_paths"):
                # If LLM suggested a path that matches a hint, boost confidence
                # (by 0.1, capped at 1.0)
                if result.primary_path in hints["suggested_paths"]:
                    result.confidence = min(1.0, result.confidence + 0.1)

                # If no good match but we have suggestions, consider the best suggestion
                # (the second half of this condition is already guaranteed by
                # the enclosing `if`)
                elif result.confidence < 0.6 and hints["suggested_paths"]:
                    best_suggestion = hints["suggested_paths"][0]
                    if self._is_valid_path(best_suggestion):
                        result.alternative_paths.insert(0, best_suggestion)
                        result.reasoning += (
                            f" (Hint: similar content found in {best_suggestion})"
                        )

    # Use advanced taxonomy logic if available
        if isinstance(self.taxonomy, AdvancedTaxonomyInterface):
            # Advanced taxonomy (e.g., DynamicTaxonomy) - use smart path selection
            selected_path, final_confidence = (
                self.taxonomy.select_path_with_fallback(
                    classification_result=result,
                    memory_content=memory_content,
                    metadata=context.get("metadata") if context else None,
                )
            )

            # Update result with advanced taxonomy's selection
            result.primary_path = selected_path
            result.confidence = final_confidence

        else:
            # Standard taxonomy - just validate paths
            if not self._is_valid_path(result.primary_path):
                # Find closest valid path
                result.primary_path = self._find_closest_valid_path(
                    result.primary_path
                )

        # Track the classification in iterative taxonomy for learning
        # The final (possibly adjusted) path and confidence are what get reported.
        if hasattr(self.taxonomy, "track_classification"):
            expansion_triggered = self.taxonomy.track_classification(
                result.primary_path,
                memory_content,
                {
                    "confidence": result.confidence,
                    "reasoning": result.reasoning,
                    "alternatives": result.alternative_paths,
                    # True whenever the taxonomy offers hints, even if none applied
                    "hints_used": hints is not None,
                },
            )

            if expansion_triggered:
                # logger.info(
                #     f"Triggered taxonomy expansion for path: {result.primary_path}"
                # )
                pass

        # Cache result
        # The stored object is the same instance that is returned to the caller.
        if use_cache:
            self._cache[cache_key] = result

        return result

    except Exception as e:
        # Broad on purpose: every failure in the block above degrades to the
        # fallback result rather than propagating to the caller.
        logger.error(f"Classification failed: {e}")
        # Return fallback classification
        return self._fallback_classification(memory_content)

classify

classify(memory_content: str, context: dict | None = None, use_cache: bool = True) -> ClassificationResult

Synchronous version of classify_async.

Source code in src/memoir/classifier/semantic.py
def classify(
    self,
    memory_content: str,
    context: dict | None = None,
    use_cache: bool = True,
) -> ClassificationResult:
    """
    Blocking counterpart of classify_async.

    Creates the classify_async coroutine for the given arguments and drives
    it to completion with asyncio.run, returning whatever it produces.

    Note: asyncio.run cannot be used while another event loop is running in
    the same thread, so call classify_async directly from async code.

    Args:
        memory_content: The memory content to classify
        context: Optional context information
        use_cache: Whether to use cached results

    Returns:
        The ClassificationResult produced by classify_async
    """
    import asyncio

    pending = self.classify_async(memory_content, context, use_cache)
    return asyncio.run(pending)

batch_classify

batch_classify(memories: list[str], context: dict | None = None) -> list[ClassificationResult]

Classify multiple memories in batch.

Parameters:

Name Type Description Default
memories list[str]

List of memory contents to classify

required
context dict | None

Optional shared context

None

Returns:

Type Description
list[ClassificationResult]

List of ClassificationResults

Source code in src/memoir/classifier/semantic.py
def batch_classify(
    self, memories: list[str], context: dict | None = None
) -> list[ClassificationResult]:
    """
    Classify several memories, one after another.

    Each entry is passed to ``self.classify`` together with the shared
    context; results are returned in the same order as the input.

    Args:
        memories: List of memory contents to classify
        context: Optional shared context

    Returns:
        List of ClassificationResults, one per input memory
    """
    return [self.classify(entry, context) for entry in memories]

get_statistics

get_statistics() -> dict

Get classifier statistics.

Source code in src/memoir/classifier/semantic.py
def get_statistics(self) -> dict:
    """
    Report basic counters about this classifier.

    Returns:
        Dict with the number of cached results, the number of taxonomy
        paths (0 if the taxonomy fails to list them), the taxonomy's class
        name and the number of TaxonomyCategory members
    """
    # Count paths through the taxonomy interface; a failing taxonomy counts as 0
    try:
        known_paths = self.taxonomy.get_all_paths()
        path_count = len(known_paths)
    except Exception:
        path_count = 0

    cached_entries = len(self._cache)
    taxonomy_name = type(self.taxonomy).__name__
    category_count = len(list(TaxonomyCategory))

    return {
        "cache_size": cached_entries,
        "taxonomy_paths": path_count,
        "taxonomy_type": taxonomy_name,
        "categories": category_count,
    }

memoir.classifier.base module

memoir.classifier.base

Base interfaces and protocols for taxonomy systems.

TaxonomyInterface

Bases: Protocol

Protocol defining the interface that all taxonomy implementations must support.

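This allows SemanticClassifier to work with any taxonomy type that provides these methods, matched structurally rather than by inheritance. Optional capabilities outside this protocol (such as get_classification_hints and track_classification) are still detected with hasattr() checks.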

Source code in src/memoir/classifier/base.py
@runtime_checkable
class TaxonomyInterface(Protocol):
    """
    Protocol defining the interface that all taxonomy implementations must support.

    This allows SemanticClassifier to work with any taxonomy type that
    provides these methods. Matching is structural: a class does not need to
    inherit from this protocol to satisfy it.

    Because the protocol is runtime-checkable, isinstance() can be used
    against it, but such a check only verifies that the methods exist, not
    that their signatures or return types match.
    """

    def is_valid_path(self, path: str) -> bool:
        """Check if a taxonomy path is valid.

        Args:
            path: Taxonomy path to check.

        Returns:
            True if the path is valid, False otherwise.
        """
        ...

    def get_all_paths(self) -> list[str]:
        """Get all available taxonomy paths.

        Returns:
            A list of taxonomy path strings.
        """
        ...

is_valid_path

is_valid_path(path: str) -> bool

Check if a taxonomy path is valid.

Source code in src/memoir/classifier/base.py
def is_valid_path(self, path: str) -> bool:
    """Check if a taxonomy path is valid.

    Protocol stub: implementations supply the actual check.

    Args:
        path: Taxonomy path to check.

    Returns:
        True if the path is valid, False otherwise.
    """
    ...

get_all_paths

get_all_paths() -> list[str]

Get all available taxonomy paths.

Source code in src/memoir/classifier/base.py
def get_all_paths(self) -> list[str]:
    """Get all available taxonomy paths.

    Protocol stub: implementations supply the actual listing.

    Returns:
        A list of taxonomy path strings.
    """
    ...

AdvancedTaxonomyInterface

Bases: TaxonomyInterface, Protocol

Extended interface for advanced taxonomy implementations like DynamicTaxonomy.

Includes features like fallback logic, expansion tracking, and confidence-based path selection.

Source code in src/memoir/classifier/base.py
@runtime_checkable
class AdvancedTaxonomyInterface(TaxonomyInterface, Protocol):
    """
    Extended interface for advanced taxonomy implementations like DynamicTaxonomy.

    Includes features like fallback logic, expansion tracking, and confidence-based
    path selection. On top of the TaxonomyInterface methods, the only member
    declared here is select_path_with_fallback.

    The protocol is runtime-checkable, so isinstance() can be used to detect
    it; that check only confirms the methods are present.
    """

    def select_path_with_fallback(
        self,
        classification_result: Any,
        memory_content: str,
        metadata: dict | None = None,
    ) -> tuple[str, float]:
        """
        Select taxonomy path with intelligent fallback logic.

        Args:
            classification_result: Result from classification
            memory_content: Original memory content
            metadata: Optional metadata

        Returns:
            Tuple of (selected_path, final_confidence)
        """
        ...

select_path_with_fallback

select_path_with_fallback(classification_result: Any, memory_content: str, metadata: dict | None = None) -> tuple[str, float]

Select taxonomy path with intelligent fallback logic.

Parameters:

Name Type Description Default
classification_result Any

Result from classification

required
memory_content str

Original memory content

required
metadata dict | None

Optional metadata

None

Returns:

Type Description
tuple[str, float]

Tuple of (selected_path, final_confidence)

Source code in src/memoir/classifier/base.py
def select_path_with_fallback(
    self,
    classification_result: Any,
    memory_content: str,
    metadata: dict | None = None,
) -> tuple[str, float]:
    """
    Select taxonomy path with intelligent fallback logic.

    Protocol stub: implementations supply the selection logic. The semantic
    classifier calls this with keyword arguments and replaces the result's
    primary path and confidence with the returned pair.

    Args:
        classification_result: Result from classification
        memory_content: Original memory content
        metadata: Optional metadata

    Returns:
        Tuple of (selected_path, final_confidence)
    """
    ...

BaseTaxonomy

Bases: ABC

Abstract base class for taxonomy implementations. Provides common functionality and enforces the interface.

Source code in src/memoir/classifier/base.py
class BaseTaxonomy(ABC):
    """
    Abstract foundation for concrete taxonomies.

    Subclasses must implement ``is_valid_path`` and ``get_all_paths``; in
    return they inherit a default ``get_statistics``.
    """

    @abstractmethod
    def is_valid_path(self, path: str) -> bool:
        """Report whether ``path`` is a valid taxonomy path."""

    @abstractmethod
    def get_all_paths(self) -> list[str]:
        """Return every taxonomy path this implementation offers."""

    def get_statistics(self) -> dict[str, Any]:
        """
        Summarize the taxonomy: its path count and concrete class name.

        Subclasses may override this to report more specific statistics.
        """
        path_total = len(self.get_all_paths())
        type_label = self.__class__.__name__
        return {"total_paths": path_total, "type": type_label}

is_valid_path abstractmethod

is_valid_path(path: str) -> bool

Check if a taxonomy path is valid.

Source code in src/memoir/classifier/base.py
@abstractmethod
def is_valid_path(self, path: str) -> bool:
    """Report whether ``path`` is a valid taxonomy path (abstract; no default logic)."""

get_all_paths abstractmethod

get_all_paths() -> list[str]

Get all available taxonomy paths.

Source code in src/memoir/classifier/base.py
@abstractmethod
def get_all_paths(self) -> list[str]:
    """Return every taxonomy path this implementation offers (abstract; no default logic)."""

get_statistics

get_statistics() -> dict[str, Any]

Get taxonomy statistics. Override in subclasses for specific stats.

Source code in src/memoir/classifier/base.py
def get_statistics(self) -> dict[str, Any]:
    """
    Summarize the taxonomy: its path count and concrete class name.

    Subclasses may override this to report more specific statistics.
    """
    path_total = len(self.get_all_paths())
    type_label = self.__class__.__name__
    return {"total_paths": path_total, "type": type_label}