/*
   DFX SQL Playbook — FXPedia Content Scrub v1.0
   Scope:  WordPress wp_posts.post_content (FXPedia ~6,000 items)
   Engine: MySQL 8.0+ (REGEXP_REPLACE available) or MariaDB (fallback blocks provided)
   Safety: ALWAYS run the SELECT previews first. Take a snapshot backup before UPDATEs.
*/

/* ==========================================================
   0) QUICK BACKUP (minimal, content only)
   ========================================================== */

-- Replace the yyyyMMdd suffix with the current date before running.
-- CREATE TABLE ... AS SELECT copies the columns and the rows but does not
-- create any indexes or keys, so treat the copy as a data snapshot only.
-- With IF NOT EXISTS, a backup table that already exists is left as it is and
-- no rows are copied into it: always use a name that does not exist yet.

-- Full table snapshot (columns + data, no indexes)
CREATE TABLE IF NOT EXISTS wp_posts_backup_yyyyMMdd AS
SELECT *
FROM wp_posts;

-- Optional: Only glossary posts backup (smaller)
CREATE TABLE IF NOT EXISTS wp_posts_glossary_bak_yyyyMMdd AS
SELECT *
FROM wp_posts
WHERE post_type='glossary';

/* ==========================================================
   1) PREVIEW FILTER — Target just FXPedia/glossary
   Use this WHERE template in all UPDATEs to avoid touching other post types.
   ========================================================== */

-- Preview affected rows
SELECT ID, post_title
FROM wp_posts
WHERE post_type='glossary'
  AND post_status IN ('publish','pending','draft');

/* ==========================================================
   2) CASE A — Remove an EXACT sentence/phrase everywhere
   Example: remove a known boilerplate sentence
   ========================================================== */

-- PREVIEW (ordered by ID so the 50-row sample is the same on every run)
SELECT ID,
       REPLACE(post_content, 'This content was generated by DFX Robot.', '') AS preview
FROM wp_posts
WHERE post_type='glossary'
  AND post_content LIKE '%This content was generated by DFX Robot.%'
ORDER BY ID
LIMIT 50;

-- APPLY
UPDATE wp_posts
SET post_content = REPLACE(post_content, 'This content was generated by DFX Robot.', '')
WHERE post_type='glossary'
  AND post_content LIKE '%This content was generated by DFX Robot.%';

/* ==========================================================
   3) CASE B — Replace a phrase with a new one
   Example: old marker -> new marker
   ========================================================== */

-- PREVIEW
SELECT ID,
       REPLACE(post_content, 'See Also:', 'Related:') AS preview
FROM wp_posts
WHERE post_type='glossary'
  AND post_content LIKE '%See Also:%'
ORDER BY ID
LIMIT 50;

-- APPLY
UPDATE wp_posts
SET post_content = REPLACE(post_content, 'See Also:', 'Related:')
WHERE post_type='glossary'
  AND post_content LIKE '%See Also:%';

/* ==========================================================
   4) CASE C — Delete everything AFTER a marker (keep marker or remove it)
   Example marker: '## References'
   ========================================================== */

-- SUBSTRING_INDEX(..., 1) keeps the text before the FIRST occurrence of the
-- marker, so everything from that first occurrence onward is dropped.

-- PREVIEW (keep marker)
SELECT ID,
       CONCAT(SUBSTRING_INDEX(post_content, '## References', 1), '## References') AS preview
FROM wp_posts
WHERE post_type='glossary'
  AND post_content LIKE '%## References%'
ORDER BY ID
LIMIT 50;

-- The two APPLY statements below are ALTERNATIVES: run exactly one of them.
-- Running option 2 after option 1 also strips the marker that option 1 kept.

-- APPLY, option 1 (keep marker)
UPDATE wp_posts
SET post_content = CONCAT(SUBSTRING_INDEX(post_content, '## References', 1), '## References')
WHERE post_type='glossary'
  AND post_content LIKE '%## References%';

-- APPLY, option 2 (remove marker too)
UPDATE wp_posts
SET post_content = SUBSTRING_INDEX(post_content, '## References', 1)
WHERE post_type='glossary'
  AND post_content LIKE '%## References%';

/* ==========================================================
   5) CASE D — Remove a whole SECTION by start & end markers
   Example: remove from '' to ''
   (the two marker literals are blank in this copy of the playbook;
    supply the real start and end markers before running this section)
   MySQL 8.0+: Use REGEXP_REPLACE; MariaDB fallback below.
========================================================== */

-- NOTE(review): the start/end marker literals were empty strings in the
-- playbook as received (an empty REGEXP pattern is rejected by MySQL 8.0 and
-- an empty LOCATE needle matches at position 1, so nothing useful ran).
-- '@@START@@' and '@@END@@' below are PLACEHOLDERS: replace every occurrence
-- with the real markers. In the two regex patterns, escape any regex
-- metacharacters the real markers contain (use doubled backslashes).

-- PREVIEW (MySQL 8.0+)
-- (?s) lets '.' cross line breaks; '.*?' is lazy so each start marker pairs
-- with the nearest following end marker.
SELECT ID,
       REGEXP_REPLACE(post_content, '(?s)@@START@@.*?@@END@@', '') AS preview
FROM wp_posts
WHERE post_type='glossary'
  AND post_content REGEXP '(?s)@@START@@.*?@@END@@';

-- APPLY (MySQL 8.0+)
UPDATE wp_posts
SET post_content = REGEXP_REPLACE(post_content, '(?s)@@START@@.*?@@END@@', '')
WHERE post_type='glossary'
  AND post_content REGEXP '(?s)@@START@@.*?@@END@@';

/* MariaDB Fallback (no REGEXP_REPLACE) — two-step using LOCATE */

-- TEMP PREVIEW: show ranges found
-- s = position of the first start marker; e = position of the first end
-- marker located after that start marker (0 when there is none).
SELECT ID,
       LOCATE('@@START@@', post_content) AS s,
       LOCATE(
           '@@END@@',
           post_content,
           LOCATE('@@START@@', post_content) + CHAR_LENGTH('@@START@@')
       ) AS e
FROM wp_posts
WHERE post_type='glossary'
  AND post_content LIKE '%@@START@@%'
  AND post_content LIKE '%@@END@@%'
ORDER BY ID
LIMIT 50;

-- APPLY (MariaDB safe delete when both markers exist)
-- Removes the FIRST start..end pair of each row; re-run until 0 rows are
-- affected if a post can contain several sections.
-- The end marker is searched for only after the start marker, so a stray end
-- marker earlier in the text no longer causes the row to be skipped.
-- CHAR_LENGTH (characters) is used because LOCATE/SUBSTRING positions are
-- character-based; LENGTH counts bytes and over-shoots on multibyte markers.
UPDATE wp_posts
SET post_content = CONCAT(
        LEFT(post_content, LOCATE('@@START@@', post_content) - 1),
        SUBSTRING(
            post_content,
            LOCATE(
                '@@END@@',
                post_content,
                LOCATE('@@START@@', post_content) + CHAR_LENGTH('@@START@@')
            ) + CHAR_LENGTH('@@END@@')
        )
    )
WHERE post_type='glossary'
  AND LOCATE('@@START@@', post_content) > 0
  AND LOCATE(
          '@@END@@',
          post_content,
          LOCATE('@@START@@', post_content) + CHAR_LENGTH('@@START@@')
      ) > 0;

/* ==========================================================
   6) CASE E — Remove any line that starts with a label (e.g., 'See also', 'Further Reading')
   MySQL 8.0+ solution with REGEXP_REPLACE; MariaDB fallback with a plain REPLACE per label.
========================================================== */

-- Regex backslashes are DOUBLED in the patterns of this section (\\s, \\t,
-- \\n): inside a MySQL string literal a single backslash in front of a letter
-- such as 's' is dropped, which would turn the whitespace class into a
-- literal letter 's'.

-- PREVIEW (MySQL 8.0+): drop lines that begin with label (case-insensitive)
-- (?m) makes ^ and $ match at every line, (?i) ignores case.
-- [ \t]* is used around the label instead of a general whitespace class so
-- the match can never run across a line break and swallow the next line.
-- The optional trailing group removes the line terminator (LF or CRLF) too.
SELECT ID,
       REGEXP_REPLACE(
           post_content,
           '(?mi)^[ \\t]*(See also|Further Reading)[ \\t]*:?.*$(\\r?\\n)?',
           ''
       ) AS preview
FROM wp_posts
WHERE post_type='glossary'
  AND post_content REGEXP '(?mi)^[ \\t]*(See also|Further Reading)';

-- APPLY (MySQL 8.0+)
-- The WHERE pattern carries (?m) as well, so rows whose label sits on any
-- line (not only the very first one) are selected.
UPDATE wp_posts
SET post_content = REGEXP_REPLACE(
        post_content,
        '(?mi)^[ \\t]*(See also|Further Reading)[ \\t]*:?.*$(\\r?\\n)?',
        ''
    )
WHERE post_type='glossary'
  AND post_content REGEXP '(?mi)^[ \\t]*(See also|Further Reading)';

/* MariaDB fallback idea: target specific exact phrases per REPLACE (repeat per label) */

-- APPLY (MariaDB simple)
-- Removes only the label text itself, wherever it occurs; the rest of the
-- line stays. REPLACE matches case-sensitively.
-- The LIKE filter is unanchored so it selects the same rows REPLACE can change
-- (a filter anchored at the start of the content misses labels further down).
UPDATE wp_posts
SET post_content = REPLACE(post_content, 'See also:', '')
WHERE post_type='glossary'
  AND post_content LIKE '%See also:%';

/* ==========================================================
   7) CASE F — Normalize extra whitespace after removals
   ========================================================== */

-- Multiple blank lines -> single blank line (MySQL 8.0+)
UPDATE wp_posts
SET post_content = REGEXP_REPLACE(post_content, '(\\n\\s*){3,}', '\n\n')
WHERE post_type='glossary';

-- Trim leading/trailing spaces per line (MySQL 8.0+)
-- Only spaces and tabs are trimmed; line breaks are left alone so the blank
-- lines kept by the previous statement survive.
UPDATE wp_posts
SET post_content = REGEXP_REPLACE(post_content, '(?m)^[ \\t]+|[ \\t]+$', '')
WHERE post_type='glossary';

/* ==========================================================
   8) CASE G — Remove everything AFTER Nth occurrence of a marker
   Example: keep content up to 1st '---' horizontal rule; delete rest
   ========================================================== */

-- PREVIEW (first occurrence)
SELECT ID,
       LEFT(post_content, LOCATE('---', post_content) - 1) AS preview
FROM wp_posts
WHERE post_type='glossary'
  AND LOCATE('---', post_content) > 0
ORDER BY ID
LIMIT 50;

-- APPLY (first occurrence)
-- The marker itself is removed as well: LEFT stops one character before it.
UPDATE wp_posts
SET post_content = LEFT(post_content, LOCATE('---', post_content) - 1)
WHERE post_type='glossary'
  AND LOCATE('---', post_content) > 0;

/*
==========================================================
   9) BATCHING — Process in chunks to reduce lock time
   ========================================================== */

-- Example: update 500 rows per batch (repeat until 0 rows affected)
-- LIMIT counts rows that MATCH the WHERE clause, changed or not. REPLACE is
-- case-sensitive, so if the column collation makes LIKE case-insensitive,
-- rows containing a differently-cased variant keep matching without being
-- changed; check the audit count if a batch reports 0 rows too early.
UPDATE wp_posts
SET post_content = REPLACE(post_content, 'OldBot v1', 'OldBot')
WHERE post_type='glossary'
  AND post_content LIKE '%OldBot v1%'
ORDER BY ID
LIMIT 500;

/* ==========================================================
   10) AUDIT — Count how many posts still contain a trace
   ========================================================== */

SELECT COUNT(*) AS remaining
FROM wp_posts
WHERE post_type='glossary'
  AND post_content LIKE '%ROBOT TRACE%';

/* ==========================================================
   11) ROLLBACK — Single post or pattern
   ========================================================== */

-- Use the backup table name created in section 0 (yyyyMMdd is a placeholder).

-- Single post rollback from backup
UPDATE wp_posts AS p
INNER JOIN wp_posts_backup_yyyyMMdd AS b
    ON b.ID = p.ID
SET p.post_content = b.post_content
WHERE p.ID = 12345;

-- Pattern rollback (dangerous; test first)
-- Restores every glossary post whose current content does NOT contain the
-- verification text.
UPDATE wp_posts AS p
INNER JOIN wp_posts_backup_yyyyMMdd AS b
    ON b.ID = p.ID
SET p.post_content = b.post_content
WHERE p.post_type='glossary'
  AND p.post_content NOT LIKE '%SOME_VERIFICATION_AFTER_EDIT%';

/* ==========================================================
   12) COMMON ROBOT TRACE PATTERNS (fill & run one by one)
   ========================================================== */

-- A) Remove trailing generator badge
-- Without (?m), $ anchors at the end of the content, so only a badge at the
-- very end of the post (plus surrounding whitespace) is removed.
UPDATE wp_posts
SET post_content = REGEXP_REPLACE(post_content, '\\s*\\[Generated by DFX\\]\\s*$', '')
WHERE post_type='glossary'
  AND post_content REGEXP '\\[Generated by DFX\\]\\s*$';

-- NOTE(review): item B below is cut off at the end of this excerpt and its
-- regex pattern is incomplete; the line is kept exactly as found.
-- B) Remove HTML comments UPDATE wp_posts SET post_content = REGEXP_REPLACE(post_content, '(?s)', '') WHERE post_type='glossary' AND post_content LIKE '%