← Back to Library Pop-Up Tools
Weeding Screener (weeding_screener.py)
This tool takes a circulation/export CSV and adds a weeding_candidate flag based on simple rules
(years since last checkout, years since publication, total checkouts).
How It Works (In Plain Language)
- Finds key columns by looking for words like "year", "pub", "last", and "total checkouts" in the column names.
- Reads each row and tries to understand: how long ago it was last checked out, how old it is, and how many times it has been checked out.
- Uses simple rules at the top of the file (for example "5+ years since last checkout", "10+ years old", and "very few checkouts"). You can edit those numbers.
- If at least two conditions are true, it marks weeding_candidate = "yes" for that row.
- Saves a new CSV with all your original columns plus the weeding_candidate column so you can sort/filter in Excel.
How to Use
- Place your circ/export CSV (for example circ_data.csv) in this folder.
- Open Terminal and run: cd ~/Desktop/library_pop_up_tools
- Then run: python weeding_screener.py circ_data.csv weeding_candidates.csv
- Open weeding_candidates.csv and look for weeding_candidate=yes.
library_pop_up_tools % python weeding_screener.py circ_data.csv weeding_candidates.csv
Weeding candidates written to: weeding_candidates.csv
Weeding candidates written to: weeding_candidates.csv
Optional: Adjust the Rules
You can use this tool without changing anything. At the top of the script there are a few numbers you can edit to match your local policy (for example how many years since last checkout).
MAX_YEARS_SINCE_LAST_CHECKOUT = 5
MAX_YEARS_SINCE_PUBLICATION = 10
MIN_TOTAL_CHECKOUTS = 0
If you want to be stricter or looser, open weeding_screener.py, change these numbers, save, and run it again.
Full Python Source (Optional)
Click to show the full script
#!/usr/bin/env python3
"""
weeding_screener.py
Pop-up tool to flag potential weeding candidates from a circulation export (CSV).
You can adjust the defaults at the top of this file to match your local policy.
Example:
python weeding_screener.py circ_export.csv weeding_candidates.csv
"""
import csv
import sys
from datetime import datetime
from pathlib import Path
# Policy thresholds read by should_flag(); edit these to match local policy.
# Condition is met when the last checkout was at least this many years ago.
MAX_YEARS_SINCE_LAST_CHECKOUT = 5
# Condition is met when the item was published at least this many years ago.
MAX_YEARS_SINCE_PUBLICATION = 10
# Condition is met when total checkouts are at or below this number
# (despite the MIN_ prefix, should_flag() uses it as an upper bound).
MIN_TOTAL_CHECKOUTS = 0
# strptime patterns that parse_date() tries in order; the first match wins.
DATE_FORMATS = [
    "%Y-%m-%d",
    "%m/%d/%Y",
    "%Y/%m/%d",
]
def parse_year(value: str) -> int | None:
if not value:
return None
digits = "".join(ch for ch in value if ch.isdigit())
if len(digits) >= 4:
try:
return int(digits[:4])
except ValueError:
return None
return None
def parse_date(value: str) -> datetime | None:
if not value:
return None
value = value.strip()
for fmt in DATE_FORMATS:
try:
return datetime.strptime(value, fmt)
except ValueError:
continue
return None
def years_between(start: datetime, end: datetime) -> float:
    """Return the span from *start* to *end* expressed in years.

    Only the whole-day component of the difference is used, divided by
    365.25 days per year. The result is negative when *end* precedes *start*.
    """
    elapsed = end - start
    return elapsed.days / 365.25
def should_flag(row: dict, today: datetime) -> bool:
    """Decide whether a circulation row looks like a weeding candidate.

    Columns are located by case-insensitive substring match on the header:
    "last" + "check" for the last checkout date, "year" or "pub" for the
    publication year, and "total" + "check" for the checkout count. For each
    measurement the first matching column that yields a usable value wins.

    Args:
        row: One CSV record mapping column name to cell text.
        today: Reference date used to compute ages.

    Returns:
        True when at least two of the three threshold conditions hold.
        Measurements that cannot be determined never count as met.
    """
    # csv.DictReader files the surplus cells of a ragged row under the key
    # None, so only string keys can be treated as column names.
    headers = [(key, key.lower()) for key in row if isinstance(key, str)]
    last_checkout_keys = [k for k, low in headers if "last" in low and "check" in low]
    pub_year_keys = [k for k, low in headers if "year" in low or "pub" in low]
    total_chk_keys = [k for k, low in headers if "total" in low and "check" in low]

    # Cells may be None when a row is shorter than the header; "or ''" maps
    # those to empty text before parsing.
    last_checkout_years = None
    for key in last_checkout_keys:
        checked_out = parse_date(row.get(key) or "")
        if checked_out:
            last_checkout_years = years_between(checked_out, today)
            break

    pub_years = None
    for key in pub_year_keys:
        year = parse_year(row.get(key) or "")
        if year:
            pub_years = years_between(datetime(year, 1, 1), today)
            break

    total_checkouts = None
    for key in total_chk_keys:
        # A blank count cell is read as zero checkouts.
        cell = (row.get(key) or "").strip()
        try:
            total_checkouts = int(cell or "0")
        except ValueError:
            continue
        break

    conditions = []
    if last_checkout_years is not None:
        conditions.append(last_checkout_years >= MAX_YEARS_SINCE_LAST_CHECKOUT)
    if pub_years is not None:
        conditions.append(pub_years >= MAX_YEARS_SINCE_PUBLICATION)
    if total_checkouts is not None:
        conditions.append(total_checkouts <= MIN_TOTAL_CHECKOUTS)
    # Each entry is a bool, so the sum is the number of conditions met.
    return sum(conditions) >= 2
def screen_weeding(input_path: Path, output_path: Path) -> None:
    """Copy a circulation CSV to *output_path*, adding a weeding_candidate column.

    Every input column is written back unchanged; the extra column holds
    "yes" for rows that should_flag() selects and is empty otherwise.

    Args:
        input_path: CSV export to read (decoded as utf-8-sig).
        output_path: CSV file to create or overwrite (encoded as utf-8).
    """
    today = datetime.today()
    with (
        input_path.open(newline="", encoding="utf-8-sig") as infile,
        output_path.open("w", newline="", encoding="utf-8") as outfile,
    ):
        reader = csv.DictReader(infile)
        fieldnames = list(reader.fieldnames or [])
        if "weeding_candidate" not in fieldnames:
            fieldnames.append("weeding_candidate")
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in reader:
            # DictReader stores cells beyond the header under the key None and
            # fills missing cells with None. The None key has no column to be
            # written to (DictWriter would raise), so it is dropped; missing
            # cells become empty text.
            cleaned = {
                key: ("" if cell is None else cell)
                for key, cell in row.items()
                if key is not None
            }
            cleaned["weeding_candidate"] = "yes" if should_flag(cleaned, today) else ""
            writer.writerow(cleaned)
def main(argv: list[str]) -> int:
    """Command-line entry point for the weeding screener.

    Args:
        argv: Full argument vector: program name, input CSV path, output
            CSV path.

    Returns:
        0 after the output file has been written; 1 when the arguments are
        wrong, the input file is missing, or the output path is the input file.
    """
    if len(argv) != 3:
        print("Usage: python weeding_screener.py circ_export.csv weeding_candidates.csv")
        return 1
    input_path = Path(argv[1]).expanduser()
    output_path = Path(argv[2]).expanduser()
    if not input_path.exists():
        print(f"Input file not found: {input_path}")
        return 1
    # The output is opened for writing (which truncates it) before the input
    # is read, so pointing both at one file would wipe the data.
    if output_path.exists() and output_path.samefile(input_path):
        print(f"Output file must be different from input file: {input_path}")
        return 1
    screen_weeding(input_path, output_path)
    print(f"Weeding candidates written to: {output_path}")
    return 0
# Run the command-line interface only when this file is executed as a script;
# the value returned by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
#!/usr/bin/env python3
"""
weeding_screener.py
Pop-up tool to flag potential weeding candidates from a circulation export (CSV).
You can adjust the defaults at the top of this file to match your local policy.
Example:
python weeding_screener.py circ_export.csv weeding_candidates.csv
"""
import csv
import sys
from datetime import datetime
from pathlib import Path
# Policy thresholds read by should_flag(); edit these to match local policy.
# Condition is met when the last checkout was at least this many years ago.
MAX_YEARS_SINCE_LAST_CHECKOUT = 5
# Condition is met when the item was published at least this many years ago.
MAX_YEARS_SINCE_PUBLICATION = 10
# Condition is met when total checkouts are at or below this number
# (despite the MIN_ prefix, should_flag() uses it as an upper bound).
MIN_TOTAL_CHECKOUTS = 0
# strptime patterns that parse_date() tries in order; the first match wins.
DATE_FORMATS = [
    "%Y-%m-%d",
    "%m/%d/%Y",
    "%Y/%m/%d",
]
def parse_year(value: str) -> int | None:
if not value:
return None
digits = "".join(ch for ch in value if ch.isdigit())
if len(digits) >= 4:
try:
return int(digits[:4])
except ValueError:
return None
return None
def parse_date(value: str) -> datetime | None:
if not value:
return None
value = value.strip()
for fmt in DATE_FORMATS:
try:
return datetime.strptime(value, fmt)
except ValueError:
continue
return None
def years_between(start: datetime, end: datetime) -> float:
    """Return the span from *start* to *end* expressed in years.

    Only the whole-day component of the difference is used, divided by
    365.25 days per year. The result is negative when *end* precedes *start*.
    """
    elapsed = end - start
    return elapsed.days / 365.25
def should_flag(row: dict, today: datetime) -> bool:
    """Decide whether a circulation row looks like a weeding candidate.

    Columns are located by case-insensitive substring match on the header:
    "last" + "check" for the last checkout date, "year" or "pub" for the
    publication year, and "total" + "check" for the checkout count. For each
    measurement the first matching column that yields a usable value wins.

    Args:
        row: One CSV record mapping column name to cell text.
        today: Reference date used to compute ages.

    Returns:
        True when at least two of the three threshold conditions hold.
        Measurements that cannot be determined never count as met.
    """
    # csv.DictReader files the surplus cells of a ragged row under the key
    # None, so only string keys can be treated as column names.
    headers = [(key, key.lower()) for key in row if isinstance(key, str)]
    last_checkout_keys = [k for k, low in headers if "last" in low and "check" in low]
    pub_year_keys = [k for k, low in headers if "year" in low or "pub" in low]
    total_chk_keys = [k for k, low in headers if "total" in low and "check" in low]

    # Cells may be None when a row is shorter than the header; "or ''" maps
    # those to empty text before parsing.
    last_checkout_years = None
    for key in last_checkout_keys:
        checked_out = parse_date(row.get(key) or "")
        if checked_out:
            last_checkout_years = years_between(checked_out, today)
            break

    pub_years = None
    for key in pub_year_keys:
        year = parse_year(row.get(key) or "")
        if year:
            pub_years = years_between(datetime(year, 1, 1), today)
            break

    total_checkouts = None
    for key in total_chk_keys:
        # A blank count cell is read as zero checkouts.
        cell = (row.get(key) or "").strip()
        try:
            total_checkouts = int(cell or "0")
        except ValueError:
            continue
        break

    conditions = []
    if last_checkout_years is not None:
        conditions.append(last_checkout_years >= MAX_YEARS_SINCE_LAST_CHECKOUT)
    if pub_years is not None:
        conditions.append(pub_years >= MAX_YEARS_SINCE_PUBLICATION)
    if total_checkouts is not None:
        conditions.append(total_checkouts <= MIN_TOTAL_CHECKOUTS)
    # Each entry is a bool, so the sum is the number of conditions met.
    return sum(conditions) >= 2
def screen_weeding(input_path: Path, output_path: Path) -> None:
    """Copy a circulation CSV to *output_path*, adding a weeding_candidate column.

    Every input column is written back unchanged; the extra column holds
    "yes" for rows that should_flag() selects and is empty otherwise.

    Args:
        input_path: CSV export to read (decoded as utf-8-sig).
        output_path: CSV file to create or overwrite (encoded as utf-8).
    """
    today = datetime.today()
    with (
        input_path.open(newline="", encoding="utf-8-sig") as infile,
        output_path.open("w", newline="", encoding="utf-8") as outfile,
    ):
        reader = csv.DictReader(infile)
        fieldnames = list(reader.fieldnames or [])
        if "weeding_candidate" not in fieldnames:
            fieldnames.append("weeding_candidate")
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()
        for row in reader:
            # DictReader stores cells beyond the header under the key None and
            # fills missing cells with None. The None key has no column to be
            # written to (DictWriter would raise), so it is dropped; missing
            # cells become empty text.
            cleaned = {
                key: ("" if cell is None else cell)
                for key, cell in row.items()
                if key is not None
            }
            cleaned["weeding_candidate"] = "yes" if should_flag(cleaned, today) else ""
            writer.writerow(cleaned)
def main(argv: list[str]) -> int:
    """Command-line entry point for the weeding screener.

    Args:
        argv: Full argument vector: program name, input CSV path, output
            CSV path.

    Returns:
        0 after the output file has been written; 1 when the arguments are
        wrong, the input file is missing, or the output path is the input file.
    """
    if len(argv) != 3:
        print("Usage: python weeding_screener.py circ_export.csv weeding_candidates.csv")
        return 1
    input_path = Path(argv[1]).expanduser()
    output_path = Path(argv[2]).expanduser()
    if not input_path.exists():
        print(f"Input file not found: {input_path}")
        return 1
    # The output is opened for writing (which truncates it) before the input
    # is read, so pointing both at one file would wipe the data.
    if output_path.exists() and output_path.samefile(input_path):
        print(f"Output file must be different from input file: {input_path}")
        return 1
    screen_weeding(input_path, output_path)
    print(f"Weeding candidates written to: {output_path}")
    return 0
# Run the command-line interface only when this file is executed as a script;
# the value returned by main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))