text_length
word_count
line_count
non_empty_lines
avg_line_len
short_lines
long_lines
very_long_lines
blow_count_matches
sample_type_matches
soil_hits
uscs_matches
unique_uscs
depth_matches
has_recovery
has_continuation
has_title
has_title_at_top
hard_disqualifier_hit
soft_disqualifier_hit
is_appendix_divider
distinct_boring_ids
has_boring_id
drawing_count
di_has_tables
numeric_tokens
numeric_ratio
water_level_mentions
elevation_mentions
has_page_of
foreign_word_count
photo_mentions
lab_test_mentions
cross_section_signal
has_sheet_of
has_firm_name
european_decimals
non_ascii_ratio
safety_factor_mentions
coordinate_pairs
calc_keywords
has_file_path
scientific_notation
section_numbers
paragraphs
words_per_line
has_project_label
content_density
toc_header
page_ref_lines
letter_signals
has_date_line
has_prepared_by
has_langan_header
location_plan_signal
appendix_label
is_short_page
infiltration_signal
test_pit_signal
astm_count
has_lab_header
sieve_data
moisture_data
lab_vocab_hits
has_calc_header
structural_keywords
calc_vocab_hits
scale_mentions
has_reference_list
page_position
page_number_raw
total_pages
is_first_10pct
is_last_25pct
tfidf_the
tfidf_appendix
tfidf_pm_pm
tfidf_photo
tfidf_test_pit
tfidf_rate_in
tfidf_pit
tfidf_date_taken
tfidf_photo_number
tfidf_hour
tfidf_taken_location
tfidf_el_offset
tfidf_drilling
tfidf_mpa
tfidf_be
tfidf_should
tfidf_brown
tfidf_scale_in
tfidf_taken
tfidf_comments
tfidf_cross
tfidf_cross_section
tfidf_ss
tfidf_number_date
tfidf_location_test
tfidf_water_ft
tfidf_st
tfidf_offset
tfidf_tp_tp
tfidf_should_be
tfidf_casing
tfidf_tp_comments
tfidf_and
tfidf_photograph
tfidf_rate
tfidf_trace
tfidf_tp
tfidf_am_am
tfidf_bw
tfidf_in_feet
tfidf_log_page
tfidf_moist
tfidf_kn
tfidf_pile
tfidf_log
tfidf_drop_shaft
tfidf_ncc
tfidf_of_the
tfidf_pit_log
tfidf_pit_at
tfidf_elevation_feet
tfidf_rec_cm
tfidf_map
tfidf_infiltration_rate
tfidf_logistics_center
tfidf_logistics
tfidf_of_boring
tfidf_lpfe
tfidf_to
tfidf_fluid
