Impresso PyCommons
Contents:
Input/Output
Text Rebuild
Utilities
Image handling
Data Versioning
Impresso PyCommons
Index
Index
A | B | C | D | E | F | G | H | I | J | L | M | N | O | P | R | S | T | U | V | W
A
add_by_ci_id() (impresso_commons.versioning.data_manifest.DataManifest method)
add_by_title_year() (impresso_commons.versioning.data_manifest.DataManifest method)
add_count_list_by_title_year() (impresso_commons.versioning.data_manifest.DataManifest method)
add_counts() (impresso_commons.versioning.data_statistics.DataStatistics method)
agg() (in module impresso_commons.versioning.helpers)
aggregate_stats_for_title() (impresso_commons.versioning.data_manifest.DataManifest method)
alternative_read_text() (in module impresso_commons.utils.s3)
append_to_notes() (impresso_commons.versioning.data_manifest.DataManifest method)
B
Base (class in impresso_commons.utils.config_loader)
BoxStrategy (class in impresso_commons.images.img_utils)
bytes_to() (in module impresso_commons.utils.utils)
C
CANONICAL (impresso_commons.versioning.helpers.DataStage attribute)
canonical_path() (in module impresso_commons.path.path_fs)
check_bucket() (impresso_commons.utils.config_loader.Base method)
check_filenaming() (in module impresso_commons.path.path_fs)
check_params() (impresso_commons.utils.config_loader.Base method)
chunk() (in module impresso_commons.utils.utils)
chunk() (in module impresso_commons.versioning.helpers)
cleanup() (in module impresso_commons.text.rebuilder)
clone_git_repo() (in module impresso_commons.versioning.helpers)
compose() (in module impresso_commons.images.img_utils)
compress() (in module impresso_commons.text.rebuilder)
compute() (impresso_commons.versioning.data_manifest.DataManifest method)
compute_box() (in module impresso_commons.images.olive_boxes)
compute_image_links() (in module impresso_commons.utils.uima)
compute_scale_factor() (in module impresso_commons.images.olive_boxes)
compute_stats_for_stage() (in module impresso_commons.versioning.compute_manifest)
compute_stats_in_canonical_bag() (in module impresso_commons.versioning.helpers)
compute_stats_in_entities_bag() (in module impresso_commons.versioning.helpers)
compute_stats_in_langident_bag() (in module impresso_commons.versioning.helpers)
compute_stats_in_rebuilt_bag() (in module impresso_commons.versioning.helpers)
compute_stats_in_solr_text_bag() (in module impresso_commons.versioning.helpers)
ContentItem (in module impresso_commons.path.path_fs)
convert_box() (in module impresso_commons.images.olive_boxes)
count_keys (impresso_commons.versioning.data_statistics.DataStatistics attribute)
count_keys (impresso_commons.versioning.data_statistics.NewspaperStatistics attribute)
counts (impresso_commons.versioning.data_statistics.DataStatistics attribute)
counts (impresso_commons.versioning.data_statistics.NewspaperStatistics attribute)
counts_for_canonical_issue() (in module impresso_commons.versioning.helpers)
counts_for_rebuilt() (in module impresso_commons.versioning.helpers)
create_even_partitions() (in module impresso_commons.utils.daskutils)
create_manifest() (in module impresso_commons.versioning.compute_manifest)
D
DataManifest (class in impresso_commons.versioning.data_manifest)
DataStage (class in impresso_commons.versioning.helpers)
DataStatistics (class in impresso_commons.versioning.data_statistics)
date (impresso_commons.path.path_fs.IssueDir attribute)
define_update_info_for_title() (impresso_commons.versioning.data_manifest.DataManifest method)
detect_canonical_issues() (in module impresso_commons.path.path_fs)
detect_issues() (in module impresso_commons.path.path_fs)
detect_journal_issues() (in module impresso_commons.path.path_fs)
E
edition (impresso_commons.path.path_fs.IssueDir attribute)
element (impresso_commons.versioning.data_statistics.DataStatistics attribute)
element (impresso_commons.versioning.data_statistics.NewspaperStatistics attribute)
EMBEDDINGS (impresso_commons.versioning.helpers.DataStage attribute)
ENTITIES (impresso_commons.versioning.helpers.DataStage attribute)
EVENIZED (impresso_commons.versioning.helpers.DataStage attribute)
extract_np_key() (in module impresso_commons.versioning.compute_manifest)
extract_version() (in module impresso_commons.versioning.helpers)
F
fetch_files() (in module impresso_commons.path.path_s3)
filter_new_or_modified_media() (in module impresso_commons.versioning.helpers)
finalize() (in module impresso_commons.versioning.helpers)
find_s3_data_manifest_path() (in module impresso_commons.versioning.helpers)
fixed_s3fs_glob() (in module impresso_commons.utils.s3)
from_json() (impresso_commons.utils.config_loader.Base class method)
G
generate_media_dict() (impresso_commons.versioning.data_manifest.DataManifest method)
get_boto3_bucket() (in module impresso_commons.utils.s3)
get_bucket() (in module impresso_commons.utils.s3)
get_bucket_boto3() (in module impresso_commons.utils.s3)
get_count_keys() (impresso_commons.versioning.data_manifest.DataManifest method)
get_files_to_consider() (in module impresso_commons.versioning.compute_manifest)
get_head_commit_url() (in module impresso_commons.versioning.helpers)
get_iiif_and_coords() (in module impresso_commons.text.helpers)
get_iiif_links() (in module impresso_commons.utils.uima)
get_iiif_url() (in module impresso_commons.images.olive_boxes)
get_img_from_archive() (in module impresso_commons.images.img_utils)
get_imgdimensions() (in module impresso_commons.images.img_utils)
get_issueshortpath() (in module impresso_commons.path.path_fs)
get_jpg() (in module impresso_commons.images.img_utils)
get_list_intersection() (in module impresso_commons.utils.utils)
get_media_item_years() (in module impresso_commons.versioning.helpers)
get_media_titles() (in module impresso_commons.versioning.helpers)
get_or_create_bucket() (in module impresso_commons.utils.s3)
get_page_folders() (in module impresso_commons.images.img_utils)
get_pkg_resource() (in module impresso_commons.utils.utils)
get_png() (in module impresso_commons.images.img_utils)
get_s3_client() (in module impresso_commons.utils.s3)
get_s3_connection() (in module impresso_commons.utils.s3)
get_s3_object_size() (in module impresso_commons.utils.s3)
get_s3_resource() (in module impresso_commons.utils.s3)
get_s3_versions() (in module impresso_commons.utils.s3)
get_s3_versions_client() (in module impresso_commons.utils.s3)
get_scale_factor() (in module impresso_commons.images.olive_boxes)
get_storage_options() (in module impresso_commons.utils.s3)
get_tif() (in module impresso_commons.images.img_utils)
git_commit_push() (in module impresso_commons.versioning.helpers)
glob_with_size() (in module impresso_commons.utils.utils)
granularity (impresso_commons.versioning.data_statistics.DataStatistics attribute)
granularity (impresso_commons.versioning.data_statistics.NewspaperStatistics attribute)
H
has_title_year_key() (impresso_commons.versioning.data_manifest.DataManifest method)
has_value() (impresso_commons.versioning.helpers.DataStage class method)
I
id2IssueDir() (in module impresso_commons.path)
impresso_commons.images.img_utils
module
impresso_commons.images.olive_boxes
module
impresso_commons.path
module
impresso_commons.path.path_fs
module
impresso_commons.path.path_s3
module
impresso_commons.text.helpers
module
impresso_commons.text.rebuilder
module
impresso_commons.utils.config_loader
module
impresso_commons.utils.daskutils
module
impresso_commons.utils.s3
module
impresso_commons.utils.uima
module
impresso_commons.utils.utils
module
impresso_commons.versioning.compute_manifest
module
impresso_commons.versioning.data_manifest
module
impresso_commons.versioning.data_statistics
module
impresso_commons.versioning.helpers
module
impresso_iter_bucket() (in module impresso_commons.path.path_s3)
increment_version() (in module impresso_commons.versioning.helpers)
init_counts() (impresso_commons.versioning.data_statistics.DataStatistics method)
init_logger() (in module impresso_commons.utils.utils)
init_logging() (in module impresso_commons.text.rebuilder)
init_media_info() (in module impresso_commons.versioning.helpers)
init_yearly_count_dict() (impresso_commons.versioning.data_manifest.DataManifest method)
insert_whitespace() (in module impresso_commons.text.helpers)
is_git_repo() (in module impresso_commons.versioning.helpers)
IssueDir (class in impresso_commons.path.path_fs)
IssueDir (in module impresso_commons.path.path_s3)
J
journal (impresso_commons.path.path_fs.IssueDir attribute)
jpg_highest (impresso_commons.images.img_utils.BoxStrategy attribute)
jpg_uniq (impresso_commons.images.img_utils.BoxStrategy attribute)
L
LANGIDENT (impresso_commons.versioning.helpers.DataStage attribute)
LINGUISTIC_PROCESSING (impresso_commons.versioning.helpers.DataStage attribute)
list_files() (in module impresso_commons.path.path_s3)
list_local_directories() (in module impresso_commons.utils.utils)
list_newspapers() (in module impresso_commons.path.path_s3)
list_s3_directories() (in module impresso_commons.utils.s3)
M
main() (in module impresso_commons.text.rebuilder)
main() (in module impresso_commons.utils.config_loader)
main() (in module impresso_commons.utils.daskutils)
main() (in module impresso_commons.versioning.compute_manifest)
manifest_summary() (in module impresso_commons.versioning.helpers)
media_list_from_mft_json() (in module impresso_commons.versioning.helpers)
module
impresso_commons.images.img_utils
impresso_commons.images.olive_boxes
impresso_commons.path
impresso_commons.path.path_fs
impresso_commons.path.path_s3
impresso_commons.text.helpers
impresso_commons.text.rebuilder
impresso_commons.utils.config_loader
impresso_commons.utils.daskutils
impresso_commons.utils.s3
impresso_commons.utils.uima
impresso_commons.utils.utils
impresso_commons.versioning.compute_manifest
impresso_commons.versioning.data_manifest
impresso_commons.versioning.data_statistics
impresso_commons.versioning.helpers
MYSQL_CIS (impresso_commons.versioning.helpers.DataStage attribute)
N
new_media() (impresso_commons.versioning.data_manifest.DataManifest method)
NewspaperStatistics (class in impresso_commons.versioning.data_statistics)
O
OCRQA (impresso_commons.versioning.helpers.DataStage attribute)
output_mft_s3_path (impresso_commons.versioning.data_manifest.DataManifest property)
overall_stats() (impresso_commons.versioning.data_manifest.DataManifest method)
P
pages_to_article() (in module impresso_commons.text.helpers)
pair_issue() (in module impresso_commons.path.path_fs)
parse_canonical_filename() (in module impresso_commons.path)
parse_json() (in module impresso_commons.utils.utils)
partitioner() (in module impresso_commons.utils.daskutils)
PartitionerConfig (class in impresso_commons.utils.config_loader)
PASSIM (impresso_commons.versioning.helpers.DataStage attribute)
path (impresso_commons.path.path_fs.IssueDir attribute)
png_highest (impresso_commons.images.img_utils.BoxStrategy attribute)
png_uniq (impresso_commons.images.img_utils.BoxStrategy attribute)
possible_count_keys (impresso_commons.versioning.data_statistics.NewspaperStatistics attribute)
pretty_print() (impresso_commons.versioning.data_statistics.DataStatistics method)
pretty_print() (impresso_commons.versioning.data_statistics.NewspaperStatistics method)
R
read_issue() (in module impresso_commons.text.helpers)
read_issue_pages() (in module impresso_commons.text.helpers)
read_jsonlines() (in module impresso_commons.utils.s3)
read_manifest_from_s3() (in module impresso_commons.versioning.helpers)
read_manifest_from_s3_path() (in module impresso_commons.versioning.helpers)
read_page() (in module impresso_commons.text.helpers)
read_s3_issues() (in module impresso_commons.path.path_s3)
readtext_jsonlines() (in module impresso_commons.utils.s3)
rebuild_for_passim() (in module impresso_commons.text.rebuilder)
rebuild_for_solr() (in module impresso_commons.text.rebuilder)
rebuild_issues() (in module impresso_commons.text.rebuilder)
rebuild_text() (in module impresso_commons.text.rebuilder)
rebuild_text_passim() (in module impresso_commons.text.rebuilder)
REBUILT (impresso_commons.versioning.helpers.DataStage attribute)
rebuilt2xmi() (in module impresso_commons.utils.uima)
reconstruct_iiif_link() (in module impresso_commons.text.helpers)
rejoin_articles() (in module impresso_commons.text.helpers)
remove_media_in_manifest() (in module impresso_commons.versioning.helpers)
replace_by_ci_id() (impresso_commons.versioning.data_manifest.DataManifest method)
replace_by_title_year() (impresso_commons.versioning.data_manifest.DataManifest method)
run_cmd() (in module impresso_commons.images.img_utils)
S
s3_filter_archives() (in module impresso_commons.path.path_s3)
s3_get_articles() (in module impresso_commons.utils.s3)
s3_get_pages() (in module impresso_commons.utils.s3)
s3_glob_with_size() (in module impresso_commons.utils.s3)
s3_iter_bucket() (in module impresso_commons.path.path_s3)
s3ContentItem (class in impresso_commons.path.path_s3)
same_counts() (impresso_commons.versioning.data_statistics.DataStatistics method)
same_counts() (impresso_commons.versioning.data_statistics.NewspaperStatistics method)
select_issues() (in module impresso_commons.path.path_fs)
SOLR_EMBS (impresso_commons.versioning.helpers.DataStage attribute)
SOLR_ENTITIES (impresso_commons.versioning.helpers.DataStage attribute)
SOLR_TEXT (impresso_commons.versioning.helpers.DataStage attribute)
stage (impresso_commons.versioning.data_statistics.DataStatistics attribute)
stage (impresso_commons.versioning.data_statistics.NewspaperStatistics attribute)
T
test() (in module impresso_commons.images.olive_boxes)
text_apply_breaks() (in module impresso_commons.text.helpers)
TEXT_REUSE (impresso_commons.versioning.helpers.DataStage attribute)
tif (impresso_commons.images.img_utils.BoxStrategy attribute)
title_level_stats() (impresso_commons.versioning.data_manifest.DataManifest method)
to_dict() (impresso_commons.utils.config_loader.Base method)
TOPICS (impresso_commons.versioning.helpers.DataStage attribute)
U
update_media_stats() (impresso_commons.versioning.data_manifest.DataManifest method)
upload() (in module impresso_commons.text.rebuilder)
upload() (in module impresso_commons.utils.s3)
upload_to_s3() (in module impresso_commons.utils.s3)
V
validate_against_schema() (in module impresso_commons.utils.utils)
validate_and_export_manifest() (impresso_commons.versioning.data_manifest.DataManifest method)
validate_config() (in module impresso_commons.versioning.compute_manifest)
validate_granularity() (in module impresso_commons.versioning.helpers)
validate_stage() (in module impresso_commons.versioning.helpers)
validate_version() (in module impresso_commons.versioning.helpers)
version_as_list() (in module impresso_commons.versioning.helpers)
W
write_and_push_to_git() (in module impresso_commons.versioning.helpers)
write_dump_to_fs() (in module impresso_commons.versioning.helpers)