#!C:\Perl\bin\perl.exe
#
# The #!C:\Perl\bin\perl.exe is Perl location at my office winbox.
#
# Purpose: walk the directories given on the command line (recursively),
# pick up every fb2 file, read the language tag from its title-info section
# and copy the file into "lang-ru" (language is "ru") or "lang-other"
# (anything else) below the current directory.
#
# NOTE: files already present in the output directories are overwritten;
# two input files with the same base name end up as one output file.
#
use strict;
use warnings;

use Cwd;
use Encode;                                      # decode/encode strings between character encodings
use File::Basename qw( basename );
use File::Copy;
use File::Find;
use Time::HiRes qw( gettimeofday tv_interval );

$| = 1;    # turn off stdout buffering

my $cwd = getcwd();

# my @ftypes_G = ("fb2","zip","rar");            # List of filetypes of interest
my @ftypes_G  = ("fb2");                         # List of filetypes of interest
my @dirlist_G = ();                              # List of directories of interest

my $t0_G        = [gettimeofday];                # start of the whole run
my $t_running_G = [gettimeofday];                # start of the current progress interval

# Not used by the code visible in this file; kept so that anything relying
# on these file-level names keeps compiling under "use strict".
my $i                     = 0;
my $unzip_G               = "\"C:\\Program Files\\7-Zip\\7z.exe\"";
my $unzip_tmp_out_fname_G = "tmp-unzip-out.txt";
my $t1_G                  = [gettimeofday];

print "\n-------------------------------------------------------\n";
print "cwd=$cwd";
print "\n-------------------------------------------------------\n";

###
### Read in directories given as parameters, if no parameters given - exit
###
if ( @ARGV > 0 )
{
    @dirlist_G = @ARGV;
    for my $l_argno ( 0 .. $#ARGV )
    {
        printf( "DEBUG: arg[%2d]=\"%s\"\n", $l_argno, $ARGV[$l_argno] );
    }
}
else
{
    # The tag name is assembled piecewise: markup-like text appears to have
    # been stripped from this script before, so no literal tag is spelled out.
    my $l_lang_tag = "<" . "lang" . ">";

    print "ERROR:123: You must specify in command line at least one directory to process.\n";
    print "USAGE: This script takes fb2-files from the specified directory(s) (recursively) and makes copy of them in the currnet directory according to $l_lang_tag tag. \"ru\" goes into (created) subdir \"lang-ru\", others go into (created) subdir \"lang-other\".\n";
    print "NOTE: everything in the current directory will be silently overwritten. So beware.\n";
    exit 1;    # usage error: report failure to the caller
}

for my $l_idx ( 0 .. $#dirlist_G )
{
    printf( "DEBUG:[%2d] dir=\"%s\"\n", $l_idx, $dirlist_G[$l_idx] );
}
for my $l_idx ( 0 .. $#ftypes_G )
{
    printf( "DEBUG:[%2d] type=\"%s\"\n", $l_idx, $ftypes_G[$l_idx] );
}
print "\n-------------------------------------------------------\n";

######### Main loop
my $fullfname_G = "";

my @FL_G     = ();    # files found in the directory being processed
my @FL_RU__G = ();    # files copied into lang-ru    (current directory)
my @FL_OTH_G = ();    # files copied into lang-other (current directory)
my @FL_BAD_G = ();    # files that could not be copied (current directory)

my $listsize_G    = 0;    # number of files in the directory being processed
my $curnum_G      = 0;    # files handled so far in the current directory
my $prev_curnum_G = 0;    # value of $curnum_G at the previous progress report
my $numtot_G      = 0;
my $numall_G      = 0;
my $numbad_G      = 0;
my $numru__G      = 0;
my $numoth_G      = 0;

# Not used by the code visible in this file (see note above).
my $ii                = 0;
my $fname_G           = "";
my $outfname_G        = "";
my $outfname_marked_G = "";
my $origin_mark_G     = "";
my $buf_G             = "";
my %HAUT_G            = ();    # by author
my %HTIT_G            = ();    # by title
my $numeng_G          = 0;
my $numwin_G          = 0;
my $numiso_G          = 0;
my $numkoi_G          = 0;
my $numutf_G          = 0;

my $dirname_ru__G = "lang-ru";
my $dirname_oth_G = "lang-other";

### Action!
for my $l_outdir ( $dirname_ru__G, $dirname_oth_G )
{
    next if -d $l_outdir;    # already there from a previous run
    mkdir($l_outdir)
        or die "ERROR: cannot create output directory \"$l_outdir\": $!\n";
}

my $numdir_G = 0;
foreach my $dir (@dirlist_G)
{
    $numdir_G++;
    printf( "Processing directory: %s (%d of %d)\n", $dir, $numdir_G, scalar(@dirlist_G) );

    my $t_dir = [gettimeofday];    # start of work on this directory
    @FL_G = koplib_generate_filelist( [$dir], \@ftypes_G );    # lists are passed as references
    my $listsize = @FL_G;
    printf( " FL_G contains %d files.\n", $listsize );
    my $elapsed_list = tv_interval( $t_dir, [gettimeofday] );
    printf(
        "Completed: building filelist for dir=\"%s\": elapsed=%f seconds (%f seconds per file)\n",
        $dir, $elapsed_list, koplib_safe_ratio( $elapsed_list, $listsize )
    );
    print "-------------------------------------------------------\n";

    # Per-directory state: the progress report compares the running count
    # with the size of this directory's list, so both start afresh here.
    @FL_RU__G      = ();
    @FL_OTH_G      = ();
    @FL_BAD_G      = ();
    $listsize_G    = $listsize;
    $curnum_G      = 0;
    $prev_curnum_G = 0;
    $t_running_G   = [gettimeofday];

    foreach my $l_fullfname (@FL_G)
    {
        $fullfname_G = $l_fullfname;
        $curnum_G++;
        $numall_G++;
        $numtot_G++;

        my $rawdes = koplib_fb2_load_raw_description_from_file($fullfname_G);
        my $enc    = koplib_fb2_get_encoding($rawdes);
        my $tii    = koplib_fb2_get_tag_content( $rawdes, "title-info", $enc );

        # $tii has already been converted to windows-1251 by the call above,
        # so the nested lookup must not decode it from $enc a second time.
        my $lan = koplib_fb2_get_tag_content( $tii, "lang", "windows-1251" );

        my $l_is_ru = ( uc($lan) eq "RU" );
        my $newf = ( $l_is_ru ? $dirname_ru__G : $dirname_oth_G ) . "/" . basename($fullfname_G);

        ### Action!
        if ( !copy( $fullfname_G, $newf ) )
        {
            printf( "ERROR: cannot copy \"%s\" to \"%s\": %s\n", $fullfname_G, $newf, $! );
            $numbad_G++;
            push( @FL_BAD_G, $fullfname_G );
        }
        elsif ($l_is_ru)
        {
            $numru__G++;
            push( @FL_RU__G, $fullfname_G );
        }
        else
        {
            $numoth_G++;
            push( @FL_OTH_G, $fullfname_G );
            if ( length($lan) < 2 )
            {
                # No usable language tag: show what was found for diagnosis.
                printf( "DEBUG:lan=\"%s\" enc=%s fil=%s tii=%s\n", $lan, $enc, $newf, $tii );
            }
        }

        ###
        ### Progress indicator, once every 250 files
        ###
        if ( ( $curnum_G % 250 ) == 0 && $curnum_G != $prev_curnum_G )
        {
            my $percent = ( $curnum_G * 100.0 ) / $listsize_G;
            my $elapsed = tv_interval( $t_running_G, [gettimeofday] );
            $t_running_G = [gettimeofday];
            my $perfile = $elapsed / ( $curnum_G - $prev_curnum_G );
            $prev_curnum_G = $curnum_G;
            my $todo    = $listsize_G - $curnum_G;
            my $eta     = $perfile * $todo;
            my $eta_min = $eta / 60;
            my $eta_sec = $eta % 60;
            printf( "[%6d of %6d]", $curnum_G, $listsize_G );
            printf( " (%4.1f%%)(ETA=%3d min %2d sec)(%f per file)", $percent, $eta_min, $eta_sec, $perfile );
            printf("\n");
        }
    }    # foreach file in FL_G

    print "\n-------------------------------------------------------\n";
    printf( "\tTotal : %6d files for directory %s\n", scalar(@FL_G), $dir );
    printf( "\tru : %6d files.\n",    scalar(@FL_RU__G) );
    printf( "\tother : %6d files.\n", scalar(@FL_OTH_G) );
    printf( "\tfailed : %6d files.\n", scalar(@FL_BAD_G) ) if @FL_BAD_G;
    printf("-------------------------------------------------------\n");
    my $elapsed_dir = tv_interval( $t_dir, [gettimeofday] );
    printf(
        "Completed processing for dir=\"%s\": %d files, elapsed %.1f seconds (%f seconds per file)\n",
        $dir, $listsize, $elapsed_dir, koplib_safe_ratio( $elapsed_dir, $listsize )
    );
}    # foreach dir

print "\n-------------------------------------------------------\n";
printf( "\t\t\tGrand Total : %6d files.\n", $numall_G );
printf( "\t\t\tru : %6d files.\n",          $numru__G );
printf( "\t\t\tother : %6d files.\n",       $numoth_G );
printf( "\t\t\tfailed : %6d files.\n",      $numbad_G ) if $numbad_G;
printf("-------------------------------------------------------\n");
printf("-------------------------------------------------------\n");
my $elapsed_total = tv_interval( $t0_G, [gettimeofday] );
printf(
    "Completed all processing: %d files, elapsed %.1f seconds (%f seconds per file)\n",
    $numtot_G, $elapsed_total, koplib_safe_ratio( $elapsed_total, $numtot_G )
);
printf("-------------------------------------------------------\n");
exit(0);

###############################################################################
#
# Parameters: $lp_total, $lp_count
# Returns   : $lp_total / $lp_count, or 0 when $lp_count is zero
# Uses      :
# Globals   : none
# Errors    : none (exists to avoid "Illegal division by zero" in reports)
#
sub koplib_safe_ratio    # @METAGS koplib_safe_ratio
{
    my ( $lp_total, $lp_count ) = @_;
    return $lp_count ? ( $lp_total / $lp_count ) : 0;
}    # koplib_safe_ratio

###############################################################################
#
# Parameters: list of directory path as parameter (could be single dir, but still a list)
#             list of filetypes of interest (optional, default is all files)
#             both are passed as array references
# Returns   : resulting full pathnames as list
# Uses      : File::Find File::Find::name
# Globals   : none
# Errors    : entries that are not directories are skipped silently
#
sub koplib_generate_filelist    # @METAGS koplib_generate_filelist
{
    my ( $lp_dirs, $lp_ftypes ) = @_;

    my @l_flist          = ();
    my @l_dir_given      = @{$lp_dirs};
    my $l_have_types_fok = defined($lp_ftypes) ? 1 : 0;

    # Upper-cased suffixes of interest, for a case-insensitive lookup.
    my %l_is_wanted_suffix = ();
    if ($l_have_types_fok)
    {
        %l_is_wanted_suffix = map { ( uc($_) => 1 ) } @{$lp_ftypes};
    }

    for my $l_idx ( 0 .. $#l_dir_given )
    {
        printf( "DEBUG:koplib_generate_filelist:[%2d] l_dir_given=\"%s\"\n", $l_idx, $l_dir_given[$l_idx] );
    }

    # generate the list of files
    foreach my $ll_dir (@l_dir_given)
    {
        next if !-d $ll_dir;

        find(
            {
                wanted => sub {
                    return if !-f $File::Find::name;    # plain files only

                    if ($l_have_types_fok)
                    {
                        # Everything after the last dot is taken as the filetype suffix.
                        my $l_suffix = substr( $File::Find::name, rindex( $File::Find::name, '.' ) + 1 );
                        return if !$l_is_wanted_suffix{ uc($l_suffix) };
                    }

                    push( @l_flist, $File::Find::name );
                    return;
                },
                follow => 0,

                # Stay in the starting directory, so that the -f test on
                # $File::Find::name also works for relative directory names.
                no_chdir => 1,
            },
            $ll_dir
        );
    }

    return @l_flist;
}    # koplib_generate_filelist

###############################################################################
#
# Parameters: $lp_inp_fb2_fname
# Returns   : $lr_description - the file content from its start up to and
#             including the description end tag; "" if that tag is not found
# Uses      :
# Globals   : none
# Errors    : exit (253) if input file cannot be opened
#
sub koplib_fb2_load_raw_description_from_file    # @METAGS koplib_fb2_load_raw_description_from_file
{
    my ($lp_inp_fb2_fname) = @_;

    # Raw bytes are read on purpose: the caller decodes them with Encode.
    my $l_inp_fh;
    if ( !open( $l_inp_fh, '<:raw', $lp_inp_fb2_fname ) )
    {
        printf(
            "ERROR:378:koplib_fb2_load_raw_description_from_file: File does not exist fname=\"%s\" reason=\"%s\"\n",
            $lp_inp_fb2_fname, $!
        );
        exit(253);
    }
    my $l_allline = do { local $/ = undef; readline($l_inp_fh) };
    close($l_inp_fh);
    $l_allline = "" if !defined($l_allline);    # empty file

    # The end tag is assembled piecewise so that no literal markup is spelled
    # out here; it is 14 characters long, which is the offset the original
    # code added after finding it. The search is case-insensitive.
    my $l_end_tag = "<" . "/description>";
    my $li = index( lc($l_allline), $l_end_tag );
    if ( $li <= 0 )
    {
        return "";    # description end tag was not found
    }

    return substr( $l_allline, 0, $li + length($l_end_tag) );
}    # koplib_fb2_load_raw_description_from_file

###############################################################################
#
# Parameters: $lp_description
# Returns   : $lr_encoding - value of the first encoding="..." attribute
#             (at most 33 characters), "" if there is none
# Uses      :
# Globals   : none
# Errors    :
#
sub koplib_fb2_get_encoding    # @METAGS koplib_fb2_get_encoding
{
    my ($lp_description) = @_;
    $lp_description = "" if !defined($lp_description);

    my $l_marker = " ENCODING=\"";
    my $i = index( uc($lp_description), $l_marker );
    if ( $i < 0 )
    {
        return "";    # No proper encoding here
    }

    my $ibeg = $i + length($l_marker);
    my $iend = index( $lp_description, "\"", $ibeg );
    if ( $iend < 0 )
    {
        return "";    # attribute value is not terminated
    }

    my $ilen = $iend - $ibeg;
    if ( $ilen > 33 )    # Magic here, black one...
    {
        $ilen = 33;
    }

    return substr( $lp_description, $ibeg, $ilen );
}    # koplib_fb2_get_encoding

###############################################################################
#
# Parameters: $lp_description $lp_tagname, $lp_encoding (optional)
# Returns   : $lr_tag_content - text between the first opening tag and the
#             following closing tag, converted by koplib_make_win1251;
#             "" if the tag is missing or empty
# Uses      : koplib_make_win1251 koplib_fb2_get_encoding
# Globals   : none
# Errors    :
# Notes     : tag names are matched case-insensitively; an opening tag that
#             carries attributes is not recognised
#
sub koplib_fb2_get_tag_content    # @METAGS koplib_fb2_get_tag_content
{
    my ( $lp_description, $lp_tagname, $lp_encoding ) = @_;
    $lp_description = "" if !defined($lp_description);
    $lp_encoding    = "" if !defined($lp_encoding);

    if ( length($lp_encoding) < 4 )
    {
        ### Wrong encoding? We better get it ourselves
        $lp_encoding = koplib_fb2_get_encoding($lp_description);
    }

    my $l_description_uc = uc($lp_description);
    my $l_tagname_uc     = uc($lp_tagname);
    my $l_open_tag       = "<" . $l_tagname_uc . ">";
    my $l_close_tag      = "<" . "/" . $l_tagname_uc . ">";

    my $i = index( $l_description_uc, $l_open_tag );
    if ( $i < 0 )
    {
        return "";    # opening tag not found
    }
    my $ibeg = $i + length($l_open_tag);

    my $iend = index( $l_description_uc, $l_close_tag, $ibeg );
    if ( $iend < 0 )
    {
        return "";    # closing tag not found
    }

    my $ilen = $iend - $ibeg;
    if ( $ilen < 1 )
    {
        return "";    # empty tag
    }
    if ( $ilen > length($lp_description) - $ibeg )
    {
        return "";    # error occured, return empty tag content
    }

    return koplib_make_win1251( substr( $lp_description, $ibeg, $ilen ), $lp_encoding );
}    # koplib_fb2_get_tag_content

###############################################################################
#
# Parameters: $lp_input $lp_encoding
# Returns   : $lr_out - $lp_input re-encoded as windows-1251; $lp_input
#             unchanged when the encoding name is shorter than 3 characters
#             or is not one of the names listed below
# Uses      : Encode
# Globals   : none
# Errors    :
#
sub koplib_make_win1251    # @METAGS koplib_make_win1251
{
    my ( $lp_input, $lp_encoding ) = @_;
    $lp_encoding = "" if !defined($lp_encoding);

    if ( length($lp_encoding) < 3 )
    {
        return $lp_input;    # wrong encoding call, do nothing
    }

    # Declared encoding name (upper case) => Encode name used for decoding.
    # NOTE(review): WINDOWS-1252 is decoded as iso-8859-2, exactly as the
    # original code did - confirm whether that is intentional.
    my %l_decoder_for = (
        "WINDOWS-1252" => "iso-8859-2",
        "WINDOWS-1251" => "windows-1251",
        "ISO-8859-1"   => "iso-8859-1",
        "KOI8-R"       => "koi8-r",
        "UTF-8"        => "utf-8",
    );

    my $l_decoder = $l_decoder_for{ uc($lp_encoding) };
    if ( !defined($l_decoder) )
    {
        return $lp_input;    # unknown encoding, do nothing
    }

    return encode( "windows-1251", decode( $l_decoder, $lp_input ) );
}    # koplib_make_win1251