diff --git a/outils/stats-corpus/statsCorpus.pl b/outils/stats-corpus/statsCorpus.pl index e357904..64c0f0a 100755 --- a/outils/stats-corpus/statsCorpus.pl +++ b/outils/stats-corpus/statsCorpus.pl @@ -26,8 +26,8 @@ " $substitut [ -f (csv|json|tsv) ] [ -x (normal|lodex) ] \n". " $programme -h \n"; -my $version = "3.0.0"; -my $dateModif = "1 Juin 2018"; +my $version = "3.3.1"; +my $dateModif = "24 Juillet 2018"; # Variables pour les options my $aide = 0; @@ -253,7 +253,7 @@ $nbDocs = $nbIds } if ( $type eq 'json' ) { - print OUT "{\n \"total\": $nbDocs,\n \"docs\": [\n"; + print OUT "{\n \"total\": $nbDocs,\n \"data\": [\n"; } # ... avant la lecture du fichier “logRequete” @@ -313,7 +313,7 @@ $nbDocs = $#fichiers + 1; if ( $type eq 'json' ) { - print OUT "{\n \"total\": $nbDocs,\n \"docs\": [\n"; + print OUT "{\n \"total\": $nbDocs,\n \"data\": [\n"; } foreach my $fichier (@fichiers) { @@ -433,42 +433,26 @@ my $scienceMetrix = ""; my $scopus = ""; my $inist = ""; +if ( $type eq 'json' ) { + $wos = []; + $scienceMetrix = []; + $scopus = []; + $inist = []; + } if ( defined $hit{'categories'} ) { my %categories = %{$hit{'categories'}}; if ( defined $categories{'wos'} ) { - $wos = join(" ; ", @{$categories{'wos'}}) if $type ne 'json'; - $wos = \@{$categories{'wos'}} if $type eq 'json'; - } - elsif ( $type eq 'json' ) { - $wos = []; + $wos = categories($categories{'wos'}); } if ( defined $categories{'scienceMetrix'} ) { - $scienceMetrix = join(" ; ", @{$categories{'scienceMetrix'}}) if $type ne 'json'; - $scienceMetrix = \@{$categories{'scienceMetrix'}} if $type eq 'json'; - } - elsif ( $type eq 'json' ) { - $scienceMetrix = []; + $scienceMetrix = categories($categories{'scienceMetrix'}); } if ( defined $categories{'scopus'} ) { - $scopus = join(" ; ", @{$categories{'scopus'}}) if $type ne 'json'; - $scopus = \@{$categories{'scopus'}} if $type eq 'json'; - } - elsif ( $type eq 'json' ) { - $scopus = []; + $scopus = categories($categories{'scopus'}); } if ( defined $categories{'inist'} ) { - $inist = join(" ; ", @{$categories{'inist'}}) if $type ne 'json'; - $inist = \@{$categories{'inist'}} if $type eq 'json'; + $inist = categories($categories{'inist'}); } - elsif ( $type eq 'json' ) { - $inist = []; - } - } -elsif ( $type eq 'json' ) { - $wos = []; - $scienceMetrix = []; - $scopus = []; - $inist = []; } my $genre = ""; if ( defined $hit{'genre'} ) { @@ -540,8 +524,16 @@ if ( defined $xml and -f $xml ) { $structure = "Non"; my $texte = ""; + my $encoding = undef; open(XML, "<:utf8", $xml) or die "$!,"; while() { + if ( m|<\?[^>]+\bencoding *= *(["'])(.+?)\1[^>]*>|o and not defined $encoding ) { + $encoding = $2; + close XML; + $texte = ""; + open(XML, "<:encoding($encoding)", $xml) or die "$!,"; + next; + } tr/\n\r/ /s; $texte .= $_; } @@ -759,6 +751,30 @@ return join(";", @champs); } +sub categories +{ +my $categories = shift; + +my @tmp1 = (); +my @tmp2 = (); +if ( $type eq 'json' ) { + return \@tmp1 if $#{$categories} < 0; + foreach my $categorie (@{$categories}) { + if ( $categorie =~ /^1 - /o and @tmp2 ) { + push(@tmp1, [ @tmp2 ]); + @tmp2 = (); + } + push(@tmp2, $categorie); + } + push(@tmp1, [ @tmp2 ]) if @tmp2; + return \@tmp1; + } +else { + return join(" ; ", @{$categories}); + } + +} + sub json { my @valeurs = @_; @@ -769,16 +785,39 @@ foreach my $champ (@champs) { my $valeur = shift @valeurs; if ( ref($valeur) eq 'ARRAY' ) { - my @tmp = @{$valeur}; + my @niv1 = @{$valeur}; + if ( $#niv1 < 0 ) { + print OUT " \"$champ\": []", $nb -- ? "," : "", "\n"; + next; + } print OUT " \"$champ\": [\n"; - while(my $tmp = shift @tmp) { - if ( $tmp =~ /^\d+\z/o ) { - print OUT " $tmp"; + while(my $niv1 = shift @niv1) { + if ( ref($niv1) eq 'ARRAY' ) { + my @niv2 = @{$niv1}; + if ( $#niv2 < 0 ) { + print OUT " []\n", $#niv1 > -1 ? "," : "", "\n"; + next; + } + print OUT " [\n"; + while(my $niv2 = shift @niv2) { + if ( $niv2 =~ /^[-+]? ?\d+(\.\d+)?\z/o ) { + print OUT " $niv2"; + } + else { + print OUT " \"$niv2\""; + } + print OUT "," if $#niv2 > -1; + print OUT "\n"; + } + print OUT " ]"; + } + elsif ( $niv1 =~ /^[-+]? ?\d+(\.\d+)?\z/o ) { + print OUT " $niv1"; } else { - print OUT " \"$tmp\""; + print OUT " \"$niv1\""; } - print OUT "," if $#tmp > -1; + print OUT "," if $#niv1 > -1; print OUT "\n"; } print OUT " ]"; @@ -786,7 +825,7 @@ print OUT "\n"; } else { - if ( $valeur =~ /^\d+\z/o ) { + if ( $valeur =~ /^[-+]? ?\d+(\.\d+)?\z/o ) { print OUT " \"$champ\": $valeur"; } else {