blob: 2594a4badb3d7b942b28b57ca036650328a1b050 [file] [log] [blame]
#!/usr/bin/perl
# gitweb - simple web interface to track changes in git repositories
#
# (C) 2005-2006, Kay Sievers <kay.sievers@vrfy.org>
# (C) 2005, Christian Gierke
#
# This program is licensed under the GPLv2
use 5.008;
use strict;
use warnings;
# handle ACL in file access tests
use filetest 'access';
use CGI qw(:standard :escapeHTML -nosticky);
use CGI::Util qw(unescape);
use CGI::Carp qw(fatalsToBrowser set_message);
use Encode;
use Fcntl ':mode';
use File::Find qw();
use File::Basename qw(basename);
use Time::HiRes qw(gettimeofday tv_interval);
use Digest::MD5 qw(md5_hex);
binmode STDOUT, ':utf8';
if (!defined($CGI::VERSION) || $CGI::VERSION < 4.08) {
eval 'sub CGI::multi_param { CGI::param(@_) }'
}
our $t0 = [ gettimeofday() ];
our $number_of_git_cmds = 0;
BEGIN {
CGI->compile() if $ENV{'MOD_PERL'};
}
our $version = "++GIT_VERSION++";
our ($my_url, $my_uri, $base_url, $path_info, $home_link);
sub evaluate_uri {
our $cgi;
our $my_url = $cgi->url();
our $my_uri = $cgi->url(-absolute => 1);
# Base URL for relative URLs in gitweb ($logo, $favicon, ...),
# needed and used only for URLs with nonempty PATH_INFO
our $base_url = $my_url;
# When the script is used as DirectoryIndex, the URL does not contain the name
# of the script file itself, and $cgi->url() fails to strip PATH_INFO, so we
# have to do it ourselves. We make $path_info global because it's also used
# later on.
#
# Another issue with the script being the DirectoryIndex is that the resulting
# $my_url data is not the full script URL: this is good, because we want
# generated links to keep implying the script name if it wasn't explicitly
# indicated in the URL we're handling, but it means that $my_url cannot be used
# as base URL.
# Therefore, if we needed to strip PATH_INFO, then we know that we have
# to build the base URL ourselves:
our $path_info = decode_utf8($ENV{"PATH_INFO"});
if ($path_info) {
# $path_info has already been URL-decoded by the web server, but
# $my_url and $my_uri have not. URL-decode them so we can properly
# strip $path_info.
$my_url = unescape($my_url);
$my_uri = unescape($my_uri);
if ($my_url =~ s,\Q$path_info\E$,, &&
$my_uri =~ s,\Q$path_info\E$,, &&
defined $ENV{'SCRIPT_NAME'}) {
$base_url = $cgi->url(-base => 1) . $ENV{'SCRIPT_NAME'};
}
}
# target of the home link on top of all pages
our $home_link = $my_uri || "/";
}
# core git executable to use
# this can just be "git" if your webserver has a sensible PATH
our $GIT = "++GIT_BINDIR++/git";
# absolute fs-path which will be prepended to the project path
#our $projectroot = "/pub/scm";
our $projectroot = "++GITWEB_PROJECTROOT++";
# fs traversing limit for getting project list
# the number is relative to the projectroot
our $project_maxdepth = "++GITWEB_PROJECT_MAXDEPTH++";
# string of the home link on top of all pages
our $home_link_str = "++GITWEB_HOME_LINK_STR++";
# extra breadcrumbs preceding the home link
our @extra_breadcrumbs = ();
# name of your site or organization to appear in page titles
# replace this with something more descriptive for clearer bookmarks
our $site_name = "++GITWEB_SITENAME++"
|| ($ENV{'SERVER_NAME'} || "Untitled") . " Git";
# html snippet to include in the <head> section of each page
our $site_html_head_string = "++GITWEB_SITE_HTML_HEAD_STRING++";
# filename of html text to include at top of each page
our $site_header = "++GITWEB_SITE_HEADER++";
# html text to include at home page
our $home_text = "++GITWEB_HOMETEXT++";
# filename of html text to include at bottom of each page
our $site_footer = "++GITWEB_SITE_FOOTER++";
# URI of stylesheets
our @stylesheets = ("++GITWEB_CSS++");
# URI of a single stylesheet, which can be overridden in GITWEB_CONFIG.
our $stylesheet = undef;
# URI of GIT logo (72x27 size)
our $logo = "++GITWEB_LOGO++";
# URI of GIT favicon, assumed to be image/png type
our $favicon = "++GITWEB_FAVICON++";
# URI of gitweb.js (JavaScript code for gitweb)
our $javascript = "++GITWEB_JS++";
# URI and label (title) of GIT logo link
#our $logo_url = "http://www.kernel.org/pub/software/scm/git/docs/";
#our $logo_label = "git documentation";
our $logo_url = "http://git-scm.com/";
our $logo_label = "git homepage";
# source of projects list
our $projects_list = "++GITWEB_LIST++";
# the width (in characters) of the projects list "Description" column
our $projects_list_description_width = 25;
# group projects by category on the projects list
# (enabled if this variable evaluates to true)
our $projects_list_group_categories = 0;
# default category if none specified
# (leave the empty string for no category)
our $project_list_default_category = "";
# default order of projects list
# valid values are none, project, descr, owner, and age
our $default_projects_order = "project";
# show repository only if this file exists
# (only effective if this variable evaluates to true)
our $export_ok = "++GITWEB_EXPORT_OK++";
# don't generate age column on the projects list page
our $omit_age_column = 0;
# don't generate information about owners of repositories
our $omit_owner=0;
# show repository only if this subroutine returns true
# when given the path to the project, for example:
# sub { return -e "$_[0]/git-daemon-export-ok"; }
our $export_auth_hook = undef;
# only allow viewing of repositories also shown on the overview page
our $strict_export = "++GITWEB_STRICT_EXPORT++";
# list of git base URLs used for URL to where fetch project from,
# i.e. full URL is "$git_base_url/$project"
our @git_base_url_list = grep { $_ ne '' } ("++GITWEB_BASE_URL++");
# default blob_plain mimetype and default charset for text/plain blob
our $default_blob_plain_mimetype = 'text/plain';
our $default_text_plain_charset = undef;
# file to use for guessing MIME types before trying /etc/mime.types
# (relative to the current git repository)
our $mimetypes_file = undef;
# assume this charset if line contains non-UTF-8 characters;
# it should be valid encoding (see Encoding::Supported(3pm) for list),
# for which encoding all byte sequences are valid, for example
# 'iso-8859-1' aka 'latin1' (it is decoded without checking, so it
# could be even 'utf-8' for the old behavior)
our $fallback_encoding = 'latin1';
# rename detection options for git-diff and git-diff-tree
# - default is '-M', with the cost proportional to
# (number of removed files) * (number of new files).
# - more costly is '-C' (which implies '-M'), with the cost proportional to
# (number of changed files + number of removed files) * (number of new files)
# - even more costly is '-C', '--find-copies-harder' with cost
# (number of files in the original tree) * (number of new files)
# - one might want to include '-B' option, e.g. '-B', '-M'
our @diff_opts = ('-M'); # taken from git_commit
# Disables features that would allow repository owners to inject script into
# the gitweb domain.
our $prevent_xss = 0;
# Path to the highlight executable to use (must be the one from
# http://www.andre-simon.de due to assumptions about parameters and output).
# Useful if highlight is not installed on your webserver's PATH.
# [Default: highlight]
our $highlight_bin = "++HIGHLIGHT_BIN++";
# information about snapshot formats that gitweb is capable of serving
our %known_snapshot_formats = (
# name => {
# 'display' => display name,
# 'type' => mime type,
# 'suffix' => filename suffix,
# 'format' => --format for git-archive,
# 'compressor' => [compressor command and arguments]
# (array reference, optional)
# 'disabled' => boolean (optional)}
#
'tgz' => {
'display' => 'tar.gz',
'type' => 'application/x-gzip',
'suffix' => '.tar.gz',
'format' => 'tar',
'compressor' => ['gzip', '-n']},
'tbz2' => {
'display' => 'tar.bz2',
'type' => 'application/x-bzip2',
'suffix' => '.tar.bz2',
'format' => 'tar',
'compressor' => ['bzip2']},
'txz' => {
'display' => 'tar.xz',
'type' => 'application/x-xz',
'suffix' => '.tar.xz',
'format' => 'tar',
'compressor' => ['xz'],
'disabled' => 1},
'zip' => {
'display' => 'zip',
'type' => 'application/x-zip',
'suffix' => '.zip',
'format' => 'zip'},
);
# Aliases so we understand old gitweb.snapshot values in repository
# configuration.
our %known_snapshot_format_aliases = (
'gzip' => 'tgz',
'bzip2' => 'tbz2',
'xz' => 'txz',
# backward compatibility: legacy gitweb config support
'x-gzip' => undef, 'gz' => undef,
'x-bzip2' => undef, 'bz2' => undef,
'x-zip' => undef, '' => undef,
);
# Pixel sizes for icons and avatars. If the default font sizes or lineheights
# are changed, it may be appropriate to change these values too via
# $GITWEB_CONFIG.
our %avatar_size = (
'default' => 16,
'double' => 32
);
# Used to set the maximum load that we will still respond to gitweb queries.
# If server load exceed this value then return "503 server busy" error.
# If gitweb cannot determined server load, it is taken to be 0.
# Leave it undefined (or set to 'undef') to turn off load checking.
our $maxload = 300;
# configuration for 'highlight' (http://www.andre-simon.de/)
# match by basename
our %highlight_basename = (
#'Program' => 'py',
#'Library' => 'py',
'SConstruct' => 'py', # SCons equivalent of Makefile
'Makefile' => 'make',
);
# match by extension
our %highlight_ext = (
# main extensions, defining name of syntax;
# see files in /usr/share/highlight/langDefs/ directory
(map { $_ => $_ } qw(py rb java css js tex bib xml awk bat ini spec tcl sql)),
# alternate extensions, see /etc/highlight/filetypes.conf
(map { $_ => 'c' } qw(c h)),
(map { $_ => 'sh' } qw(sh bash zsh ksh)),
(map { $_ => 'cpp' } qw(cpp cxx c++ cc)),
(map { $_ => 'php' } qw(php php3 php4 php5 phps)),
(map { $_ => 'pl' } qw(pl perl pm)), # perhaps also 'cgi'
(map { $_ => 'make'} qw(make mak mk)),
(map { $_ => 'xml' } qw(xml xhtml html htm)),
);
# You define site-wide feature defaults here; override them with
# $GITWEB_CONFIG as necessary.
our %feature = (
# feature => {
# 'sub' => feature-sub (subroutine),
# 'override' => allow-override (boolean),
# 'default' => [ default options...] (array reference)}
#
# if feature is overridable (it means that allow-override has true value),
# then feature-sub will be called with default options as parameters;
# return value of feature-sub indicates if to enable specified feature
#
# if there is no 'sub' key (no feature-sub), then feature cannot be
# overridden
#
# use gitweb_get_feature(<feature>) to retrieve the <feature> value
# (an array) or gitweb_check_feature(<feature>) to check if <feature>
# is enabled
# Enable the 'blame' blob view, showing the last commit that modified
# each line in the file. This can be very CPU-intensive.
# To enable system wide have in $GITWEB_CONFIG
# $feature{'blame'}{'default'} = [1];
# To have project specific config enable override in $GITWEB_CONFIG
# $feature{'blame'}{'override'} = 1;
# and in project config gitweb.blame = 0|1;
'blame' => {
'sub' => sub { feature_bool('blame', @_) },
'override' => 0,
'default' => [0]},
# Enable the 'snapshot' link, providing a compressed archive of any
# tree. This can potentially generate high traffic if you have large
# project.
# Value is a list of formats defined in %known_snapshot_formats that
# you wish to offer.
# To disable system wide have in $GITWEB_CONFIG
# $feature{'snapshot'}{'default'} = [];
# To have project specific config enable override in $GITWEB_CONFIG
# $feature{'snapshot'}{'override'} = 1;
# and in project config, a comma-separated list of formats or "none"
# to disable. Example: gitweb.snapshot = tbz2,zip;
'snapshot' => {
'sub' => \&feature_snapshot,
'override' => 0,
'default' => ['tgz']},
# Enable text search, which will list the commits which match author,
# committer or commit text to a given string. Enabled by default.
# Project specific override is not supported.
#
# Note that this controls all search features, which means that if
# it is disabled, then 'grep' and 'pickaxe' search would also be
# disabled.
'search' => {
'override' => 0,
'default' => [1]},
# Enable grep search, which will list the files in currently selected
# tree containing the given string. Enabled by default. This can be
# potentially CPU-intensive, of course.
# Note that you need to have 'search' feature enabled too.
# To enable system wide have in $GITWEB_CONFIG
# $feature{'grep'}{'default'} = [1];
# To have project specific config enable override in $GITWEB_CONFIG
# $feature{'grep'}{'override'} = 1;
# and in project config gitweb.grep = 0|1;
'grep' => {
'sub' => sub { feature_bool('grep', @_) },
'override' => 0,
'default' => [1]},
# Enable the pickaxe search, which will list the commits that modified
# a given string in a file. This can be practical and quite faster
# alternative to 'blame', but still potentially CPU-intensive.
# Note that you need to have 'search' feature enabled too.
# To enable system wide have in $GITWEB_CONFIG
# $feature{'pickaxe'}{'default'} = [1];
# To have project specific config enable override in $GITWEB_CONFIG
# $feature{'pickaxe'}{'override'} = 1;
# and in project config gitweb.pickaxe = 0|1;
'pickaxe' => {
'sub' => sub { feature_bool('pickaxe', @_) },
'override' => 0,
'default' => [1]},
# Enable showing size of blobs in a 'tree' view, in a separate
# column, similar to what 'ls -l' does. This cost a bit of IO.
# To disable system wide have in $GITWEB_CONFIG
# $feature{'show-sizes'}{'default'} = [0];
# To have project specific config enable override in $GITWEB_CONFIG
# $feature{'show-sizes'}{'override'} = 1;
# and in project config gitweb.showsizes = 0|1;
'show-sizes' => {
'sub' => sub { feature_bool('showsizes', @_) },
'override' => 0,
'default' => [1]},
# Make gitweb use an alternative format of the URLs which can be
# more readable and natural-looking: project name is embedded
# directly in the path and the query string contains other
# auxiliary information. All gitweb installations recognize
# URL in either format; this configures in which formats gitweb
# generates links.
# To enable system wide have in $GITWEB_CONFIG
# $feature{'pathinfo'}{'default'} = [1];
# Project specific override is not supported.
# Note that you will need to change the default location of CSS,
# favicon, logo and possibly other files to an absolute URL. Also,
# if gitweb.cgi serves as your indexfile, you will need to force
# $my_uri to contain the script name in your $GITWEB_CONFIG.
'pathinfo' => {
'override' => 0,
'default' => [0]},
# Make gitweb consider projects in project root subdirectories
# to be forks of existing projects. Given project $projname.git,
# projects matching $projname/*.git will not be shown in the main
# projects list, instead a '+' mark will be added to $projname
# there and a 'forks' view will be enabled for the project, listing
# all the forks. If project list is taken from a file, forks have
# to be listed after the main project.
# To enable system wide have in $GITWEB_CONFIG
# $feature{'forks'}{'default'} = [1];
# Project specific override is not supported.
'forks' => {
'override' => 0,
'default' => [0]},
# Insert custom links to the action bar of all project pages.
# This enables you mainly to link to third-party scripts integrating
# into gitweb; e.g. git-browser for graphical history representation
# or custom web-based repository administration interface.
# The 'default' value consists of a list of triplets in the form
# (label, link, position) where position is the label after which
# to insert the link and link is a format string where %n expands
# to the project name, %f to the project path within the filesystem,
# %h to the current hash (h gitweb parameter) and %b to the current
# hash base (hb gitweb parameter); %% expands to %.
# To enable system wide have in $GITWEB_CONFIG e.g.
# $feature{'actions'}{'default'} = [('graphiclog',
# '/git-browser/by-commit.html?r=%n', 'summary')];
# Project specific override is not supported.
'actions' => {
'override' => 0,
'default' => []},
# Allow gitweb scan project content tags of project repository,
# and display the popular Web 2.0-ish "tag cloud" near the projects
# list. Note that this is something COMPLETELY different from the
# normal Git tags.
# gitweb by itself can show existing tags, but it does not handle
# tagging itself; you need to do it externally, outside gitweb.
# The format is described in git_get_project_ctags() subroutine.
# You may want to install the HTML::TagCloud Perl module to get
# a pretty tag cloud instead of just a list of tags.
# To enable system wide have in $GITWEB_CONFIG
# $feature{'ctags'}{'default'} = [1];
# Project specific override is not supported.
# In the future whether ctags editing is enabled might depend
# on the value, but using 1 should always mean no editing of ctags.
'ctags' => {
'override' => 0,
'default' => [0]},
# The maximum number of patches in a patchset generated in patch
# view. Set this to 0 or undef to disable patch view, or to a
# negative number to remove any limit.
# To disable system wide have in $GITWEB_CONFIG
# $feature{'patches'}{'default'} = [0];
# To have project specific config enable override in $GITWEB_CONFIG
# $feature{'patches'}{'override'} = 1;
# and in project config gitweb.patches = 0|n;
# where n is the maximum number of patches allowed in a patchset.
'patches' => {
'sub' => \&feature_patches,
'override' => 0,
'default' => [16]},
# Avatar support. When this feature is enabled, views such as
# shortlog or commit will display an avatar associated with
# the email of the committer(s) and/or author(s).
# Currently available providers are gravatar and picon.
# If an unknown provider is specified, the feature is disabled.
# Picon currently relies on the indiana.edu database.
# To enable system wide have in $GITWEB_CONFIG
# $feature{'avatar'}{'default'} = ['<provider>'];
# where <provider> is either gravatar or picon.
# To have project specific config enable override in $GITWEB_CONFIG
# $feature{'avatar'}{'override'} = 1;
# and in project config gitweb.avatar = <provider>;
'avatar' => {
'sub' => \&feature_avatar,
'override' => 0,
'default' => ['']},
# Enable displaying how much time and how many git commands
# it took to generate and display page. Disabled by default.
# Project specific override is not supported.
'timed' => {
'override' => 0,
'default' => [0]},
# Enable turning some links into links to actions which require
# JavaScript to run (like 'blame_incremental'). Not enabled by
# default. Project specific override is currently not supported.
'javascript-actions' => {
'override' => 0,
'default' => [0]},
# Enable and configure ability to change common timezone for dates
# in gitweb output via JavaScript. Enabled by default.
# Project specific override is not supported.
'javascript-timezone' => {
'override' => 0,
'default' => [
'local', # default timezone: 'utc', 'local', or '(-|+)HHMM' format,
# or undef to turn off this feature
'gitweb_tz', # name of cookie where to store selected timezone
'datetime', # CSS class used to mark up dates for manipulation
]},
# Syntax highlighting support. This is based on Daniel Svensson's
# and Sham Chukoury's work in gitweb-xmms2.git.
# It requires the 'highlight' program present in $PATH,
# and therefore is disabled by default.
# To enable system wide have in $GITWEB_CONFIG
# $feature{'highlight'}{'default'} = [1];
'highlight' => {
'sub' => sub { feature_bool('highlight', @_) },
'override' => 0,
'default' => [0]},
# Enable displaying of remote heads in the heads list
# To enable system wide have in $GITWEB_CONFIG
# $feature{'remote_heads'}{'default'} = [1];
# To have project specific config enable override in $GITWEB_CONFIG
# $feature{'remote_heads'}{'override'} = 1;
# and in project config gitweb.remoteheads = 0|1;
'remote_heads' => {
'sub' => sub { feature_bool('remote_heads', @_) },
'override' => 0,
'default' => [0]},
# Enable showing branches under other refs in addition to heads
# To set system wide extra branch refs have in $GITWEB_CONFIG
# $feature{'extra-branch-refs'}{'default'} = ['dirs', 'of', 'choice'];
# To have project specific config enable override in $GITWEB_CONFIG
# $feature{'extra-branch-refs'}{'override'} = 1;
# and in project config gitweb.extrabranchrefs = dirs of choice
# Every directory is separated with whitespace.
'extra-branch-refs' => {
'sub' => \&feature_extra_branch_refs,
'override' => 0,
'default' => []},
);
sub gitweb_get_feature {
my ($name) = @_;
return unless exists $feature{$name};
my ($sub, $override, @defaults) = (
$feature{$name}{'sub'},
$feature{$name}{'override'},
@{$feature{$name}{'default'}});
# project specific override is possible only if we have project
our $git_dir; # global variable, declared later
if (!$override || !defined $git_dir) {
return @defaults;
}
if (!defined $sub) {
warn "feature $name is not overridable";
return @defaults;
}
return $sub->(@defaults);
}
# A wrapper to check if a given feature is enabled.
# With this, you can say
#
# my $bool_feat = gitweb_check_feature('bool_feat');
# gitweb_check_feature('bool_feat') or somecode;
#
# instead of
#
# my ($bool_feat) = gitweb_get_feature('bool_feat');
# (gitweb_get_feature('bool_feat'))[0] or somecode;
#
sub gitweb_check_feature {
return (gitweb_get_feature(@_))[0];
}
sub feature_bool {
my $key = shift;
my ($val) = git_get_project_config($key, '--bool');
if (!defined $val) {
return ($_[0]);
} elsif ($val eq 'true') {
return (1);
} elsif ($val eq 'false') {
return (0);
}
}
sub feature_snapshot {
my (@fmts) = @_;
my ($val) = git_get_project_config('snapshot');
if ($val) {
@fmts = ($val eq 'none' ? () : split /\s*[,\s]\s*/, $val);
}
return @fmts;
}
sub feature_patches {
my @val = (git_get_project_config('patches', '--int'));
if (@val) {
return @val;
}
return ($_[0]);
}
sub feature_avatar {
my @val = (git_get_project_config('avatar'));
return @val ? @val : @_;
}
sub feature_extra_branch_refs {
my (@branch_refs) = @_;
my $values = git_get_project_config('extrabranchrefs');
if ($values) {
$values = config_to_multi ($values);
@branch_refs = ();
foreach my $value (@{$values}) {
push @branch_refs, split /\s+/, $value;
}
}
return @branch_refs;
}
# checking HEAD file with -e is fragile if the repository was
# initialized long time ago (i.e. symlink HEAD) and was pack-ref'ed
# and then pruned.
sub check_head_link {
my ($dir) = @_;
my $headfile = "$dir/HEAD";
return ((-e $headfile) ||
(-l $headfile && readlink($headfile) =~ /^refs\/heads\//));
}
sub check_export_ok {
my ($dir) = @_;
return (check_head_link($dir) &&
(!$export_ok || -e "$dir/$export_ok") &&
(!$export_auth_hook || $export_auth_hook->($dir)));
}
# process alternate names for backward compatibility
# filter out unsupported (unknown) snapshot formats
sub filter_snapshot_fmts {
my @fmts = @_;
@fmts = map {
exists $known_snapshot_format_aliases{$_} ?
$known_snapshot_format_aliases{$_} : $_} @fmts;
@fmts = grep {
exists $known_snapshot_formats{$_} &&
!$known_snapshot_formats{$_}{'disabled'}} @fmts;
}
sub filter_and_validate_refs {
my @refs = @_;
my %unique_refs = ();
foreach my $ref (@refs) {
die_error(500, "Invalid ref '$ref' in 'extra-branch-refs' feature") unless (is_valid_ref_format($ref));
# 'heads' are added implicitly in get_branch_refs().
$unique_refs{$ref} = 1 if ($ref ne 'heads');
}
return sort keys %unique_refs;
}
# If it is set to code reference, it is code that it is to be run once per
# request, allowing updating configurations that change with each request,
# while running other code in config file only once.
#
# Otherwise, if it is false then gitweb would process config file only once;
# if it is true then gitweb config would be run for each request.
our $per_request_config = 1;
# read and parse gitweb config file given by its parameter.
# returns true on success, false on recoverable error, allowing
# to chain this subroutine, using first file that exists.
# dies on errors during parsing config file, as it is unrecoverable.
sub read_config_file {
my $filename = shift;
return unless defined $filename;
# die if there are errors parsing config file
if (-e $filename) {
do $filename;
die $@ if $@;
return 1;
}
return;
}
our ($GITWEB_CONFIG, $GITWEB_CONFIG_SYSTEM, $GITWEB_CONFIG_COMMON);
sub evaluate_gitweb_config {
our $GITWEB_CONFIG = $ENV{'GITWEB_CONFIG'} || "++GITWEB_CONFIG++";
our $GITWEB_CONFIG_SYSTEM = $ENV{'GITWEB_CONFIG_SYSTEM'} || "++GITWEB_CONFIG_SYSTEM++";
our $GITWEB_CONFIG_COMMON = $ENV{'GITWEB_CONFIG_COMMON'} || "++GITWEB_CONFIG_COMMON++";
# Protect against duplications of file names, to not read config twice.
# Only one of $GITWEB_CONFIG and $GITWEB_CONFIG_SYSTEM is used, so
# there possibility of duplication of filename there doesn't matter.
$GITWEB_CONFIG = "" if ($GITWEB_CONFIG eq $GITWEB_CONFIG_COMMON);
$GITWEB_CONFIG_SYSTEM = "" if ($GITWEB_CONFIG_SYSTEM eq $GITWEB_CONFIG_COMMON);
# Common system-wide settings for convenience.
# Those settings can be ovverriden by GITWEB_CONFIG or GITWEB_CONFIG_SYSTEM.
read_config_file($GITWEB_CONFIG_COMMON);
# Use first config file that exists. This means use the per-instance
# GITWEB_CONFIG if exists, otherwise use GITWEB_SYSTEM_CONFIG.
read_config_file($GITWEB_CONFIG) and return;
read_config_file($GITWEB_CONFIG_SYSTEM);
}
# Get loadavg of system, to compare against $maxload.
# Currently it requires '/proc/loadavg' present to get loadavg;
# if it is not present it returns 0, which means no load checking.
sub get_loadavg {
if( -e '/proc/loadavg' ){
open my $fd, '<', '/proc/loadavg'
or return 0;
my @load = split(/\s+/, scalar <$fd>);
close $fd;
# The first three columns measure CPU and IO utilization of the last one,
# five, and 10 minute periods. The fourth column shows the number of
# currently running processes and the total number of processes in the m/n
# format. The last column displays the last process ID used.
return $load[0] || 0;
}
# additional checks for load average should go here for things that don't export
# /proc/loadavg
return 0;
}
# version of the core git binary
our $git_version;
sub evaluate_git_version {
our $git_version = qx("$GIT" --version) =~ m/git version (.*)$/ ? $1 : "unknown";
$number_of_git_cmds++;
}
sub check_loadavg {
if (defined $maxload && get_loadavg() > $maxload) {
die_error(503, "The load average on the server is too high");
}
}
# ======================================================================
# input validation and dispatch
# input parameters can be collected from a variety of sources (presently, CGI
# and PATH_INFO), so we define an %input_params hash that collects them all
# together during validation: this allows subsequent uses (e.g. href()) to be
# agnostic of the parameter origin
our %input_params = ();
# input parameters are stored with the long parameter name as key. This will
# also be used in the href subroutine to convert parameters to their CGI
# equivalent, and since the href() usage is the most frequent one, we store
# the name -> CGI key mapping here, instead of the reverse.
#
# XXX: Warning: If you touch this, check the search form for updating,
# too.
our @cgi_param_mapping = (
project => "p",
action => "a",
file_name => "f",
file_parent => "fp",
hash => "h",
hash_parent => "hp",
hash_base => "hb",
hash_parent_base => "hpb",
page => "pg",
order => "o",
searchtext => "s",
searchtype => "st",
snapshot_format => "sf",
extra_options => "opt",
search_use_regexp => "sr",
ctag => "by_tag",
diff_style => "ds",
project_filter => "pf",
# this must be last entry (for manipulation from JavaScript)
javascript => "js"
);
our %cgi_param_mapping = @cgi_param_mapping;
# we will also need to know the possible actions, for validation
our %actions = (
"blame" => \&git_blame,
"blame_incremental" => \&git_blame_incremental,
"blame_data" => \&git_blame_data,
"blobdiff" => \&git_blobdiff,
"blobdiff_plain" => \&git_blobdiff_plain,
"blob" => \&git_blob,
"blob_plain" => \&git_blob_plain,
"commitdiff" => \&git_commitdiff,
"commitdiff_plain" => \&git_commitdiff_plain,
"commit" => \&git_commit,
"forks" => \&git_forks,
"heads" => \&git_heads,
"history" => \&git_history,
"log" => \&git_log,
"patch" => \&git_patch,
"patches" => \&git_patches,
"remotes" => \&git_remotes,
"rss" => \&git_rss,
"atom" => \&git_atom,
"search" => \&git_search,
"search_help" => \&git_search_help,
"shortlog" => \&git_shortlog,
"summary" => \&git_summary,
"tag" => \&git_tag,
"tags" => \&git_tags,
"tree" => \&git_tree,
"snapshot" => \&git_snapshot,
"object" => \&git_object,
# those below don't need $project
"opml" => \&git_opml,
"project_list" => \&git_project_list,
"project_index" => \&git_project_index,
);
# finally, we have the hash of allowed extra_options for the commands that
# allow them
our %allowed_options = (
"--no-merges" => [ qw(rss atom log shortlog history) ],
);
# fill %input_params with the CGI parameters. All values except for 'opt'
# should be single values, but opt can be an array. We should probably
# build an array of parameters that can be multi-valued, but since for the time
# being it's only this one, we just single it out
sub evaluate_query_params {
our $cgi;
while (my ($name, $symbol) = each %cgi_param_mapping) {
if ($symbol eq 'opt') {
$input_params{$name} = [ map { decode_utf8($_) } $cgi->multi_param($symbol) ];
} else {
$input_params{$name} = decode_utf8($cgi->param($symbol));
}
}
}
# now read PATH_INFO and update the parameter list for missing parameters
sub evaluate_path_info {
return if defined $input_params{'project'};
return if !$path_info;
$path_info =~ s,^/+,,;
return if !$path_info;
# find which part of PATH_INFO is project
my $project = $path_info;
$project =~ s,/+$,,;
while ($project && !check_head_link("$projectroot/$project")) {
$project =~ s,/*[^/]*$,,;
}
return unless $project;
$input_params{'project'} = $project;
# do not change any parameters if an action is given using the query string
return if $input_params{'action'};
$path_info =~ s,^\Q$project\E/*,,;
# next, check if we have an action
my $action = $path_info;
$action =~ s,/.*$,,;
if (exists $actions{$action}) {
$path_info =~ s,^$action/*,,;
$input_params{'action'} = $action;
}
# list of actions that want hash_base instead of hash, but can have no
# pathname (f) parameter
my @wants_base = (
'tree',
'history',
);
# we want to catch, among others
# [$hash_parent_base[:$file_parent]..]$hash_parent[:$file_name]
my ($parentrefname, $parentpathname, $refname, $pathname) =
($path_info =~ /^(?:(.+?)(?::(.+))?\.\.)?([^:]+?)?(?::(.+))?$/);
# first, analyze the 'current' part
if (defined $pathname) {
# we got "branch:filename" or "branch:dir/"
# we could use git_get_type(branch:pathname), but:
# - it needs $git_dir
# - it does a git() call
# - the convention of terminating directories with a slash
# makes it superfluous
# - embedding the action in the PATH_INFO would make it even
# more superfluous
$pathname =~ s,^/+,,;
if (!$pathname || substr($pathname, -1) eq "/") {
$input_params{'action'} ||= "tree";
$pathname =~ s,/$,,;
} else {
# the default action depends on whether we had parent info
# or not
if ($parentrefname) {
$input_params{'action'} ||= "blobdiff_plain";
} else {
$input_params{'action'} ||= "blob_plain";
}
}
$input_params{'hash_base'} ||= $refname;
$input_params{'file_name'} ||= $pathname;
} elsif (defined $refname) {
# we got "branch". In this case we have to choose if we have to
# set hash or hash_base.
#
# Most of the actions without a pathname only want hash to be
# set, except for the ones specified in @wants_base that want
# hash_base instead. It should also be noted that hand-crafted
# links having 'history' as an action and no pathname or hash
# set will fail, but that happens regardless of PATH_INFO.
if (defined $parentrefname) {
# if there is parent let the default be 'shortlog' action
# (for http://git.example.com/repo.git/A..B links); if there
# is no parent, dispatch will detect type of object and set
# action appropriately if required (if action is not set)
$input_params{'action'} ||= "shortlog";
}
if ($input_params{'action'} &&
grep { $_ eq $input_params{'action'} } @wants_base) {
$input_params{'hash_base'} ||= $refname;
} else {
$input_params{'hash'} ||= $refname;
}
}
# next, handle the 'parent' part, if present
if (defined $parentrefname) {
# a missing pathspec defaults to the 'current' filename, allowing e.g.
# someproject/blobdiff/oldrev..newrev:/filename
if ($parentpathname) {
$parentpathname =~ s,^/+,,;
$parentpathname =~ s,/$,,;
$input_params{'file_parent'} ||= $parentpathname;
} else {
$input_params{'file_parent'} ||= $input_params{'file_name'};
}
# we assume that hash_parent_base is wanted if a path was specified,
# or if the action wants hash_base instead of hash
if (defined $input_params{'file_parent'} ||
grep { $_ eq $input_params{'action'} } @wants_base) {
$input_params{'hash_parent_base'} ||= $parentrefname;
} else {
$input_params{'hash_parent'} ||= $parentrefname;
}
}
# for the snapshot action, we allow URLs in the form
# $project/snapshot/$hash.ext
# where .ext determines the snapshot and gets removed from the
# passed $refname to provide the $hash.
#
# To be able to tell that $refname includes the format extension, we
# require the following two conditions to be satisfied:
# - the hash input parameter MUST have been set from the $refname part
# of the URL (i.e. they must be equal)
# - the snapshot format MUST NOT have been defined already (e.g. from
# CGI parameter sf)
# It's also useless to try any matching unless $refname has a dot,
# so we check for that too
if (defined $input_params{'action'} &&
$input_params{'action'} eq 'snapshot' &&
defined $refname && index($refname, '.') != -1 &&
$refname eq $input_params{'hash'} &&
!defined $input_params{'snapshot_format'}) {
# We loop over the known snapshot formats, checking for
# extensions. Allowed extensions are both the defined suffix
# (which includes the initial dot already) and the snapshot
# format key itself, with a prepended dot
while (my ($fmt, $opt) = each %known_snapshot_formats) {
my $hash = $refname;
unless ($hash =~ s/(\Q$opt->{'suffix'}\E|\Q.$fmt\E)$//) {
next;
}
my $sfx = $1;
# a valid suffix was found, so set the snapshot format
# and reset the hash parameter
$input_params{'snapshot_format'} = $fmt;
$input_params{'hash'} = $hash;
# we also set the format suffix to the one requested
# in the URL: this way a request for e.g. .tgz returns
# a .tgz instead of a .tar.gz
$known_snapshot_formats{$fmt}{'suffix'} = $sfx;
last;
}
}
}
our ($action, $project, $file_name, $file_parent, $hash, $hash_parent, $hash_base,
$hash_parent_base, @extra_options, $page, $searchtype, $search_use_regexp,
$searchtext, $search_regexp, $project_filter);
sub evaluate_and_validate_params {
our $action = $input_params{'action'};
if (defined $action) {
if (!is_valid_action($action)) {
die_error(400, "Invalid action parameter");
}
}
# parameters which are pathnames
our $project = $input_params{'project'};
if (defined $project) {
if (!is_valid_project($project)) {
undef $project;
die_error(404, "No such project");
}
}
our $project_filter = $input_params{'project_filter'};
if (defined $project_filter) {
if (!is_valid_pathname($project_filter)) {
die_error(404, "Invalid project_filter parameter");
}
}
our $file_name = $input_params{'file_name'};
if (defined $file_name) {
if (!is_valid_pathname($file_name)) {
die_error(400, "Invalid file parameter");
}
}
our $file_parent = $input_params{'file_parent'};
if (defined $file_parent) {
if (!is_valid_pathname($file_parent)) {
die_error(400, "Invalid file parent parameter");
}
}
# parameters which are refnames
our $hash = $input_params{'hash'};
if (defined $hash) {
if (!is_valid_refname($hash)) {
die_error(400, "Invalid hash parameter");
}
}
our $hash_parent = $input_params{'hash_parent'};
if (defined $hash_parent) {
if (!is_valid_refname($hash_parent)) {
die_error(400, "Invalid hash parent parameter");
}
}
our $hash_base = $input_params{'hash_base'};
if (defined $hash_base) {
if (!is_valid_refname($hash_base)) {
die_error(400, "Invalid hash base parameter");
}
}
our @extra_options = @{$input_params{'extra_options'}};
# @extra_options is always defined, since it can only be (currently) set from
# CGI, and $cgi->param() returns the empty array in array context if the param
# is not set
foreach my $opt (@extra_options) {
if (not exists $allowed_options{$opt}) {
die_error(400, "Invalid option parameter");
}
if (not grep(/^$action$/, @{$allowed_options{$opt}})) {
die_error(400, "Invalid option parameter for this action");
}
}
our $hash_parent_base = $input_params{'hash_parent_base'};
if (defined $hash_parent_base) {
if (!is_valid_refname($hash_parent_base)) {
die_error(400, "Invalid hash parent base parameter");
}
}
# other parameters
our $page = $input_params{'page'};
if (defined $page) {
if ($page =~ m/[^0-9]/) {
die_error(400, "Invalid page parameter");
}
}
our $searchtype = $input_params{'searchtype'};
if (defined $searchtype) {
if ($searchtype =~ m/[^a-z]/) {
die_error(400, "Invalid searchtype parameter");
}
}
our $search_use_regexp = $input_params{'search_use_regexp'};
our $searchtext = $input_params{'searchtext'};
our $search_regexp = undef;
if (defined $searchtext) {
if (length($searchtext) < 2) {
die_error(403, "At least two characters are required for search parameter");
}
if ($search_use_regexp) {
$search_regexp = $searchtext;
if (!eval { qr/$search_regexp/; 1; }) {
(my $error = $@) =~ s/ at \S+ line \d+.*\n?//;
die_error(400, "Invalid search regexp '$search_regexp'",
esc_html($error));
}
} else {
$search_regexp = quotemeta $searchtext;
}
}
}
# path to the current git repository
our $git_dir;
sub evaluate_git_dir {
our $git_dir = "$projectroot/$project" if $project;
}
our (@snapshot_fmts, $git_avatar, @extra_branch_refs);
sub configure_gitweb_features {
# list of supported snapshot formats
our @snapshot_fmts = gitweb_get_feature('snapshot');
@snapshot_fmts = filter_snapshot_fmts(@snapshot_fmts);
our ($git_avatar) = gitweb_get_feature('avatar');
$git_avatar = '' unless $git_avatar =~ /^(?:gravatar|picon)$/s;
our @extra_branch_refs = gitweb_get_feature('extra-branch-refs');
@extra_branch_refs = filter_and_validate_refs (@extra_branch_refs);
}
sub get_branch_refs {
return ('heads', @extra_branch_refs);
}
# custom error handler: 'die <message>' is Internal Server Error
sub handle_errors_html {
my $msg = shift; # it is already HTML escaped
# to avoid infinite loop where error occurs in die_error,
# change handler to default handler, disabling handle_errors_html
set_message("Error occurred when inside die_error:\n$msg");
# you cannot jump out of die_error when called as error handler;
# the subroutine set via CGI::Carp::set_message is called _after_
# HTTP headers are already written, so it cannot write them itself
die_error(undef, undef, $msg, -error_handler => 1, -no_http_header => 1);
}
set_message(\&handle_errors_html);
# dispatch
sub dispatch {
if (!defined $action) {
if (defined $hash) {
$action = git_get_type($hash);
$action or die_error(404, "Object does not exist");
} elsif (defined $hash_base && defined $file_name) {
$action = git_get_type("$hash_base:$file_name");
$action or die_error(404, "File or directory does not exist");
} elsif (defined $project) {
$action = 'summary';
} else {
$action = 'project_list';
}
}
if (!defined($actions{$action})) {
die_error(400, "Unknown action");
}
if ($action !~ m/^(?:opml|project_list|project_index)$/ &&
!$project) {
die_error(400, "Project needed");
}
$actions{$action}->();
}
sub reset_timer {
our $t0 = [ gettimeofday() ]
if defined $t0;
our $number_of_git_cmds = 0;
}
our $first_request = 1;
sub run_request {
reset_timer();
evaluate_uri();
if ($first_request) {
evaluate_gitweb_config();
evaluate_git_version();
}
if ($per_request_config) {
if (ref($per_request_config) eq 'CODE') {
$per_request_config->();
} elsif (!$first_request) {
evaluate_gitweb_config();
}
}
check_loadavg();
# $projectroot and $projects_list might be set in gitweb config file
$projects_list ||= $projectroot;
evaluate_query_params();
evaluate_path_info();
evaluate_and_validate_params();
evaluate_git_dir();
configure_gitweb_features();
dispatch();
}
our $is_last_request = sub { 1 };
our ($pre_dispatch_hook, $post_dispatch_hook, $pre_listen_hook);
our $CGI = 'CGI';
our $cgi;
sub configure_as_fcgi {
require CGI::Fast;
our $CGI = 'CGI::Fast';
my $request_number = 0;
# let each child service 100 requests
our $is_last_request = sub { ++$request_number > 100 };
}
sub evaluate_argv {
my $script_name = $ENV{'SCRIPT_NAME'} || $ENV{'SCRIPT_FILENAME'} || __FILE__;
configure_as_fcgi()
if $script_name =~ /\.fcgi$/;
return unless (@ARGV);
require Getopt::Long;
Getopt::Long::GetOptions(
'fastcgi|fcgi|f' => \&configure_as_fcgi,
'nproc|n=i' => sub {
my ($arg, $val) = @_;
return unless eval { require FCGI::ProcManager; 1; };
my $proc_manager = FCGI::ProcManager->new({
n_processes => $val,
});
our $pre_listen_hook = sub { $proc_manager->pm_manage() };
our $pre_dispatch_hook = sub { $proc_manager->pm_pre_dispatch() };
our $post_dispatch_hook = sub { $proc_manager->pm_post_dispatch() };
},
);
}
sub run {
evaluate_argv();
$first_request = 1;
$pre_listen_hook->()
if $pre_listen_hook;
REQUEST:
while ($cgi = $CGI->new()) {
$pre_dispatch_hook->()
if $pre_dispatch_hook;
run_request();
$post_dispatch_hook->()
if $post_dispatch_hook;
$first_request = 0;
last REQUEST if ($is_last_request->());
}
DONE_GITWEB:
1;
}
run();
if (defined caller) {
# wrapped in a subroutine processing requests,
# e.g. mod_perl with ModPerl::Registry, or PSGI with Plack::App::WrapCGI
return;
} else {
# pure CGI script, serving single request
exit;
}
## ======================================================================
## action links
# possible values of extra options
# -full => 0|1 - use absolute/full URL ($my_uri/$my_url as base)
# -replay => 1 - start from a current view (replay with modifications)
# -path_info => 0|1 - don't use/use path_info URL (if possible)
# -anchor => ANCHOR - add #ANCHOR to end of URL, implies -replay if used alone
sub href {
my %params = @_;
# default is to use -absolute url() i.e. $my_uri
my $href = $params{-full} ? $my_url : $my_uri;
# implicit -replay, must be first of implicit params
$params{-replay} = 1 if (keys %params == 1 && $params{-anchor});
$params{'project'} = $project unless exists $params{'project'};
if ($params{-replay}) {
while (my ($name, $symbol) = each %cgi_param_mapping) {
if (!exists $params{$name}) {
$params{$name} = $input_params{$name};
}
}
}
my $use_pathinfo = gitweb_check_feature('pathinfo');
if (defined $params{'project'} &&
(exists $params{-path_info} ? $params{-path_info} : $use_pathinfo)) {
# try to put as many parameters as possible in PATH_INFO:
# - project name
# - action
# - hash_parent or hash_parent_base:/file_parent
# - hash or hash_base:/filename
# - the snapshot_format as an appropriate suffix
# When the script is the root DirectoryIndex for the domain,
# $href here would be something like http://gitweb.example.com/
# Thus, we strip any trailing / from $href, to spare us double
# slashes in the final URL
$href =~ s,/$,,;
# Then add the project name, if present
$href .= "/".esc_path_info($params{'project'});
delete $params{'project'};
# since we destructively absorb parameters, we keep this
# boolean that remembers if we're handling a snapshot
my $is_snapshot = $params{'action'} eq 'snapshot';
# Summary just uses the project path URL, any other action is
# added to the URL
if (defined $params{'action'}) {
$href .= "/".esc_path_info($params{'action'})
unless $params{'action'} eq 'summary';
delete $params{'action'};
}
# Next, we put hash_parent_base:/file_parent..hash_base:/file_name,
# stripping nonexistent or useless pieces
$href .= "/" if ($params{'hash_base'} || $params{'hash_parent_base'}
|| $params{'hash_parent'} || $params{'hash'});
if (defined $params{'hash_base'}) {
if (defined $params{'hash_parent_base'}) {
$href .= esc_path_info($params{'hash_parent_base'});
# skip the file_parent if it's the same as the file_name
if (defined $params{'file_parent'}) {
if (defined $params{'file_name'} && $params{'file_parent'} eq $params{'file_name'}) {
delete $params{'file_parent'};
} elsif ($params{'file_parent'} !~ /\.\./) {
$href .= ":/".esc_path_info($params{'file_parent'});
delete $params{'file_parent'};
}
}
$href .= "..";
delete $params{'hash_parent'};
delete $params{'hash_parent_base'};
} elsif (defined $params{'hash_parent'}) {
$href .= esc_path_info($params{'hash_parent'}). "..";
delete $params{'hash_parent'};
}
$href .= esc_path_info($params{'hash_base'});
if (defined $params{'file_name'} && $params{'file_name'} !~ /\.\./) {
$href .= ":/".esc_path_info($params{'file_name'});
delete $params{'file_name'};
}
delete $params{'hash'};
delete $params{'hash_base'};
} elsif (defined $params{'hash'}) {
$href .= esc_path_info($params{'hash'});
delete $params{'hash'};
}
# If the action was a snapshot, we can absorb the
# snapshot_format parameter too
if ($is_snapshot) {
my $fmt = $params{'snapshot_format'};
# snapshot_format should always be defined when href()
# is called, but just in case some code forgets, we
# fall back to the default
$fmt ||= $snapshot_fmts[0];
$href .= $known_snapshot_formats{$fmt}{'suffix'};
delete $params{'snapshot_format'};
}
}
# now encode the parameters explicitly
my @result = ();
for (my $i = 0; $i < @cgi_param_mapping; $i += 2) {
my ($name, $symbol) = ($cgi_param_mapping[$i], $cgi_param_mapping[$i+1]);
if (defined $params{$name}) {
if (ref($params{$name}) eq "ARRAY") {
foreach my $par (@{$params{$name}}) {
push @result, $symbol . "=" . esc_param($par);
}
} else {
push @result, $symbol . "=" . esc_param($params{$name});
}
}
}
$href .= "?" . join(';', @result) if scalar @result;
# final transformation: trailing spaces must be escaped (URI-encoded)
$href =~ s/(\s+)$/CGI::escape($1)/e;
if ($params{-anchor}) {
$href .= "#".esc_param($params{-anchor});
}
return $href;
}
## ======================================================================
## validation, quoting/unquoting and escaping
sub is_valid_action {
my $input = shift;
return undef unless exists $actions{$input};
return 1;
}
sub is_valid_project {
my $input = shift;
return unless defined $input;
if (!is_valid_pathname($input) ||
!(-d "$projectroot/$input") ||
!check_export_ok("$projectroot/$input") ||
($strict_export && !project_in_list($input))) {
return undef;
} else {
return 1;
}
}
sub is_valid_pathname {
my $input = shift;
return undef unless defined $input;
# no '.' or '..' as elements of path, i.e. no '.' or '..'
# at the beginning, at the end, and between slashes.
# also this catches doubled slashes
if ($input =~ m!(^|/)(|\.|\.\.)(/|$)!) {
return undef;
}
# no null characters
if ($input =~ m!\0!) {
return undef;
}
return 1;
}
sub is_valid_ref_format {
my $input = shift;
return undef unless defined $input;
# restrictions on ref name according to git-check-ref-format
if ($input =~ m!(/\.|\.\.|[\000-\040\177 ~^:?*\[]|/$)!) {
return undef;
}
return 1;
}
sub is_valid_refname {
my $input = shift;
return undef unless defined $input;
# textual hashes are O.K.
if ($input =~ m/^[0-9a-fA-F]{40}$/) {
return 1;
}
# it must be correct pathname
is_valid_pathname($input) or return undef;
# check git-check-ref-format restrictions
is_valid_ref_format($input) or return undef;
return 1;
}
# decode sequences of octets in utf8 into Perl's internal form,
# which is utf-8 with utf8 flag set if needed. gitweb writes out
# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning
sub to_utf8 {
my $str = shift;
return undef unless defined $str;
if (utf8::is_utf8($str) || utf8::decode($str)) {
return $str;
} else {
return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
}
}
# quote unsafe chars, but keep the slash, even when it's not
# correct, but quoted slashes look too horrible in bookmarks
sub esc_param {
my $str = shift;
return undef unless defined $str;
$str =~ s/([^A-Za-z0-9\-_.~()\/:@ ]+)/CGI::escape($1)/eg;
$str =~ s/ /\+/g;
return $str;
}
# the quoting rules for path_info fragment are slightly different
sub esc_path_info {
my $str = shift;
return undef unless defined $str;
# path_info doesn't treat '+' as space (specially), but '?' must be escaped
$str =~ s/([^A-Za-z0-9\-_.~();\/;:@&= +]+)/CGI::escape($1)/eg;
return $str;
}
# quote unsafe chars in whole URL, so some characters cannot be quoted
sub esc_url {
my $str = shift;
return undef unless defined $str;
$str =~ s/([^A-Za-z0-9\-_.~();\/;?:@&= ]+)/CGI::escape($1)/eg;
$str =~ s/ /\+/g;
return $str;
}
# quote unsafe characters in HTML attributes
sub esc_attr {
# for XHTML conformance escaping '"' to '&quot;' is not enough
return esc_html(@_);
}
# replace invalid utf8 character with SUBSTITUTION sequence
sub esc_html {
my $str = shift;
my %opts = @_;
return undef unless defined $str;
$str = to_utf8($str);
$str = $cgi->escapeHTML($str);
if ($opts{'-nbsp'}) {
$str =~ s/ /&nbsp;/g;
}
$str =~ s|([[:cntrl:]])|(($1 ne "\t") ? quot_cec($1) : $1)|eg;
return $str;
}
# quote control characters and escape filename to HTML
sub esc_path {
my $str = shift;
my %opts = @_;
return undef unless defined $str;
$str = to_utf8($str);
$str = $cgi->escapeHTML($str);
if ($opts{'-nbsp'}) {
$str =~ s/ /&nbsp;/g;
}
$str =~ s|([[:cntrl:]])|quot_cec($1)|eg;
return $str;
}
# Sanitize for use in XHTML + application/xml+xhtml (valid XML 1.0)
sub sanitize {
my $str = shift;
return undef unless defined $str;
$str = to_utf8($str);
$str =~ s|([[:cntrl:]])|(index("\t\n\r", $1) != -1 ? $1 : quot_cec($1))|eg;
return $str;
}
# Make control characters "printable", using character escape codes (CEC)
sub quot_cec {
my $cntrl = shift;
my %opts = @_;
my %es = ( # character escape codes, aka escape sequences
"\t" => '\t', # tab (HT)
"\n" => '\n', # line feed (LF)
"\r" => '\r', # carrige return (CR)
"\f" => '\f', # form feed (FF)
"\b" => '\b', # backspace (BS)
"\a" => '\a', # alarm (bell) (BEL)
"\e" => '\e', # escape (ESC)
"\013" => '\v', # vertical tab (VT)
"\000" => '\0', # nul character (NUL)
);
my $chr = ( (exists $es{$cntrl})
? $es{$cntrl}
: sprintf('\%2x', ord($cntrl)) );
if ($opts{-nohtml}) {
return $chr;
} else {
return "<span class=\"cntrl\">$chr</span>";
}
}
# Alternatively use unicode control pictures codepoints,
# Unicode "printable representation" (PR)
sub quot_upr {
my $cntrl = shift;
my %opts = @_;
my $chr = sprintf('&#%04d;', 0x2400+ord($cntrl));
if ($opts{-nohtml}) {
return $chr;
} else {
return "<span class=\"cntrl\">$chr</span>";
}
}
# git may return quoted and escaped filenames
sub unquote {
my $str = shift;
sub unq {
my $seq = shift;
my %es = ( # character escape codes, aka escape sequences
't' => "\t", # tab (HT, TAB)
'n' => "\n", # newline (NL)
'r' => "\r", # return (CR)
'f' => "\f", # form feed (FF)
'b' => "\b", # backspace (BS)
'a' => "\a", # alarm (bell) (BEL)
'e' => "\e", # escape (ESC)
'v' => "\013", # vertical tab (VT)
);
if ($seq =~ m/^[0-7]{1,3}$/) {
# octal char sequence
return chr(oct($seq));
} elsif (exists $es{$seq}) {
# C escape sequence, aka character escape code
return $es{$seq};
}
# quoted ordinary character
return $seq;
}
if ($str =~ m/^"(.*)"$/) {
# needs unquoting
$str = $1;
$str =~ s/\\([^0-7]|[0-7]{1,3})/unq($1)/eg;
}
return $str;
}
# escape tabs (convert tabs to spaces)
sub untabify {
my $line = shift;
while ((my $pos = index($line, "\t")) != -1) {
if (my $count = (8 - ($pos % 8))) {
my $spaces = ' ' x $count;
$line =~ s/\t/$spaces/;
}
}
return $line;
}
sub project_in_list {
my $project = shift;
my @list = git_get_projects_list();
return @list && scalar(grep { $_->{'path'} eq $project } @list);
}
## ----------------------------------------------------------------------
## HTML aware string manipulation
# Try to chop given string on a word boundary between position
# $len and $len+$add_len. If there is no word boundary there,
# chop at $len+$add_len. Do not chop if chopped part plus ellipsis
# (marking chopped part) would be longer than given string.
sub chop_str {
my $str = shift;
my $len = shift;
my $add_len = shift || 10;
my $where = shift || 'right'; # 'left' | 'center' | 'right'
# Make sure perl knows it is utf8 encoded so we don't
# cut in the middle of a utf8 multibyte char.
$str = to_utf8($str);
# allow only $len chars, but don't cut a word if it would fit in $add_len
# if it doesn't fit, cut it if it's still longer than the dots we would add
# remove chopped character entities entirely
# when chopping in the middle, distribute $len into left and right part
# return early if chopping wouldn't make string shorter
if ($where eq 'center') {
return $str if ($len + 5 >= length($str)); # filler is length 5
$len = int($len/2);
} else {
return $str if ($len + 4 >= length($str)); # filler is length 4
}
# regexps: ending and beginning with word part up to $add_len
my $endre = qr/.{$len}\w{0,$add_len}/;
my $begre = qr/\w{0,$add_len}.{$len}/;
if ($where eq 'left') {
$str =~ m/^(.*?)($begre)$/;
my ($lead, $body) = ($1, $2);
if (length($lead) > 4) {
$lead = " ...";
}
return "$lead$body";
} elsif ($where eq 'center') {
$str =~ m/^($endre)(.*)$/;
my ($left, $str) = ($1, $2);
$str =~ m/^(.*?)($begre)$/;
my ($mid, $right) = ($1, $2);
if (length($mid) > 5) {
$mid = " ... ";
}
return "$left$mid$right";
} else {
$str =~ m/^($endre)(.*)$/;
my $body = $1;
my $tail = $2;
if (length($tail) > 4) {
$tail = "... ";
}
return "$body$tail";
}
}
# takes the same arguments as chop_str, but also wraps a <span> around the
# result with a title attribute if it does get chopped. Additionally, the
# string is HTML-escaped.
sub chop_and_escape_str {
my ($str) = @_;
my $chopped = chop_str(@_);
$str = to_utf8($str);
if ($chopped eq $str) {
return esc_html($chopped);
} else {
$str =~ s/[[:cntrl:]]/?/g;
return $cgi->span({-title=>$str}, esc_html($chopped));
}
}
# Highlight selected fragments of string, using given CSS class,
# and escape HTML. It is assumed that fragments do not overlap.
# Regions are passed as list of pairs (array references).
#
# Example: esc_html_hl_regions("foobar", "mark", [ 0, 3 ]) returns
# '<span class="mark">foo</span>bar'
sub esc_html_hl_regions {
my ($str, $css_class, @sel) = @_;
my %opts = grep { ref($_) ne 'ARRAY' } @sel;
@sel = grep { ref($_) eq 'ARRAY' } @sel;
return esc_html($str, %opts) unless @sel;
my $out = '';
my $pos = 0;
for my $s (@sel) {
my ($begin, $end) = @$s;
# Don't create empty <span> elements.
next if $end <= $begin;
my $escaped = esc_html(substr($str, $begin, $end - $begin),
%opts);
$out .= esc_html(substr($str, $pos, $begin - $pos), %opts)
if ($begin - $pos > 0);
$out .= $cgi->span({-class => $css_class}, $escaped);
$pos = $end;
}
$out .= esc_html(substr($str, $pos), %opts)
if ($pos < length($str));
return $out;
}
# return positions of beginning and end of each match
sub matchpos_list {
my ($str, $regexp) = @_;
return unless (defined $str && defined $regexp);
my @matches;
while ($str =~ /$regexp/g) {
push @matches, [$-[0], $+[0]];
}
return @matches;
}
# highlight match (if any), and escape HTML
sub esc_html_match_hl {
my ($str, $regexp) = @_;
return esc_html($str) unless defined $regexp;
my @matches = matchpos_list($str, $regexp);
return esc_html($str) unless @matches;
return esc_html_hl_regions($str, 'match', @matches);
}
# highlight match (if any) of shortened string, and escape HTML
sub esc_html_match_hl_chopped {
my ($str, $chopped, $regexp) = @_;
return esc_html_match_hl($str, $regexp) unless defined $chopped;
my @matches = matchpos_list($str, $regexp);
return esc_html($chopped) unless @matches;
# filter matches so that we mark chopped string
my $tail = "... "; # see chop_str
unless ($chopped =~ s/\Q$tail\E$//) {
$tail = '';
}
my $chop_len = length($chopped);
my $tail_len = length($tail);
my @filtered;
for my $m (@matches) {
if ($m->[0] > $chop_len) {
push @filtered, [ $chop_len, $chop_len + $tail_len ] if ($tail_len > 0);
last;
} elsif ($m->[1] > $chop_len) {
push @filtered, [ $m->[0], $chop_len + $tail_len ];
last;
}
push @filtered, $m;
}
return esc_html_hl_regions($chopped . $tail, 'match', @filtered);
}
## ----------------------------------------------------------------------
## functions returning short strings
# CSS class for given age value (in seconds)
sub age_class {
my $age = shift;
if (!defined $age) {
return "noage";
} elsif ($age < 60*60*2) {
return "age0";
} elsif ($age < 60*60*24*2) {
return "age1";
} else {
return "age2";
}
}
# convert age in seconds to "nn units ago" string
sub age_string {
my $age = shift;
my $age_str;
if ($age > 60*60*24*365*2) {
$age_str = (int $age/60/60/24/365);
$age_str .= " years ago";
} elsif ($age > 60*60*24*(365/12)*2) {
$age_str = int $age/60/60/24/(365/12);
$age_str .= " months ago";
} elsif ($age > 60*60*24*7*2) {
$age_str = int $age/60/60/24/7;
$age_str .= " weeks ago";
} elsif ($age > 60*60*24*2) {
$age_str = int $age/60/60/24;
$age_str .= " days ago";
} elsif ($age > 60*60*2) {
$age_str = int $age/60/60;
$age_str .= " hours ago";
} elsif ($age > 60*2) {
$age_str = int $age/60;
$age_str .= " min ago";
} elsif ($age > 2) {
$age_str = int $age;
$age_str .= " sec ago";
} else {
$age_str .= " right now";
}
return $age_str;
}
use constant {
S_IFINVALID => 0030000,
S_IFGITLINK => 0160000,
};
# submodule/subproject, a commit object reference
sub S_ISGITLINK {
my $mode = shift;
return (($mode & S_IFMT) == S_IFGITLINK)
}
# convert file mode in octal to symbolic file mode string
sub mode_str {
my $mode = oct shift;
if (S_ISGITLINK($mode)) {
return 'm---------';
} elsif (S_ISDIR($mode & S_IFMT)) {
return 'drwxr-xr-x';
} elsif (S_ISLNK($mode)) {
return 'lrwxrwxrwx';
} elsif (S_ISREG($mode)) {
# git cares only about the executable bit
if ($mode & S_IXUSR) {
return '-rwxr-xr-x';
} else {
return '-rw-r--r--';
};
} else {
return '----------';
}
}
# convert file mode in octal to file type string
sub file_type {
my $mode = shift;
if ($mode !~ m/^[0-7]+$/) {
return $mode;
} else {
$mode = oct $mode;
}
if (S_ISGITLINK($mode)) {
return "submodule";
} elsif (S_ISDIR($mode & S_IFMT)) {
return "directory";
} elsif (S_ISLNK($mode)) {
return "symlink";
} elsif (S_ISREG($mode)) {
return "file";
} else {
return "unknown";
}
}
# convert file mode in octal to file type description string
sub file_type_long {
my $mode = shift;
if ($mode !~ m/^[0-7]+$/) {
return $mode;
} else {
$mode = oct $mode;
}
if (S_ISGITLINK($mode)) {
return "submodule";
} elsif (S_ISDIR($mode & S_IFMT)) {
return "directory";
} elsif (S_ISLNK($mode)) {
return "symlink";
} elsif (S_ISREG($mode)) {
if ($mode & S_IXUSR) {
return "executable";
} else {
return "file";
};
} else {
return "unknown";
}
}
## ----------------------------------------------------------------------
## functions returning short HTML fragments, or transforming HTML fragments
## which don't belong to other sections
# format line of commit message.
sub format_log_line_html {
my $line = shift;
$line = esc_html($line, -nbsp=>1);
$line =~ s{
\b
(
# The output of "git describe", e.g. v2.10.0-297-gf6727b0
# or hadoop-20160921-113441-20-g094fb7d
(?<!-) # see strbuf_check_tag_ref(). Tags can't start with -
[A-Za-z0-9.-]+
(?!\.) # refs can't end with ".", see check_refname_format()
-g[0-9a-fA-F]{7,40}
|
# Just a normal looking Git SHA1
[0-9a-fA-F]{7,40}
)
\b
}{
$cgi->a({-href => href(action=>"object", hash=>$1),
-class => "text"}, $1);
}egx;
return $line;
}
# format marker of refs pointing to given object
# the destination action is chosen based on object type and current context:
# - for annotated tags, we choose the tag view unless it's the current view
# already, in which case we go to shortlog view
# - for other refs, we keep the current view if we're in history, shortlog or
# log view, and select shortlog otherwise
sub format_ref_marker {
my ($refs, $id) = @_;
my $markers = '';
if (defined $refs->{$id}) {
foreach my $ref (@{$refs->{$id}}) {
# this code exploits the fact that non-lightweight tags are the
# only indirect objects, and that they are the only objects for which
# we want to use tag instead of shortlog as action
my ($type, $name) = qw();
my $indirect = ($ref =~ s/\^\{\}$//);
# e.g. tags/v2.6.11 or heads/next
if ($ref =~ m!^(.*?)s?/(.*)$!) {
$type = $1;
$name = $2;
} else {
$type = "ref";
$name = $ref;
}
my $class = $type;
$class .= " indirect" if $indirect;
my $dest_action = "shortlog";
if ($indirect) {
$dest_action = "tag" unless $action eq "tag";
} elsif ($action =~ /^(history|(short)?log)$/) {
$dest_action = $action;
}
my $dest = "";
$dest .= "refs/" unless $ref =~ m!^refs/!;
$dest .= $ref;
my $link = $cgi->a({
-href => href(
action=>$dest_action,
hash=>$dest
)}, esc_html($name));
$markers .= " <span class=\"".esc_attr($class)."\" title=\"".esc_attr($ref)."\">" .
$link . "</span>";
}
}
if ($markers) {
return ' <span class="refs">'. $markers . '</span>';
} else {
return "";
}
}
# format, perhaps shortened and with markers, title line
sub format_subject_html {
my ($long, $short, $href, $extra) = @_;
$extra = '' unless defined($extra);
if (length($short) < length($long)) {
$long =~ s/[[:cntrl:]]/?/g;
return $cgi->a({-href => $href, -class => "list subject",
-title => to_utf8($long)},
esc_html($short)) . $extra;
} else {
return $cgi->a({-href => $href, -class => "list subject"},
esc_html($long)) . $extra;
}
}
# Rather than recomputing the url for an email multiple times, we cache it
# after the first hit. This gives a visible benefit in views where the avatar
# for the same email is used repeatedly (e.g. shortlog).
# The cache is shared by all avatar engines (currently gravatar only), which
# are free to use it as preferred. Since only one avatar engine is used for any
# given page, there's no risk for cache conflicts.
our %avatar_cache = ();
# Compute the picon url for a given email, by using the picon search service over at
# http://www.cs.indiana.edu/picons/search.html
sub picon_url {
my $email = lc shift;
if (!$avatar_cache{$email}) {
my ($user, $domain) = split('@', $email);
$avatar_cache{$email} =
"//www.cs.indiana.edu/cgi-pub/kinzler/piconsearch.cgi/" .
"$domain/$user/" .
"users+domains+unknown/up/single";
}
return $avatar_cache{$email};
}
# Compute the gravatar url for a given email, if it's not in the cache already.
# Gravatar stores only the part of the URL before the size, since that's the
# one computationally more expensive. This also allows reuse of the cache for
# different sizes (for this particular engine).
sub gravatar_url {
my $email = lc shift;
my $size = shift;
$avatar_cache{$email} ||=
"//www.gravatar.com/avatar/" .
md5_hex($email) . "?s=";
return $avatar_cache{$email} . $size;
}
# Insert an avatar for the given $email at the given $size if the feature
# is enabled.
sub git_get_avatar {
my ($email, %opts) = @_;
my $pre_white = ($opts{-pad_before} ? "&nbsp;" : "");
my $post_white = ($opts{-pad_after} ? "&nbsp;" : "");
$opts{-size} ||= 'default';
my $size = $avatar_size{$opts{-size}} || $avatar_size{'default'};
my $url = "";
if ($git_avatar eq 'gravatar') {
$url = gravatar_url($email, $size);
} elsif ($git_avatar eq 'picon') {
$url = picon_url($email);
}
# Other providers can be added by extending the if chain, defining $url
# as needed. If no variant puts something in $url, we assume avatars
# are completely disabled/unavailable.
if ($url) {
return $pre_white .
"<img width=\"$size\" " .
"class=\"avatar\" " .
"src=\"".esc_url($url)."\" " .
"alt=\"\" " .
"/>" . $post_white;
} else {
return "";
}
}
sub format_search_author {
my ($author, $searchtype, $displaytext) = @_;
my $have_search = gitweb_check_feature('search');
if ($have_search) {
my $performed = "";
if ($searchtype eq 'author') {
$performed = "authored";
} elsif ($searchtype eq 'committer') {
$performed = "committed";
}
return $cgi->a({-href => href(action=>"search", hash=>$hash,
searchtext=>$author,
searchtype=>$searchtype), class=>"list",
title=>"Search for commits $performed by $author"},
$displaytext);
} else {
return $displaytext;
}
}
# format the author name of the given commit with the given tag
# the author name is chopped and escaped according to the other
# optional parameters (see chop_str).
sub format_author_html {
my $tag = shift;
my $co = shift;
my $author = chop_and_escape_str($co->{'author_name'}, @_);
return "<$tag class=\"author\">" .
format_search_author($co->{'author_name'}, "author",
git_get_avatar($co->{'author_email'}, -pad_after => 1) .
$author) .
"</$tag>";
}
# format git diff header line, i.e. "diff --(git|combined|cc) ..."
sub format_git_diff_header_line {
my $line = shift;
my $diffinfo = shift;
my ($from, $to) = @_;
if ($diffinfo->{'nparents'}) {
# combined diff
$line =~ s!^(diff (.*?) )"?.*$!$1!;
if ($to->{'href'}) {
$line .= $cgi->a({-href => $to->{'href'}, -class => "path"},
esc_path($to->{'file'}));
} else { # file was deleted (no href)
$line .= esc_path($to->{'file'});
}
} else {
# "ordinary" diff
$line =~ s!^(diff (.*?) )"?a/.*$!$1!;
if ($from->{'href'}) {
$line .= $cgi->a({-href => $from->{'href'}, -class => "path"},
'a/' . esc_path($from->{'file'}));
} else { # file was added (no href)
$line .= 'a/' . esc_path($from->{'file'});
}
$line .= ' ';
if ($to->{'href'}) {
$line .= $cgi->a({-href => $to->{'href'}, -class => "path"},
'b/' . esc_path($to->{'file'}));
} else { # file was deleted
$line .= 'b/' . esc_path($to->{'file'});
}
}
return "<div class=\"diff header\">$line</div>\n";
}
# format extended diff header line, before patch itself
sub format_extended_diff_header_line {
my $line = shift;
my $diffinfo = shift;
my ($from, $to) = @_;
# match <path>
if ($line =~ s!^((copy|rename) from ).*$!$1! && $from->{'href'}) {
$line .= $cgi->a({-href=>$from->{'href'}, -class=>"path"},
esc_path($from->{'file'}));
}
if ($line =~ s!^((copy|rename) to ).*$!$1! && $to->{'href'}) {
$line .= $cgi->a({-href=>$to->{'href'}, -class=>"path"},
esc_path($to->{'file'}));
}
# match single <mode>
if ($line =~ m/\s(\d{6})$/) {
$line .= '<span class="info"> (' .
file_type_long($1) .
')</span>';
}
# match <hash>
if ($line =~ m/^index [0-9a-fA-F]{40},[0-9a-fA-F]{40}/) {
# can match only for combined diff
$line = 'index ';
for (my $i = 0; $i < $diffinfo->{'nparents'}; $i++) {
if ($from->{'href'}[$i]) {
$line .= $cgi->a({-href=>$from->{'href'}[$i],
-class=>"hash"},
substr($diffinfo->{'from_id'}[$i],0,7));
} else {
$line .= '0' x 7;
}
# separator
$line .= ',' if ($i < $diffinfo->{'nparents'} - 1);
}
$line .= '..';
if ($to->{'href'}) {
$line .= $cgi->a({-href=>$to->{'href'}, -class=>"hash"},
substr($diffinfo->{'to_id'},0,7));
} else {
$line .= '0' x 7;
}
} elsif ($line =~ m/^index [0-9a-fA-F]{40}..[0-9a-fA-F]{40}/) {
# can match only for ordinary diff
my ($from_link, $to_link);
if ($from->{'href'}) {
$from_link = $cgi->a({-href=>$from->{'href'}, -class=>"hash"},
substr($diffinfo->{'from_id'},0,7));
} else {
$from_link = '0' x 7;
}
if ($to->{'href'}) {
$to_link = $cgi->a({-href=>$to->{'href'}, -class=>"hash"},
substr($diffinfo->{'to_id'},0,7));
} else {
$to_link = '0' x 7;
}
my ($from_id, $to_id) = ($diffinfo->{'from_id'}, $diffinfo->{'to_id'});
$line =~ s!$from_id\.\.$to_id!$from_link..$to_link!;
}
return $line . "<br/>\n";
}
# format from-file/to-file diff header
sub format_diff_from_to_header {
my ($from_line, $to_line, $diffinfo, $from, $to, @parents) = @_;
my $line;
my $result = '';
$line = $from_line;
#assert($line =~ m/^---/) if DEBUG;
# no extra formatting for "^--- /dev/null"
if (! $diffinfo->{'nparents'}) {
# ordinary (single parent) diff
if ($line =~ m!^--- "?a/!) {
if ($from->{'href'}) {
$line = '--- a/' .
$cgi->a({-href=>$from->{'href'}, -class=>"path"},
esc_path($from->{'file'}));
} else {
$line = '--- a/' .
esc_path($from->{'file'});
}
}
$result .= qq!<div class="diff from_file">$line</div>\n!;
} else {
# combined diff (merge commit)
for (my $i = 0; $i < $diffinfo->{'nparents'}; $i++) {
if ($from->{'href'}[$i]) {
$line = '--- ' .
$cgi->a({-href=>href(action=>"blobdiff",
hash_parent=>$diffinfo->{'from_id'}[$i],
hash_parent_base=>$parents[$i],
file_parent=>$from->{'file'}[$i],
hash=>$diffinfo->{'to_id'},
hash_base=>$hash,
file_name=>$to->{'file'}),
-class=>"path",
-title=>"diff" . ($i+1)},
$i+1) .
'/' .
$cgi->a({-href=>$from->{'href'}[$i], -class=>"path"},
esc_path($from->{'file'}[$i]));
} else {
$line = '--- /dev/null';
}
$result .= qq!<div class="diff from_file">$line</div>\n!;
}
}
$line = $to_line;
#assert($line =~ m/^\+\+\+/) if DEBUG;
# no extra formatting for "^+++ /dev/null"
if ($line =~ m!^\+\+\+ "?b/!) {
if ($to->{'href'}) {
$line = '+++ b/' .
$cgi->a({-href=>$to->{'href'}, -class=>"path"},
esc_path($to->{'file'}));
} else {
$line = '+++ b/' .
esc_path($to->{'file'});
}
}
$result .= qq!<div class="diff to_file">$line</div>\n!;
return $result;
}
# create note for patch simplified by combined diff
sub format_diff_cc_simplified {
my ($diffinfo, @parents) = @_;
my $result = '';
$result .= "<div class=\"diff header\">" .
"diff --cc ";
if (!is_deleted($diffinfo)) {
$result .= $cgi->a({-href => href(action=>"blob",
hash_base=>$hash,
hash=>$diffinfo->{'to_id'},
file_name=>$diffinfo->{'to_file'}),
-class => "path"},
esc_path($diffinfo->{'to_file'}));
} else {
$result .= esc_path($diffinfo->{'to_file'});
}
$result .= "</div>\n" . # class="diff header"
"<div class=\"diff nodifferences\">" .
"Simple merge" .
"</div>\n"; # class="diff nodifferences"
return $result;
}
sub diff_line_class {
my ($line, $from, $to) = @_;
# ordinary diff
my $num_sign = 1;
# combined diff
if ($from && $to && ref($from->{'href'}) eq "ARRAY") {
$num_sign = scalar @{$from->{'href'}};
}
my @diff_line_classifier = (
{ regexp => qr/^\@\@{$num_sign} /, class => "chunk_header"},
{ regexp => qr/^\\/, class => "incomplete" },
{ regexp => qr/^ {$num_sign}/, class => "ctx" },
# classifier for context must come before classifier add/rem,
# or we would have to use more complicated regexp, for example
# qr/(?= {0,$m}\+)[+ ]{$num_sign}/, where $m = $num_sign - 1;
{ regexp => qr/^[+ ]{$num_sign}/, class => "add" },
{ regexp => qr/^[- ]{$num_sign}/, class => "rem" },
);
for my $clsfy (@diff_line_classifier) {
return $clsfy->{'class'}
if ($line =~ $clsfy->{'regexp'});
}
# fallback
return "";
}
# assumes that $from and $to are defined and correctly filled,
# and that $line holds a line of chunk header for unified diff
sub format_unidiff_chunk_header {
my ($line, $from, $to) = @_;
my ($from_text, $from_start, $from_lines, $to_text, $to_start, $to_lines, $section) =
$line =~ m/^\@{2} (-(\d+)(?:,(\d+))?) (\+(\d+)(?:,(\d+))?) \@{2}(.*)$/;
$from_lines = 0 unless defined $from_lines;
$to_lines = 0 unless defined $to_lines;
if ($from->{'href'}) {
$from_text = $cgi->a({-href=>"$from->{'href'}#l$from_start",
-class=>"list"}, $from_text);
}
if ($to->{'href'}) {
$to_text = $cgi->a({-href=>"$to->{'href'}#l$to_start",
-class=>"list"}, $to_text);
}
$line = "<span class=\"chunk_info\">@@ $from_text $to_text @@</span>" .
"<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
return $line;
}
# assumes that $from and $to are defined and correctly filled,
# and that $line holds a line of chunk header for combined diff
sub format_cc_diff_chunk_header {
my ($line, $from, $to) = @_;
my ($prefix, $ranges, $section) = $line =~ m/^(\@+) (.*?) \@+(.*)$/;
my (@from_text, @from_start, @from_nlines, $to_text, $to_start, $to_nlines);
@from_text = split(' ', $ranges);
for (my $i = 0; $i < @from_text; ++$i) {
($from_start[$i], $from_nlines[$i]) =
(split(',', substr($from_text[$i], 1)), 0);
}
$to_text = pop @from_text;
$to_start = pop @from_start;
$to_nlines = pop @from_nlines;
$line = "<span class=\"chunk_info\">$prefix ";
for (my $i = 0; $i < @from_text; ++$i) {
if ($from->{'href'}[$i]) {
$line .= $cgi->a({-href=>"$from->{'href'}[$i]#l$from_start[$i]",
-class=>"list"}, $from_text[$i]);
} else {
$line .= $from_text[$i];
}
$line .= " ";
}
if ($to->{'href'}) {
$line .= $cgi->a({-href=>"$to->{'href'}#l$to_start",
-class=>"list"}, $to_text);
} else {
$line .= $to_text;
}
$line .= " $prefix</span>" .
"<span class=\"section\">" . esc_html($section, -nbsp=>1) . "</span>";
return $line;
}
# process patch (diff) line (not to be used for diff headers),
# returning HTML-formatted (but not wrapped) line.
# If the line is passed as a reference, it is treated as HTML and not
# esc_html()'ed.
sub format_diff_line {
my ($line, $diff_class, $from, $to) = @_;
if (ref($line)) {
$line = $$line;
} else {
chomp $line;
$line = untabify($line);
if ($from && $to && $line =~ m/^\@{2} /) {
$line = format_unidiff_chunk_header($line, $from, $to);
} elsif ($from && $to && $line =~ m/^\@{3}/) {
$line = format_cc_diff_chunk_header($line, $from, $to);
} else {
$line = esc_html($line, -nbsp=>1);
}
}
my $diff_classes = "diff";
$diff_classes .= " $diff_class" if ($diff_class);
$line = "<div class=\"$diff_classes\">$line</div>\n";
return $line;
}
# Generates undef or something like "_snapshot_" or "snapshot (_tbz2_ _zip_)",
# linked. Pass the hash of the tree/commit to snapshot.
sub format_snapshot_links {
my ($hash) = @_;
my $num_fmts = @snapshot_fmts;
if ($num_fmts > 1) {
# A parenthesized list of links bearing format names.
# e.g. "snapshot (_tar.gz_ _zip_)"
return "snapshot (" . join(' ', map
$cgi->a({
-href => href(
action=>"snapshot",
hash=>$hash,
snapshot_format=>$_
)
}, $known_snapshot_formats{$_}{'display'})
, @snapshot_fmts) . ")";
} elsif ($num_fmts == 1) {
# A single "snapshot" link whose tooltip bears the format name.
# i.e. "_snapshot_"
my ($fmt) = @snapshot_fmts;
return
$cgi->a({
-href => href(
action=>"snapshot",
hash=>$hash,
snapshot_format=>$fmt
),
-title => "in format: $known_snapshot_formats{$fmt}{'display'}"
}, "snapshot");
} else { # $num_fmts == 0
return undef;
}
}
## ......................................................................
## functions returning values to be passed, perhaps after some
## transformation, to other functions; e.g. returning arguments to href()
# returns hash to be passed to href to generate gitweb URL
# in -title key it returns description of link
sub get_feed_info {
my $format = shift || 'Atom';
my %res = (action => lc($format));
my $matched_ref = 0;
# feed links are possible only for project views
return unless (defined $project);
# some views should link to OPML, or to generic project feed,
# or don't have specific feed yet (so they should use generic)
return if (!$action || $action =~ /^(?:tags|heads|forks|tag|search)$/x);
my $branch = undef;
# branches refs uses 'refs/' + $get_branch_refs()[x] + '/' prefix
# (fullname) to differentiate from tag links; this also makes
# possible to detect branch links
for my $ref (get_branch_refs()) {
if ((defined $hash_base && $hash_base =~ m!^refs/\Q$ref\E/(.*)$!) ||
(defined $hash && $hash =~ m!^refs/\Q$ref\E/(.*)$!)) {
$branch = $1;
$matched_ref = $ref;
last;
}
}
# find log type for feed description (title)
my $type = 'log';
if (defined $file_name) {
$type = "history of $file_name";
$type .= "/" if ($action eq 'tree');
$type .= " on '$branch'" if (defined $branch);
} else {
$type = "log of $branch" if (defined $branch);
}
$res{-title} = $type;
$res{'hash'} = (defined $branch ? "refs/$matched_ref/$branch" : undef);
$res{'file_name'} = $file_name;
return %res;
}
## ----------------------------------------------------------------------
## git utility subroutines, invoking git commands
# returns path to the core git executable and the --git-dir parameter as list
sub git_cmd {
$number_of_git_cmds++;
return $GIT, '--git-dir='.$git_dir;
}
# quote the given arguments for passing them to the shell
# quote_command("command", "arg 1", "arg with ' and ! characters")
# => "'command' 'arg 1' 'arg with '\'' and '\!' characters'"
# Try to avoid using this function wherever possible.
sub quote_command {
return join(' ',
map { my $a = $_; $a =~ s/(['!])/'\\$1'/g; "'$a'" } @_ );
}
# get HEAD ref of given project as hash
sub git_get_head_hash {
return git_get_full_hash(shift, 'HEAD');
}
sub git_get_full_hash {
return git_get_hash(@_);
}
sub git_get_short_hash {
return git_get_hash(@_, '--short=7');
}
sub git_get_hash {
my ($project, $hash, @options) = @_;
my $o_git_dir = $git_dir;
my $retval = undef;
$git_dir = "$projectroot/$project";
if (open my $fd, '-|', git_cmd(), 'rev-parse',
'--verify', '-q', @options, $hash) {
$retval = <$fd>;
chomp $retval if defined $retval;
close $fd;
}
if (defined $o_git_dir) {
$git_dir = $o_git_dir;
}
return $retval;
}
# get type of given object
sub git_get_type {
my $hash = shift;
open my $fd, "-|", git_cmd(), "cat-file", '-t', $hash or return;
my $type = <$fd>;
close $fd or return;
chomp $type;
return $type;
}
# repository configuration
our $config_file = '';
our %config;
# store multiple values for single key as anonymous array reference
# single values stored directly in the hash, not as [ <value> ]
sub hash_set_multi {
my ($hash, $key, $value) = @_;
if (!exists $hash->{$key}) {
$hash->{$key} = $value;
} elsif (!ref $hash->{$key}) {
$hash->{$key} = [ $hash->{$key}, $value ];
} else {
push @{$hash->{$key}}, $value;
}
}
# return hash of git project configuration
# optionally limited to some section, e.g. 'gitweb'
sub git_parse_project_config {
my $section_regexp = shift;
my %config;
local $/ = "\0";
open my $fh, "-|", git_cmd(), "config", '-z', '-l',
or return;
while (my $keyval = <$fh>) {
chomp $keyval;
my ($key, $value) = split(/\n/, $keyval, 2);
hash_set_multi(\%config, $key, $value)
if (!defined $section_regexp || $key =~ /^(?:$section_regexp)\./o);
}
close $fh;
return %config;
}
# convert config value to boolean: 'true' or 'false'
# no value, number > 0, 'true' and 'yes' values are true
# rest of values are treated as false (never as error)
sub config_to_bool {
my $val = shift;
return 1 if !defined $val; # section.key
# strip leading and trailing whitespace
$val =~ s/^\s+//;
$val =~ s/\s+$//;
return (($val =~ /^\d+$/ && $val) || # section.key = 1
($val =~ /^(?:true|yes)$/i)); # section.key = true
}
# convert config value to simple decimal number
# an optional value suffix of 'k', 'm', or 'g' will cause the value
# to be multiplied by 1024, 1048576, or 1073741824
sub config_to_int {
my $val = shift;
# strip leading and trailing whitespace
$val =~ s/^\s+//;
$val =~ s/\s+$//;
if (my ($num, $unit) = ($val =~ /^([0-9]*)([kmg])$/i)) {
$unit = lc($unit);
# unknown unit is treated as 1
return $num * ($unit eq 'g' ? 1073741824 :
$unit eq 'm' ? 1048576 :
$unit eq 'k' ? 1024 : 1);
}
return $val;
}
# convert config value to array reference, if needed
sub config_to_multi {
my $val = shift;
return ref($val) ? $val : (defined($val) ? [ $val ] : []);
}
sub git_get_project_config {
my ($key, $type) = @_;
return unless defined $git_dir;
# key sanity check
return unless ($key);
# only subsection, if exists, is case sensitive,
# and not lowercased by 'git config -z -l'
if (my ($hi, $mi, $lo) = ($key =~ /^([^.]*)\.(.*)\.([^.]*)$/)) {
$lo =~ s/_//g;
$key = join(".", lc($hi), $mi, lc($lo));
return if ($lo =~ /\W/ || $hi =~ /\W/);
} else {
$key = lc($key);
$key =~ s/_//g;
return if ($key =~ /\W/);
}
$key =~ s/^gitweb\.//;
# type sanity check
if (defined $type) {
$type =~ s/^--//;
$type = undef
unless ($type eq 'bool' || $type eq 'int');
}
# get config
if (!defined $config_file ||
$config_file ne "$git_dir/config") {
%config = git_parse_project_config('gitweb');
$config_file = "$git_dir/config";
}
# check if config variable (key) exists
return unless exists $config{"gitweb.$key"};
# ensure given type
if (!defined $type) {
return $config{"gitweb.$key"};
} elsif ($type eq 'bool') {
# backward compatibility: 'git config --bool' returns true/false
return config_to_bool($config{"gitweb.$key"}) ? 'true' : 'false';
} elsif ($type eq 'int') {
return config_to_int($config{"gitweb.$key"});
}
return $config{"gitweb.$key"};
}
# get hash of given path at given ref
sub git_get_hash_by_path {
my $base = shift;
my $path = shift || return undef;
my $type = shift;
$path =~ s,/+$,,;
open my $fd, "-|", git_cmd(), "ls-tree", $base, "--", $path
or die_error(500, "Open git-ls-tree failed");
my $line = <$fd>;
close $fd or return undef;
if (!defined $line) {
# there is no tree or hash given by $path at $base
return undef;
}
#'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c'
$line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40})\t/;
if (defined $type && $type ne $2) {
# type doesn't match
return undef;
}
return $3;
}
# get path of entry with given hash at given tree-ish (ref)
# used to get 'from' filename for combined diff (merge commit) for renames
sub git_get_path_by_hash {
my $base = shift || return;
my $hash = shift || return;
local $/ = "\0";
open my $fd, "-|", git_cmd(), "ls-tree", '-r', '-t', '-z', $base
or return undef;
while (my $line = <$fd>) {
chomp $line;
#'040000 tree 595596a6a9117ddba9fe379b6b012b558bac8423 gitweb'
#'100644 blob e02e90f0429be0d2a69b76571101f20b8f75530f gitweb/README'
if ($line =~ m/(?:[0-9]+) (?:.+) $hash\t(.+)$/) {
close $fd;
return $1;
}
}
close $fd;
return undef;
}
## ......................................................................
## git utility functions, directly accessing git repository
# get the value of config variable either from file named as the variable
# itself in the repository ($GIT_DIR/$name file), or from gitweb.$name
# configuration variable in the repository config file.
sub git_get_file_or_project_config {
my ($path, $name) = @_;
$git_dir = "$projectroot/$path";
open my $fd, '<', "$git_dir/$name"
or return git_get_project_config($name);
my $conf = <$fd>;
close $fd;
if (defined $conf) {
chomp $conf;
}
return $conf;
}
sub git_get_project_description {
my $path = shift;
return git_get_file_or_project_config($path, 'description');
}
sub git_get_project_category {
my $path = shift;
return git_get_file_or_project_config($path, 'category');
}
# supported formats:
# * $GIT_DIR/ctags/<tagname> file (in 'ctags' subdirectory)
# - if its contents is a number, use it as tag weight,
# - otherwise add a tag with weight 1
# * $GIT_DIR/ctags file, each line is a tag (with weight 1)
# the same value multiple times increases tag weight
# * `gitweb.ctag' multi-valued repo config variable
sub git_get_project_ctags {
my $project = shift;
my $ctags = {};
$git_dir = "$projectroot/$project";
if (opendir my $dh, "$git_dir/ctags") {
my @files = grep { -f $_ } map { "$git_dir/ctags/$_" } readdir($dh);
foreach my $tagfile (@files) {
open my $ct, '<', $tagfile
or next;
my $val = <$ct>;
chomp $val if $val;
close $ct;
(my $ctag = $tagfile) =~ s#.*/##;
if ($val =~ /^\d+$/) {
$ctags->{$ctag} = $val;
} else {
$ctags->{$ctag} = 1;
}
}
closedir $dh;
} elsif (open my $fh, '<', "$git_dir/ctags") {
while (my $line = <$fh>) {
chomp $line;
$ctags->{$line}++ if $line;
}
close $fh;
} else {
my $taglist = config_to_multi(git_get_project_config('ctag'));
foreach my $tag (@$taglist) {
$ctags->{$tag}++;
}
}
return $ctags;
}
# return hash, where keys are content tags ('ctags'),
# and values are sum of weights of given tag in every project
sub git_gather_all_ctags {
my $projects = shift;
my $ctags = {};
foreach my $p (@$projects) {
foreach my $ct (keys %{$p->{'ctags'}}) {
$ctags->{$ct} += $p->{'ctags'}->{$ct};
}
}
return $ctags;
}
sub git_populate_project_tagcloud {
my $ctags = shift;
# First, merge different-cased tags; tags vote on casing
my %ctags_lc;
foreach (keys %$ctags) {
$ctags_lc{lc $_}->{count} += $ctags->{$_};
if (not $ctags_lc{lc $_}->{topcount}
or $ctags_lc{lc $_}->{topcount} < $ctags->{$_}) {
$ctags_lc{lc $_}->{topcount} = $ctags->{$_};
$ctags_lc{lc $_}->{topname} = $_;
}
}
my $cloud;
my $matched = $input_params{'ctag'};
if (eval { require HTML::TagCloud; 1; }) {
$cloud = HTML::TagCloud->new;
foreach my $ctag (sort keys %ctags_lc) {
# Pad the title with spaces so that the cloud looks
# less crammed.
my $title = esc_html($ctags_lc{$ctag}->{topname});
$title =~ s/ /&nbsp;/g;
$title =~ s/^/&nbsp;/g;
$title =~ s/$/&nbsp;/g;
if (defined $matched && $matched eq $ctag) {
$title = qq(<span class="match">$title</span>);
}
$cloud->add($title, href(project=>undef, ctag=>$ctag),
$ctags_lc{$ctag}->{count});
}
} else {
$cloud = {};
foreach my $ctag (keys %ctags_lc) {
my $title = esc_html($ctags_lc{$ctag}->{topname}, -nbsp=>1);
if (defined $matched && $matched eq $ctag) {
$title = qq(<span class="match">$title</span>);
}
$cloud->{$ctag}{count} = $ctags_lc{$ctag}->{count};
$cloud->{$ctag}{ctag} =
$cgi->a({-href=>href(project=>undef, ctag=>$ctag)}, $title);
}
}
return $cloud;
}
sub git_show_project_tagcloud {
my ($cloud, $count) = @_;
if (ref $cloud eq 'HTML::TagCloud') {
return $cloud->html_and_css($count);
} else {
my @tags = sort { $cloud->{$a}->{'count'} <=> $cloud->{$b}->{'count'} } keys %$cloud;
return
'<div id="htmltagcloud"'.($project ? '' : ' align="center"').'>' .
join (', ', map {
$cloud->{$_}->{'ctag'}
} splice(@tags, 0, $count)) .
'</div>';
}
}
sub git_get_project_url_list {
my $path = shift;
$git_dir = "$projectroot/$path";
open my $fd, '<', "$git_dir/cloneurl"
or return wantarray ?
@{ config_to_multi(git_get_project_config('url')) } :
config_to_multi(git_get_project_config('url'));
my @git_project_url_list = map { chomp; $_ } <$fd>;
close $fd;
return wantarray ? @git_project_url_list : \@git_project_url_list;
}
sub git_get_projects_list {
my $filter = shift || '';
my $paranoid = shift;
my @list;
if (-d $projects_list) {
# search in directory
my $dir = $projects_list;
# remove the trailing "/"
$dir =~ s!/+$!!;
my $pfxlen = length("$dir");
my $pfxdepth = ($dir =~ tr!/!!);
# when filtering, search only given subdirectory
if ($filter && !$paranoid) {
$dir .= "/$filter";
$dir =~ s!/+$!!;
}
File::Find::find({
follow_fast => 1, # follow symbolic links
follow_skip => 2, # ignore duplicates
dangling_symlinks => 0, # ignore dangling symlinks, silently
wanted => sub {
# global variables
our $project_maxdepth;
our $projectroot;
# skip project-list toplevel, if we get it.
return if (m!^[/.]$!);
# only directories can be git repositories
return unless (-d $_);
# need search permission
return unless (-x $_);
# don't traverse too deep (Find is super slow on os x)
# $project_maxdepth excludes depth of $projectroot
if (($File::Find::name =~ tr!/!!) - $pfxdepth > $project_maxdepth) {
$File::Find::prune = 1;
return;
}
my $path = substr($File::Find::name, $pfxlen + 1);
# paranoidly only filter here
if ($paranoid && $filter && $path !~ m!^\Q$filter\E/!) {
next;
}
# we check related file in $projectroot
if (check_export_ok("$projectroot/$path")) {
push @list, { path => $path };
$File::Find::prune = 1;
}
},
}, "$dir");
} elsif (-f $projects_list) {
# read from file(url-encoded):
# 'git%2Fgit.git Linus+Torvalds'
# 'libs%2Fklibc%2Fklibc.git H.+Peter+Anvin'
# 'linux%2Fhotplug%2Fudev.git Greg+Kroah-Hartman'
open my $fd, '<', $projects_list or return;
PROJECT:
while (my $line = <$fd>) {
chomp $line;
my ($path, $owner) = split ' ', $line;
$path = unescape($path);
$owner = unescape($owner);
if (!defined $path) {
next;
}
# if $filter is rpovided, check if $path begins with $filter
if ($filter && $path !~ m!^\Q$filter\E/!) {
next;
}
if (check_export_ok("$projectroot/$path")) {
my $pr = {
path => $path
};
if ($owner) {
$pr->{'owner'} = to_utf8($owner);
}
push @list, $pr;
}
}
close $fd;
}
return @list;
}
# written with help of Tree::Trie module (Perl Artistic License, GPL compatible)
# as side effects it sets 'forks' field to list of forks for forked projects
sub filter_forks_from_projects_list {
my $projects = shift;
my %trie; # prefix tree of directories (path components)
# generate trie out of those directories that might contain forks
foreach my $pr (@$projects) {
my $path = $pr->{'path'};
$path =~ s/\.git$//; # forks of 'repo.git' are in 'repo/' directory
next if ($path =~ m!/$!); # skip non-bare repositories, e.g. 'repo/.git'
next unless ($path); # skip '.git' repository: tests, git-instaweb
next unless (-d "$projectroot/$path"); # containing directory exists
$pr->{'forks'} = []; # there can be 0 or more forks of project
# add to trie
my @dirs = split('/', $path);
# walk the trie, until either runs out of components or out of trie
my $ref = \%trie;
while (scalar @dirs &&
exists($ref->{$dirs[0]})) {
$ref = $ref->{shift @dirs};
}
# create rest of trie structure from rest of components
foreach my $dir (@dirs) {
$ref = $ref->{$dir} = {};
}
# create end marker, store $pr as a data
$ref->{''} = $pr if (!exists $ref->{''});
}
# filter out forks, by finding shortest prefix match for paths
my @filtered;
PROJECT:
foreach my $pr (@$projects) {
# trie lookup
my $ref = \%trie;
DIR:
foreach my $dir (split('/', $pr->{'path'})) {
if (exists $ref->{''}) {
# found [shortest] prefix, is a fork - skip it
push @{$ref->{''}{'forks'}}, $pr;
next PROJECT;
}
if (!exists $ref->{$dir}) {
# not in trie, cannot have prefix, not a fork
push @filtered, $pr;
next PROJECT;
}
# If the dir is there, we just walk one step down the trie.
$ref = $ref->{$dir};
}
# we ran out of trie
# (shouldn't happen: it's either no match, or end marker)
push @filtered, $pr;
}
return @filtered;
}
# note: fill_project_list_info must be run first,
# for 'descr_long' and 'ctags' to be filled
sub search_projects_list {
my ($projlist, %opts) = @_;
my $tagfilter = $opts{'tagfilter'};
my $search_re = $opts{'search_regexp'};
return @$projlist
unless ($tagfilter || $search_re);
# searching projects require filling to be run before it;
fill_project_list_info($projlist,
$tagfilter ? 'ctags' : (),
$search_re ? ('path', 'descr') : ());
my @projects;
PROJECT:
foreach my $pr (@$projlist) {
if ($tagfilter) {
next unless ref($pr->{'ctags'}) eq 'HASH';
next unless
grep { lc($_) eq lc($tagfilter) } keys %{$pr->{'ctags'}};
}
if ($search_re) {
next unless
$pr->{'path'} =~ /$search_re/ ||
$pr->{'descr_long'} =~ /$search_re/;
}
push @projects, $pr;
}
return @projects;
}
our $gitweb_project_owner = undef;
sub git_get_project_list_from_file {
return if (defined $gitweb_project_owner);
$gitweb_project_owner = {};
# read from file (url-encoded):
# 'git%2Fgit.git Linus+Torvalds'
# 'libs%2Fklibc%2Fklibc.git H.+Peter+Anvin'
# 'linux%2Fhotplug%2Fudev.git Greg+Kroah-Hartman'
if (-f $projects_list) {
open(my $fd, '<', $projects_list);
while (my $line = <$fd>) {
chomp $line;
my ($pr, $ow) = split ' ', $line;
$pr = unescape($pr);
$ow = unescape($ow);
$gitweb_project_owner->{$pr} = to_utf8($ow);
}
close $fd;
}
}
sub git_get_project_owner {
my $project = shift;
my $owner;
return undef unless $project;
$git_dir = "$projectroot/$project";
if (!defined $gitweb_project_owner) {
git_get_project_list_from_file();
}
if (exists $gitweb_project_owner->{$project}) {
$owner = $gitweb_project_owner->{$project};
}
if (!defined $owner){
$owner = git_get_project_config('owner');
}
if (!defined $owner) {
$owner = get_file_owner("$git_dir");
}
return $owner;
}
sub git_get_last_activity {
my ($path) = @_;
my $fd;
$git_dir = "$projectroot/$path";
open($fd, "-|", git_cmd(), 'for-each-ref',
'--format=%(committer)',
'--sort=-committerdate',
'--count=1',
map { "refs/$_" } get_branch_refs ()) or return;
my $most_recent = <$fd>;
close $fd or return;
if (defined $most_recent &&
$most_recent =~ / (\d+) [-+][01]\d\d\d$/) {
my $timestamp = $1;
my $age = time - $timestamp;
return ($age, age_string($age));
}
return (undef, undef);
}
# Implementation note: when a single remote is wanted, we cannot use 'git
# remote show -n' because that command always work (assuming it's a remote URL
# if it's not defined), and we cannot use 'git remote show' because that would
# try to make a network roundtrip. So the only way to find if that particular
# remote is defined is to walk the list provided by 'git remote -v' and stop if
# and when we find what we want.
sub git_get_remotes_list {
my $wanted = shift;
my %remotes = ();
open my $fd, '-|' , git_cmd(), 'remote', '-v';
return unless $fd;
while (my $remote = <$fd>) {
chomp $remote;
$remote =~ s!\t(.*?)\s+\((\w+)\)$!!;
next if $wanted and not $remote eq $wanted;
my ($url, $key) = ($1, $2);
$remotes{$remote} ||= { 'heads' => () };
$remotes{$remote}{$key} = $url;
}
close $fd or return;
return wantarray ? %remotes : \%remotes;
}
# Takes a hash of remotes as first parameter and fills it by adding the
# available remote heads for each of the indicated remotes.
sub fill_remote_heads {
my $remotes = shift;
my @heads = map { "remotes/$_" } keys %$remotes;
my @remoteheads = git_get_heads_list(undef, @heads);
foreach my $remote (keys %$remotes) {
$remotes->{$remote}{'heads'} = [ grep {
$_->{'name'} =~ s!^$remote/!!
} @remoteheads ];
}
}
sub git_get_references {
my $type = shift || "";
my %refs;
# 5dc01c595e6c6ec9ccda4f6f69c131c0dd945f8c refs/tags/v2.6.11
# c39ae07f393806ccf406ef966e9a15afc43cc36a refs/tags/v2.6.11^{}
open my $fd, "-|", git_cmd(), "show-ref", "--dereference",
($type ? ("--", "refs/$type") : ()) # use -- <pattern> if $type
or return;
while (my $line = <$fd>) {
chomp $line;
if ($line =~ m!^([0-9a-fA-F]{40})\srefs/($type.*)$!) {
if (defined $refs{$1}) {
push @{$refs{$1}}, $2;
} else {
$refs{$1} = [ $2 ];
}
}
}
close $fd or return;
return \%refs;
}
sub git_get_rev_name_tags {
my $hash = shift || return undef;
open my $fd, "-|", git_cmd(), "name-rev", "--tags", $hash
or return;
my $name_rev = <$fd>;
close $fd;
if ($name_rev =~ m|^$hash tags/(.*)$|) {
return $1;
} else {
# catches also '$hash undefined' output
return undef;
}
}
## ----------------------------------------------------------------------
## parse to hash functions
sub parse_date {
my $epoch = shift;
my $tz = shift || "-0000";
my %date;
my @months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec");
my @days = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat");
my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday) = gmtime($epoch);
$date{'hour'} = $hour;
$date{'minute'} = $min;
$date{'mday'} = $mday;
$date{'day'} = $days[$wday];
$date{'month'} = $months[$mon];
$date{'rfc2822'} = sprintf "%s, %d %s %4d %02d:%02d:%02d +0000",
$days[$wday], $mday, $months[$mon], 1900+$year, $hour ,$min, $sec;
$date{'mday-time'} = sprintf "%d %s %02d:%02d",
$mday, $months[$mon], $hour ,$min;
$date{'iso-8601'} = sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ",
1900+$year, 1+$mon, $mday, $hour ,$min, $sec;
my ($tz_sign, $tz_hour, $tz_min) =
($tz =~ m/^([-+])(\d\d)(\d\d)$/);
$tz_sign = ($tz_sign eq '-' ? -1 : +1);
my $local = $epoch + $tz_sign*((($tz_hour*60) + $tz_min)*60);
($sec, $min, $hour, $mday, $mon, $year, $wday, $yday) = gmtime($local);
$date{'hour_local'} = $hour;
$date{'minute_local'} = $min;
$date{'tz_local'} = $tz;
$date{'iso-tz'} = sprintf("%04d-%02d-%02d %02d:%02d:%02d %s",
1900+$year, $mon+1, $mday,
$hour, $min, $sec, $tz);
return %date;
}
sub parse_tag {
my $tag_id = shift;
my %tag;
my @comment;
open my $fd, "-|", git_cmd(), "cat-file", "tag", $tag_id or return;
$tag{'id'} = $tag_id;
while (my $line = <$fd>) {
chomp $line;
if ($line =~ m/^object ([0-9a-fA-F]{40})$/) {
$tag{'object'} = $1;
} elsif ($line =~ m/^type (.+)$/) {
$tag{'type'} = $1;
} elsif ($line =~ m/^tag (.+)$/) {
$tag{'name'} = $1;
} elsif ($line =~ m/^tagger (.*) ([0-9]+) (.*)$/) {
$tag{'author'} = $1;
$tag{'author_epoch'} = $2;
$tag{'author_tz'} = $3;
if ($tag{'author'} =~ m/^([^<]+) <([^>]*)>/) {
$tag{'author_name'} = $1;
$tag{'author_email'} = $2;
} else {
$tag{'author_name'} = $tag{'author'};
}
} elsif ($line =~ m/--BEGIN/) {
push @comment, $line;
last;
} elsif ($line eq "") {
last;
}
}
push @comment, <$fd>;
$tag{'comment'} = \@comment;
close $fd or return;
if (!defined $tag{'name'}) {
return
};
return %tag
}
sub parse_commit_text {
my ($commit_text, $withparents) = @_;
my @commit_lines = split '\n', $commit_text;
my %co;
pop @commit_lines; # Remove '\0'
if (! @commit_lines) {
return;
}
my $header = shift @commit_lines;
if ($header !~ m/^[0-9a-fA-F]{40}/) {
return;
}
($co{'id'}, my @parents) = split ' ', $header;
while (my $line = shift @commit_lines) {
last if $line eq "\n";
if ($line =~ m/^tree ([0-9a-fA-F]{40})$/) {
$co{'tree'} = $1;
} elsif ((!defined $withparents) && ($line =~ m/^parent ([0-9a-fA-F]{40})$/)) {
push @parents, $1;
} elsif ($line =~ m/^author (.*) ([0-9]+) (.*)$/) {
$co{'author'} = to_utf8($1);
$co{'author_epoch'} = $2;
$co{'author_tz'} = $3;
if ($co{'author'} =~ m/^([^<]+) <([^>]*)>/) {
$co{'author_name'} = $1;
$co{'author_email'} = $2;
} else {
$co{'author_name'} = $co{'author'};
}
} elsif ($line =~ m/^committer (.*) ([0-9]+) (.*)$/) {
$co{'committer'} = to_utf8($1);
$co{'committer_epoch'} = $2;
$co{'committer_tz'} = $3;
if ($co{'committer'} =~ m/^([^<]+) <([^>]*)>/) {
$co{'committer_name'} = $1;
$co{'committer_email'} = $2;
} else {
$co{'committer_name'} = $co{'committer'};
}
}
}
if (!defined $co{'tree'}) {
return;
};
$co{'parents'} = \@parents;
$co{'parent'} = $parents[0];
foreach my $title (@commit_lines) {
$title =~ s/^ //;
if ($title ne "") {
$co{'title'} = chop_str($title, 80, 5);
# remove leading stuff of merges to make the interesting part visible
if (length($title) > 50) {
$title =~ s/^Automatic //;
$title =~ s/^merge (of|with) /Merge ... /i;
if (length($title) > 50) {
$title =~ s/(http|rsync):\/\///;
}
if (length($title) > 50) {
$title =~ s/(master|www|rsync)\.//;
}
if (length($title) > 50) {
$title =~ s/kernel.org:?//;
}
if (length($title) > 50) {
$title =~ s/\/pub\/scm//;
}
}
$co{'title_short'} = chop_str($title, 50, 5);
last;
}
}
if (! defined $co{'title'} || $co{'title'} eq "") {
$co{'title'} = $co{'title_short'} = '(no commit message)';
}
# remove added spaces
foreach my $line (@commit_lines) {
$line =~ s/^ //;
}
$co{'comment'} = \@commit_lines;
my $age = time - $co{'committer_epoch'};
$co{'age'} = $age;
$co{'age_string'} = age_string($age);
my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday) = gmtime($co{'committer_epoch'});
if ($age > 60*60*24*7*2) {
$co{'age_string_date'} = sprintf "%4i-%02u-%02i", 1900 + $year, $mon+1, $mday;
$co{'age_string_age'} = $co{'age_string'};
} else {
$co{'age_string_date'} = $co{'age_string'};
$co{'age_string_age'} = sprintf "%4i-%02u-%02i", 1900 + $year, $mon+1, $mday;
}
return %co;
}
sub parse_commit {
my ($commit_id) = @_;
my %co;
local $/ = "\0";
open my $fd, "-|", git_cmd(), "rev-list",
"--parents",
"--header",
"--max-count=1",
$commit_id,
"--",
or die_error(500, "Open git-rev-list failed");
%co = parse_commit_text(<$fd>, 1);
close $fd;
return %co;
}
sub parse_commits {
my ($commit_id, $maxcount, $skip, $filename, @args) = @_;
my @cos;
$maxcount ||= 1;
$skip ||= 0;
local $/ = "\0";
open my $fd, "-|", git_cmd(), "rev-list",
"--header",
@args,
("--max-count=" . $maxcount),
("--skip=" . $skip),
@extra_options,
$commit_id,
"--",
($filename ? ($filename) : ())
or die_error(500, "Open git-rev-list failed");
while (my $line = <$fd>) {
my %co = parse_commit_text($line);
push @cos, \%co;
}
close $fd;
return wantarray ? @cos : \@cos;
}
# parse line of git-diff-tree "raw" output
sub parse_difftree_raw_line {
my $line = shift;
my %res;
# ':100644 100644 03b218260e99b78c6df0ed378e59ed9205ccc96d 3b93d5e7cc7f7dd4ebed13a5cc1a4ad976fc94d8 M ls-files.c'
# ':100644 100644 7f9281985086971d3877aca27704f2aaf9c448ce bc190ebc71bbd923f2b728e505408f5e54bd073a M rev-tree.c'
if ($line =~ m/^:([0-7]{6}) ([0-7]{6}) ([0-9a-fA-F]{40}) ([0-9a-fA-F]{40}) (.)([0-9]{0,3})\t(.*)$/) {
$res{'from_mode'} = $1;
$res{'to_mode'} = $2;
$res{'from_id'} = $3;
$res{'to_id'} = $4;
$res{'status'} = $5;
$res{'similarity'} = $6;
if ($res{'status'} eq 'R' || $res{'status'} eq 'C') { # renamed or copied
($res{'from_file'}, $res{'to_file'}) = map { unquote($_) } split("\t", $7);
} else {
$res{'from_file'} = $res{'to_file'} = $res{'file'} = unquote($7);
}
}
# '::100755 100755 100755 60e79ca1b01bc8b057abe17ddab484699a7f5fdb 94067cc5f73388f33722d52ae02f44692bc07490 94067cc5f73388f33722d52ae02f44692bc07490 MR git-gui/git-gui.sh'
# combined diff (for merge commit)
elsif ($line =~ s/^(::+)((?:[0-7]{6} )+)((?:[0-9a-fA-F]{40} )+)([a-zA-Z]+)\t(.*)$//) {
$res{'nparents'} = length($1);
$res{'from_mode'} = [ split(' ', $2) ];
$res{'to_mode'} = pop @{$res{'from_mode'}};
$res{'from_id'} = [ split(' ', $3) ];
$res{'to_id'} = pop @{$res{'from_id'}};
$res{'status'} = [ split('', $4) ];
$res{'to_file'} = unquote($5);
}
# 'c512b523472485aef4fff9e57b229d9d243c967f'
elsif ($line =~ m/^([0-9a-fA-F]{40})$/) {
$res{'commit'} = $1;
}
return wantarray ? %res : \%res;
}
# wrapper: return parsed line of git-diff-tree "raw" output
# (the argument might be raw line, or parsed info)
sub parsed_difftree_line {
my $line_or_ref = shift;
if (ref($line_or_ref) eq "HASH") {
# pre-parsed (or generated by hand)
return $line_or_ref;
} else {
return parse_difftree_raw_line($line_or_ref);
}
}
# parse line of git-ls-tree output
sub parse_ls_tree_line {
my $line = shift;
my %opts = @_;
my %res;
if ($opts{'-l'}) {
#'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa 16717 panic.c'
$line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40}) +(-|[0-9]+)\t(.+)$/s;
$res{'mode'} = $1;
$res{'type'} = $2;
$res{'hash'} = $3;
$res{'size'} = $4;
if ($opts{'-z'}) {
$res{'name'} = $5;
} else {
$res{'name'} = unquote($5);
}
} else {
#'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c'
$line =~ m/^([0-9]+) (.+) ([0-9a-fA-F]{40})\t(.+)$/s;
$res{'mode'} = $1;
$res{'type'} = $2;
$res{'hash'} = $3;
if ($opts{'-z'}) {
$res{'name'} = $4;
} else {
$res{'name'} = unquote($4);
}
}
return wantarray ? %res : \%res;
}
# generates _two_ hashes, references to which are passed as 2 and 3 argument
sub parse_from_to_diffinfo {
my ($diffinfo, $from, $to, @parents) = @_;
if ($diffinfo->{'nparents'}) {
# combined diff
$from->{'file'} = [];
$from->{'href'} = [];
fill_from_file_info($diffinfo, @parents)
unless exists $diffinfo->{'from_file'};
for (my $i = 0; $i < $diffinfo->{'nparents'}; $i++) {
$from->{'file'}[$i] =
defined $diffinfo->{'from_file'}[$i] ?
$diffinfo->{'from_file'}[$i] :
$diffinfo->{'to_file'};
if ($diffinfo->{'status'}[$i] ne "A") { # not new (added) file
$from->{'href'}[$i] = href(action=>"blob",
hash_base=>$parents[$i],
hash=>$diffinfo->{'from_id'}[$i],
file_name=>$from->{'file'}[$i]);
} else {
$from->{'href'}[$i] = undef;
}
}
} else {
# ordinary (not combined) diff
$from->{'file'} = $diffinfo->{'from_file'};
if ($diffinfo->{'status'} ne "A") { # not new (added) file
$from->{'href'} = href(action=>"blob", hash_base=>$hash_parent,
hash=>$diffinfo->{'from_id'},
file_name=>$from->{'file'});
} else {
delete $from->{'href'};
}
}
$to->{'file'} = $diffinfo->{'to_file'};
if (!is_deleted($diffinfo)) { # file exists in result
$to->{'href'} = href(action=>"blob", hash_base=>$hash,
hash=>$diffinfo->{'to_id'},
file_name=>$to->{'file'});
} else {
delete $to->{'href'};
}
}
## ......................................................................
## parse to array of hashes functions
sub git_get_heads_list {
my ($limit, @classes) = @_;
@classes = get_branch_refs() unless @