#!/bin/sh
# Usage: ppttitles 01/index.htm 01_3.xml
# Read in the index.htm file named by the first argument (for example
# 01/index.htm) and generate a list of the PowerPoint slide titles
# in XML format
# The output will look like
#
#
#
#
#
#
#
# The second argument is optional. If it is present, then
# it is assumed to name a UTF-8 file that was generated by Windows Media
# File Editor and then converted to UTF-8. We look in this file
# for Script tags that have Command Attributes and adjust the
# output to match the base of the Command Attribute. For example
# if the index.htm file contains links to files like sld002.htm, and
# the second argument contains links to files like 01/slide2.gif, then
# we adjust the output so that we have links to files like slide2.gif
# instead of sld002.htm.
#
# Accept exactly one or two command-line arguments.  Any other count
# prints a short usage summary and terminates the script with status 9.
case $# in
  1 | 2)
    ;;
  *)
    echo "Usage: $0 index.htm [toc.xml ]"
    echo " Read in index.htm and generate a list of the PowerPoint slide titles"
    exit 9
    ;;
esac
# Decide which file-name prefix ("slide" or "Slide") the generated links
# should use and store it in commandurl.
#
# With a second argument, scan that file for lines containing ` Command="`
# and inspect the second such line.  The first one is skipped because it
# is sometimes different from the rest, for example when it was added by
# hand.  The last "/"-separated component of the quoted Command value
# selects the prefix.  awk prints nothing, leaving commandurl empty, when
# the file has fewer than two such lines or when that component contains
# neither spelling; a warning is then written to stderr.
#
# Without a second argument the prefix defaults to "Slide".
if [ $# -eq 2 ]; then
  # "$2" is quoted so that a path containing whitespace or glob
  # characters reaches awk as a single file operand.
  commandurl=$(awk '
    $0 ~ / Command="/ {
      commandCount++
      if (commandCount == 2) {
        # p is the index of the first character after the opening quote.
        p = match($0, / Command=/) + length(" Command=") + 1
        # Keep the rest of the line; the closing quote, if present, is
        # removed by the split below.
        commandEtc = substr($0, p)
        split(commandEtc, command, "\"")
        # The prefix is judged on the last path component only.
        np = split(command[1], pa, "/")
        baseurl = pa[np]
        if (baseurl ~ /slide/) {
          print "slide"
        } else if (baseurl ~ /Slide/) {
          print "Slide"
        }
        exit
      }
    }' "$2")
else
  commandurl=Slide
fi
if [ -z "$commandurl" ]; then
  echo "$0: Warning, 'commandurl' = '', which is likely a bug" 1>&2
  echo " The problem is likely in '$2'" 1>&2
fi
# Turn the slide links found in the index file named by $1 into the
# output list.  Pipeline stages:
#   1. keep only the lines that link to an sld* page (any letter case),
#   2. drop the "Click here to start" link,
#   3. replace a vertical tab and several high-bit bytes with plain ASCII
#      (NOTE(review): these look like Windows-1252 punctuation -- confirm),
#   4. escape "&" as "&amp;" because the output is XML; in a sed
#      replacement a bare & stands for the matched text, hence the
#      backslash (NOTE(review): assumes titles in the input carry raw
#      ampersands rather than entities -- confirm),
#   5. let awk rewrite each link and pick up its title.
# commandurl is the link prefix chosen earlier in the script; it is handed
# to awk with -v and quoted so that an empty value is passed safely.
grep -i -- 'a href="sld' "$1" |
  grep -vi "Click here to start" |
  tr "\013\205\222\223\224\225\226\227" " .'' \-\-_" |
  sed 's/&/\&amp;/g' |
  awk -v commandurl="$commandurl" '
    # NOTE(review): every string literal printed by this program is empty
    # or a single blank, so neither the rewritten link in href[2] nor the
    # title in tag[2] reaches the output.  The XML markup these print
    # statements were meant to emit appears to be missing from this copy
    # of the script; restore it from a known-good revision.
    BEGIN {
      print ""
      print ""
    }
    {
      nf = split($0, f, "<")
      # Look for the A HREF tag and take the part after the > as the title.
      # NOTE(review): this match is case sensitive although the grep that
      # feeds it is not, so lower-case links get past grep but are skipped
      # here -- confirm that this is intended.
      for (i = 1; i <= nf; i++) {
        if (f[i] ~ /A HREF/) {
          split(f[i], tag, ">")
          split(tag[1], href, "\"")
          if (tag[2] ~ /^PPT Slide$/ && href[2] ~ /^sld/) {
            # Generic title: replace it with "Slide N", where N is the
            # number between the sld prefix and the four-character suffix,
            # with leading zeros removed by the numeric conversion.
            talkSlideNumber = substr(href[2], 4, length(href[2]) - 7)
            talkSlideNumberFixed = talkSlideNumber + 0
            tag[2] = "Slide " talkSlideNumberFixed
          }
          if (href[2] ~ /^sld/ && commandurl != "sld") {
            # Convert sld to whatever the value of commandurl is and
            # get rid of leading zeros, so sld001.htm -> slide1.gif
            slideNumber = substr(href[2], 4, length(href[2]) - 7)
            previousSlideNumberFixed = slideNumberFixed
            slideNumberFixed = slideNumber + 0
            # We use talkSlideNumberFixed below.
            # FIXME: We need two variables here, slideNumberFixed and
            # talkSlideNumberFixed, to handle checking against the
            # previous slide number.
            talkSlideNumberFixed = slideNumberFixed
            if (slideNumberFixed > previousSlideNumberFixed) {
              if (mergedPPTFile == 0) {
                # If we are handling slides for only one
                # powerpoint file, then we will end up here.
                href[2] = commandurl slideNumberFixed ".gif"
              } else {
                # We are handling a merged powerpoint file
                # situation: number the image with our own counter.
                #print ""
                mergedCount++
                href[2] = commandurl mergedCount ".gif"
              }
            } else {
              # The slide number did not increase, so we are handling a
              # file that contains the concatenation of multiple
              # index.htm files.  This would occur if we combined
              # multiple talks into a single ppt file, generated slides
              # for each individual talk and then created an htm file
              # with:
              #   cat */*/index.htm > all.htm
              # However, the talks themselves will have URL events that
              # point to an ever increasing SlideNN.gif file.
              # We assume that each talk is separated by a blank gif.
              # So, the first talk will have
              #   Slide1.gif ... SlideN.gif
              # SlideN+1 will be blank.
              # The second talk will have SlideN+2.gif ... SlideM.gif
              # Thus when slideNumberFixed is not greater than
              # previousSlideNumberFixed, we set a flag indicating that
              # we are ignoring the number in the link and using our own
              # count, and we increment the count by two to skip the
              # blank.
              if (mergedPPTFile == 0) {
                print ""
                mergedPPTFile = 1
                mergedCount = previousSlideNumberFixed + 2
              } else {
                print ""
                mergedCount += 2
              }
              href[2] = commandurl mergedCount ".gif"
            }
          }
          if (tag[2] == "") {
            # FIXME: The title is empty, which can cause problems
            # when playing the stream.  The problem here is that
            # the title of the slide is likely so long that it is
            # on the line after the A HREF line.
            tag[2] = "Slide " talkSlideNumberFixed
          }
          print " "
        }
      }
    }
    END {
      print ""
    }
  '