(* Scans files from HTML-directory and copies linked files to a new directory.
   Creates a list of files with number of links etc.

   Outline of what the visible code does:
     1. Starting from root, every HTML file is split at quote characters and
        each piece is handed to checkref, which classifies it as None, File
        or Directory.
     2. Newly found files are appended to a set of parallel bookkeeping lists
        (html, levels, typs, referers, cits, refs) and passed to copyfile.
     3. For a referenced directory a synthetic page dir-list.htm is generated.
     4. One record per scanned HTML file is appended to logfile.
     5. Copy commands for auxiliary files are appended to batchfile, which is
        then executed.
     6. A plain-text listing and an HTML listing of all files are written.

   NOTE(review): whattype, copyfile and checkref are called below but are not
   defined in the visible text -- presumably they were loaded at the damaged
   span marked further down; confirm.
   NOTE(review): many string literals below look as if HTML markup has been
   stripped out of them (empty Write calls, literal text spilled over several
   lines).  The code is reproduced exactly as found; restore the literals from
   a pristine copy before running. *)

(* Clears every symbol whose name matches the string pattern @ (names made of
   characters other than uppercase letters) -- presumably to start from a
   clean state on a re-run. *)
Clear["@"];

(********** INPUT STARTS **********)

(* HTML-directory: root of the source tree that is scanned *)
dirhtml="d:\\html";
(* new HTML-directory: destination of the copy *)
dirnew="d:\\temp\\htmlcopy";
(* Working directory *)
dirwork="d:\\sergeev\\bgu\\html\\maintain";
(* Root HTML-doc., given relative to dirhtml; the scan starts here *)
root="index.htm";
(* Log file: receives the root name and then one record per scanned HTML file *)
logfile="d:\\temp\\log-html.txt";
(* Batch file: truncated below, later filled with xcopy commands and run *)
batchfile="d:\\temp\\html.bat";
(* The plain-text listing goes into the copy (dirnew) ... *)
listingfile=ToFileName[dirnew,"listing.txt"];
(* ... while the HTML listing is written into the source tree (dirhtml) *)
listinghtml=ToFileName[dirhtml,"system\\listing.htm"];
(* Archive name; only referenced by the commented-out ZIP step at the end *)
htmlzip="d:\\temp\\html.zip";
(* Which auxiliary files to copy *)
filesaux={"nocgi.htm","enablejs.htm","outofspa.htm",
  (*"ticker0000.class",*)"outofspa.gif","noresp.gif","html.gif"};
(* File types, max. sizes and extensions.
   Each entry is {type name, size limit, list of extensions}.
   Neither maxsize nor types is read anywhere in the visible code --
   presumably they are consumed by whattype or copyfile; confirm. *)
kb=1000;
maxsize=58000 kb;
types={
  {"html", 1000 kb, {"htm","html"} },
  {"pdf", 15 kb, {"pdf"} },
  {"gif", 20 kb, {"gif"} },
  {"jpeg", 25 kb, {"jpg","jpeg"} },
  {"plain", 15 kb, {"txt","text","cgi","for","m"} },
  {"data", 15 kb, {} } (* max. size of other data files *)
};
(* Currently, sizes are not used, only maxsize is essential *)

(********** INPUT ENDS **********)

SetDirectory[dirwork];
(* Open and immediately close the batch file for writing, which leaves an
   empty file; commands are appended to it with OpenAppend further down. *)
OpenWrite[batchfile];Close[batchfile];
mtypes=Length[types];
(* NOTE(review): the next line is damaged and does not parse.  Everything
   between the less-than sign and True] is missing from this text.  It
   presumably held a package load that defines whattype, copyfile and
   checkref, followed by a call ending in an option set to True (possibly
   removing an old dirnew together with its contents).  Restore from a
   pristine copy. *)
< True];
CreateDirectory[dirnew];
(* NOTE(review): if SetDirectory fails it returns $Failed, and comparing
   $Failed with a string through != may stay unevaluated, in which case this
   If would neither print nor exit.  The test can also misfire when the
   returned path is spelled differently from dirhtml (letter case, trailing
   separator).  Consider a structural test against $Failed; confirm. *)
s=SetDirectory[dirhtml];
If[s!=dirhtml,Print["No HTML-directory found. 
Exiting..."];Exit[]];

(* Starting list of files.
   The following lists are parallel, indexed by file number:
     html      file name relative to the HTML root
     levels    distance from root in the link tree (root is 0)
     typs      type string returned by whattype, filled in when processed
     referers  the file in which this entry was first found
     cits      how many references to this entry were found
     refs      how many references this entry itself contains
   mfiles is the current length of these lists. *)
html={root};
levels={0};
typs={""};
referers={"None"};
cits={0};
refs={0};
mfiles=1;
(* The meaning of the fourth argument of copyfile is not visible here; it is
   True for the root, the result of an html type test for linked files and
   None for generated directory listings. *)
copyfile[root,dirhtml,dirnew,True];
SetDirectory[dirnew];
nfile=0;
(* Put overwrites logfile; the per-file records are appended afterwards. *)
Put[root,logfile];

(* Main scan.  The lists grow while they are being traversed, so files found
   during the scan are processed later in this same loop, in discovery order. *)
While[++nfile<=mfiles,
  f=html[[nfile]];
  type=typs[[nfile]]=whattype[f];
  refs[[nfile]]=0;
  (* Only HTML files are searched for references.  Continue also skips the
     PutAppend at the end of the loop body, so other files get no log record. *)
  If[type=!="html",Continue[]];
  basedir=DirectoryName[f];
  (* The file is read from its copy below dirnew, not from the source tree. *)
  file=ToFileName[dirnew,f];
  level=levels[[nfile]];
  (* Lower-cased names of everything in the source directory of f; passed on
     to checkref. *)
  dirf=DirectoryName[ToFileName[dirhtml,f]];
  SetDirectory[dirf];
  basefiles=ToLowerCase/@FileNames["*"];
  (* Two kinds of quotes: the file is read twice, first split at the double
     quote, then at the single quote. *)
  Do[
    delim=If[idelim==1,"\"","'"];
    s=OpenRead[file];
    (* Each record is the text between two consecutive delimiters. *)
    text1=ReadList[s,Record,RecordSeparators->{delim}];
    Close[s];
    mrecs=Length[text1];
    (* Every record from the second one on is examined together with the
       record preceding it. *)
    Do[ref0=text1[[nrec]];
      refprev=text1[[nrec-1]];
      (* checkref returns a triple; ftype is compared with None, File and
         Directory below, fref is the referenced path.  htmlref is not used
         in the visible code. *)
      {ftype,fref,htmlref}=checkref[ref0,refprev,basedir,dirhtml,basefiles];
      If[ftype=!=None,refs[[nfile]]++];
      If[ftype===File, (* reference to a FILE *)
        If[MemberQ[html,fref],
          (* Already known: only count one more citation. *)
          nfref=Position[html,fref, {1},1, Heads -> False][[1,1]];
          cits[[nfref]]++,
          (* New file: register it one level below the current file. *)
          mfiles++;
          html=Append[html,fref];
          levels=Append[levels,level+1];
          typs=Append[typs,""];
          referers=Append[referers,f];
          cits=Append[cits,1];
          refs=Append[refs,0];
          ifcopy=(whattype[fref]==="html");
          copyfile[fref,dirhtml,dirnew,ifcopy];
        ]; (* End Ifs *)
      ];
      (* reference to a DIRECTORY: it is represented by a generated page
         dir-list.htm inside that directory *)
      If[ftype===Directory, (* If Directory *)
        fref1=ToFileName[fref,"dir-list.htm"];
        If[MemberQ[html,fref1],
          (* Listing page already registered: only count one more citation. *)
          nfref=Position[html,fref1, {1},1, Heads -> False][[1,1]];
          cits[[nfref]]++,
          mfiles++;
          html=Append[html,fref1];
          levels=Append[levels,level+1];
          typs=Append[typs,""];
          referers=Append[referers,f];
          cits=Append[cits,1];
          refs=Append[refs,0];
          copyfile[fref1,dirhtml,dirnew,None];
          (* Write the listing page into the copy; the entries come from the
             corresponding directory of the source tree. *)
          newfile=ToFileName[dirnew,fref1];
          s=OpenWrite[newfile,PageWidth->Infinity,FormatType->OutputForm];
          SetDirectory[ToFileName[{dirhtml,fref}]];
          fnames=ToLowerCase/@FileNames["*"];
          mn=Length[fnames];
          (* Forward slashes for display in the page. *)
          frefp=StringReplace[fref,"\\"->"/"];
          (* NOTE(review): the literals from here to Close[s] look as if their
             HTML markup has been stripped; the per-entry rows seem to have
             been moved out of the Do loop into the Write that follows it. *)
          Write[s,""];
          Write[s,""];
          Write[s,"Directory listing of "<>frefp<>""];
          Write[s,"

Directory listing of "<>frefp<>"

"];
          Write[s,""];
          Do[fname=fnames[[n]];
            ft=FileType[fname];
            If[ft===Directory,fname=fname<>"/";
              Write[s,""];
              Write[s,""],
              tp=whattype[fname];
              Write[s,""];
              Write[s,""];
            ];
          ,{n,mn}];
          Write[s,"
fname<>"\">"<>fname<>"  Directory
fname<>"\">"<>fname<>"  "<>tp<>" File
"];
          Write[s,""];
          Close[s];
        ]; (* End Ifs *)
      ]; (* END If DIRECTORY *)
    ,{nrec,2,mrecs}];(* end Do through records *)
  ,{idelim,2}]; (* End two kinds of quotes *)
  (* Log record: number, name, type, level, references made, citations
     received so far, first referer. *)
  PutAppend[{nfile,f,type,levels[[nfile]],refs[[nfile]],cits[[nfile]],referers[[nfile]]},
    logfile];
]; (* End While *)

(* Copy auxiliary files: one xcopy command per file is appended to the batch
   file. *)
maux=Length[filesaux];
s=OpenAppend[batchfile,PageWidth->Infinity,FormatType->OutputForm];
Do[f=filesaux[[n]];
  file=ToFileName[dirhtml,f];
  fcopy=ToFileName[dirnew,f];
  Write[s,"xcopy /I "<>file<>" "<>fcopy];
,{n,maux}];
(* Copy php-scripts: the php subdirectory is copied with the /S switch *)
Write[s,"xcopy /I /S "<>ToFileName[dirhtml,"php"]<>" "<>ToFileName[dirnew,"php"]];
Close[s];
(* Execute the collected commands. *)
Run[batchfile];

(* Listing of files: s is the plain-text listing, of is the HTML listing.
   The current directory is set to dirnew, so the relative names passed to
   FileByteCount below are resolved inside the copy. *)
SetDirectory[dirnew];
s=OpenWrite[listingfile,PageWidth->Infinity,FormatType->OutputForm];
of=OpenWrite[listinghtml,PageWidth->Infinity,FormatType->OutputForm];
(* p writes a string to the HTML listing; it is used in postfix form below.
   The Module presumably keeps the pattern name s apart from the global
   stream s. *)
Clear[p];
Module[{s}, p[s_String]:=Write[of,s] ];
(* Length of the longest file name, used to align the columns. *)
maxlen=0;
Do[ m=StringLength[html[[n]]];
  If[m>maxlen,maxlen=m];
,{n,mfiles}];
(* blank[n]: a string of n spaces. *)
blank[n_]:=StringJoin[Table[" ",{n}]];
(* pint[i,n]: the last n characters of 99 spaces followed by i, that is, i
   right-aligned in a field of width n (longer values lose leading
   characters). *)
pint[i_,n_]:=StringTake[blank[99]<>ToString[i],-n];
(* Column header of the plain-text listing. *)
Write[s,blank[11],"FileName",blank[maxlen-21], "Level Refs Cites Size Referer"];
(* Head of the HTML listing.
   NOTE(review): the markup in these literals appears to have been stripped;
   the second statement below has lost its opening literal and does not parse
   as written. *)
"Full list of HTML files"//p;
" "with its levels on HTML tree, number of references, "<> "number of citations, sizes, and referers\">"//p;
""//p;
""//p;
"

Full list of HTML files

"//p;
(* NOTE(review): the literal below contains the typo Compete, presumably
   meant to read Complete; it is runtime text and is left unchanged here. *)
"

Compete list of files "<> "with its levels on HTML tree, number of references, "<> "number of citations, sizes, and referers

"//p;
(* Column header of the HTML listing. *)
Write[of,"

",blank[11],"FileName",blank[maxlen-21],
   "Level Refs Cites    Size  Referer
"];
(* One line per file in both listings: name padded to maxlen, then level,
   references, citations, size in bytes and first referer. *)
Do[
  {f,lev,ref,cit,refer}={html[[n]],levels[[n]],refs[[n]],cits[[n]],referers[[n]]};
  mf=StringLength[f];
  size=FileByteCount[f];
  Write[s,f,blank[maxlen-mf],pint[lev,2],pint[ref,6],
    pint[cit,6],blank[1],pint[size,7],blank[2],refer];
  (* The HTML listing shows paths with forward slashes. *)
  f1=StringReplace[f,"\\"->"/"];
  refer1=StringReplace[refer,"\\"->"/"];
  Write[of,"",f1, "",blank[maxlen-mf],pint[lev,2],pint[ref,6],
    pint[cit,6],blank[1],pint[size,7],blank[2],refer1];
,{n,mfiles}];
"
"//p;
Close[s];Close[of];

(* Disabled step, kept for reference: rewrites the batch file so that it
   deletes the old archive and zips the copied tree. *)
(*
(* ZIP files *)
s=OpenWrite[batchfile,PageWidth->Infinity,FormatType->OutputForm];
Write[s,"del "<>htmlzip];
Write[s,"cd "<>dirnew];
Write[s,"zip -R "<>htmlzip<>" *"];
Close[s];
Run[batchfile];
*)