用户:Wikibot
来自Ubuntu中文
Wikibot 是一个机器人小程序:自动将 http://help.ubuntu.com 和 http://wiki.ubuntu.com 上的页面由 MoinMoin 格式转换为 MediaWiki 格式,并自动更新、发布到本站。程序由 Java 写成。
/*
* Main.java * * Created on 2007年5月12日, 下午1:31 * * To change this template, choose Tools | Template Manager * and open the template in the editor. */
package wiki;
import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.DataOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.net.URLEncoder; import java.security.GeneralSecurityException; import java.security.Security; import java.security.cert.X509Certificate; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Vector; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLSession; import javax.net.ssl.X509TrustManager;
/**
* * @author oneleaf */
public class Main {
List <String> addDict= new Vector<String>(); List <String> oldDict= new Vector<String>(); String cookie=getCookie(); private String getCookie(){ String cookie = ""; try{ URL httpurl = new URL("http://wiki.ubuntu.org.cn/index.php?title=Special:Userlogin"); HttpURLConnection httpConn = (HttpURLConnection)httpurl.openConnection(); httpConn.addRequestProperty("Cookie",cookie); httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)"); httpConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); httpConn.setRequestProperty("Connection", "Keep-Alive"); httpConn.setUseCaches(false); cookie=httpConn.getHeaderField("Set-Cookie"); String data="wpName=wikibot&wpPassword=********&wpRemember=1"; httpurl = new URL("http://wiki.ubuntu.org.cn/index.php?title=Special:Userlogin&action=submitlogin&type=login"); httpConn = (HttpURLConnection)httpurl.openConnection(); httpConn.setRequestMethod("POST"); httpConn.addRequestProperty("Cookie",cookie); httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)"); httpConn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); httpConn.setRequestProperty("Content-Language","UTF-8" ); httpConn.setRequestProperty("Content-Length", ""+data.getBytes().length); httpConn.setRequestProperty("Connection", "Keep-Alive"); httpConn.setDoOutput(true); httpConn.setDoInput(true); httpConn.setUseCaches(false); DataOutputStream outStream = new DataOutputStream(httpConn.getOutputStream()); outStream.writeBytes(data); outStream.flush(); outStream.close(); List <String> list=httpConn.getHeaderFields().get("Set-Cookie"); for (int i=0;i<list.size();i++){ cookie=cookie+"; "+list.get(i); }
// Iterator iter=httpConn.getHeaderFields().keySet().iterator(); // while (iter.hasNext()){ // String key=(String) iter.next(); // List list=httpConn.getHeaderFields().get(key); // System.out.print(key+": "); // for (int i=0;i<list.size();i++){ // System.out.print(list.get(i)); // } // System.out.print("\n"); // } // // cookie=httpConn.getHeaderField("Set-Cookie"); // System.out.println("Cookie_2:"+cookie);
// BufferedReader in = new BufferedReader(new InputStreamReader(httpConn.getInputStream()));
// String line;
// while ((line = in.readLine())!= null){
// System.out.println(line);
// // result += line+"\n";
// }
// in.close();
} catch (Exception ex){ ex.printStackTrace(); } return cookie; } private void addDict(String dict){ String str=dict.trim(); if (dict.startsWith("/")){ str=dict.substring(1); }else if (dict.indexOf("#")>0){ str=dict.substring(0,dict.indexOf("#")); }else if (dict.indexOf("?")>0){ str=dict.substring(0,dict.indexOf("?")); }else if (dict.startsWith("./")){ str=dict.substring(2); }else if (dict.startsWith("../")){ str=dict.substring(3); } if (str.toLowerCase().indexOf("team")>0) return; if (str.trim().length()==0) return; if (str.trim().length()>=256) return; //../CommandLine if (oldDict.contains(str)) return; if (addDict.contains(str)) return; addDict.add(str); } private void delDict(int dictindex){ oldDict.add(addDict.get(dictindex)); addDict.remove(dictindex); } private void clearDict(){ addDict.clear(); oldDict.clear(); } private void getDicts(String html){ Pattern pattern= Pattern.compile("\\[UbuntuHelp:(.*?)\\]"); Matcher matcher=pattern.matcher(html); while(matcher.find()) { String line=matcher.group(1); if (line.indexOf("|")>0){ addDict(line.substring(0,line.indexOf("|"))); }else{ addDict(line); } } } /** Creates a new instance of Main */ public Main() { SSLContext sslContext = null; try { sslContext = SSLContext.getInstance("TLS"); X509TrustManager[] xtmArray = new X509TrustManager[] { xtm }; sslContext.init(null, xtmArray, new java.security.SecureRandom()); } catch(GeneralSecurityException gse) { } if(sslContext != null) { HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory()); } HttpsURLConnection.setDefaultHostnameVerifier(hnv); } private X509TrustManager xtm = new X509TrustManager() { public void checkClientTrusted(X509Certificate[] chain, String authType) {} public void checkServerTrusted(X509Certificate[] chain, String authType) {} public X509Certificate[] getAcceptedIssuers() { return null; } }; private HostnameVerifier hnv = new HostnameVerifier() { public boolean verify(String hostname, SSLSession session) { return true; } }; 
public String getUrl(String urladdress,String dict) throws IOException{ URL url=new URL(urladdress); HttpURLConnection httpConn = (HttpURLConnection) url.openConnection(); httpConn.setReadTimeout(60000); httpConn.setRequestProperty("User-Agent","Mozilla/4.0 (compatible; MSIE 6.0; Windows 2000)"); httpConn.setRequestProperty("Content-Language","UTF-8" ); httpConn.setRequestProperty("Connection", "Keep-Alive"); InputStream in=httpConn.getInputStream(); BufferedReader read=new BufferedReader(new InputStreamReader(in)); StringBuffer sb=new StringBuffer(); while (true){ String line=read.readLine(); if (line==null) break; sb.append(line+"\r\n"); } return moin2wm(sb.toString(),urladdress,dict); } public String moin2wm(String html,String url,String dict) throws UnsupportedEncodingException{ String text=html; String ex="UbuntuHelp"; String turl="https://help.ubuntu.com/community/"; if (url.startsWith("https://wiki")){ ex="UbuntuWiki"; turl="https://wiki.ubuntu.com/"; }String head="\r\n
\r\n";
//删除注释 text=text.replaceAll("\r\n##(.*)",""); text=text.replaceAll("\r\n#format(.*)",""); text=text.replaceAll("\r\n#language(.*)",""); text=text.replaceAll("\r\n#pragma(.*)",""); text=text.replaceAll("\r\n#acl(.*)",""); text=text.replaceAll("^##(.*)\r\n",""); text=text.replaceAll("^#format(.*)\r\n",""); text=text.replaceAll("^#language(.*)\r\n",""); text=text.replaceAll("^#pragma(.*)\r\n",""); text=text.replaceAll("^#acl(.*)\r\n",""); //替换#REDIRECT PDFPrinting => #REDIRECT PDFPrinting text=text.replaceAll("#REDIRECT (\\S*)","#REDIRECT "+""+ex+":$1"); text=text.replaceAll("#redirect (\\S*)","#REDIRECT "+""+ex+":$1"); //#refresh 0 https://wiki.ubuntu.com/ASUS_A3H_5010_Laptop_with_Ubuntu text=text.replaceAll("#REFRESH (.*?) (\\S*)","#REDIRECT "+""+ex+":$2"); text=text.replaceAll("#refresh (.*?) (\\S*)","#REDIRECT "+""+ex+":$2"); //删除主题 text=text.replaceAll(".*TableOfContents.*",""); //标题从二开始 text=text.replaceAll("= (.*?) =","== $1 =="); //转化List text=replaceList(text); //BR ->//link convert - \r\n {{{ * -> \r\n
text=text.replaceAll("\\[\\[BR\\]\\]","
"); //link convert superscripted - ^ * ^ -> * text=text.replaceAll("\\^(.*)\\^","$1"); //link convert subscripted - ,, * ,, -> * text=text.replaceAll(",,(.*?),,","$1"); //link convert - [" * "] -> UbuntuHelp: * text=text.replaceAll("\\[\"(.*?)\"\\]",""+ex+":$1"); //link convert - [# * ] -> * text=text.replaceAll("\\[#(.*?)\\]","$1"); //link convert - [: / * : * ] -> * text=text.replaceAll("\\[:/(.*?):(.*?)\\]","$2"); //link convert - [: * : * ] -> * text=text.replaceAll("\\[:(.*?):(.*?)\\]","$2"); //link convert - [: / * ] -> UbuntuHelp: dict * text=text.replaceAll("\\[:/(.*?)\\]",""+ex+":"+dict+"/$1"); //link convert - [: * ] -> UbuntuHelp: * text=text.replaceAll("\\[:(.*?)\\]",""+ex+":$1"); //link convert - wiki:cat -> UbuntuWiki:cat text=text.replaceAll(" wiki:(\\S*)"," UbuntuWiki:$1"); text=text.replaceAll("\r\nwiki:(\\S*)","\r\nUbuntuWiki:$1"); //link convert - [wiki:cat * ] -> * text=text.replaceAll("\\[wiki:(.*?)\\ (.*?)\\]","$2"); //link convert - [wiki:cat * ] -> * text=text.replaceAll("\\[wiki:(.*?)\\]","UbuntuWiki:$1"); //link convert - [UbuntuWiki:\*] -> [UbuntuWiki:dict\*] text=text.replaceAll("\\[UbuntuWiki:\\\\(.*?)\\]","[UbuntuWiki:"+dict+"\\$1]"); //link convert - [UbuntuHelp:\*] -> [UbuntuHelp:dict\*] text=text.replaceAll("\\[UbuntuHelp:\\\\(.*?)\\]","[UbuntuHelp:"+dict+"\\$1]"); //link convert - __ * __ -> * text=text.replaceAll("__(.*?)__","$1"); //link convert - {{{ * }}} -> <code> * text=text.replaceAll("\\{\\{\\{(.*?)\\}\\}\\}","$1
");
* text=text.replaceAll("\r\n([ \\.]*?)\\{\\{\\{(.*)","\r\n<pre><nowiki>$2"); //link convert - {{{ * -> <pre><nowiki> * text=text.replaceAll("\\{\\{\\{(.*)","\r\n<pre><nowiki>$1"); //link convert - * }}} -> * <\pre><\nowiki> text=text.replaceAll("(.*?)\\}\\}\\}","$1");
//CategoryHomepage =>; text=text.replaceAll("Category(\\S*)",""); text=text.replaceAll("\r\n( *)","\r\n"); text=replaceUrl(text,turl,dict); text=tableConv(text); //xxx:http => http: text=text.replaceAll("\\[\\[(.*?):http(.*?)\\]\\]","http$2"); //xxx:ftp => ftp: text=text.replaceAll("\\[\\[(.*?):ftp(.*?)\\]\\]","ftp$2"); //[[1]] => [[UbuntuWiki:]] text=text.replaceAll("\\[\\[2]\\]","UbuntuWiki:$1"); //[[3]] => [[UbuntuHelp:]] text=text.replaceAll("\\[\\[4]\\]","UbuntuHelp:$1"); //[[5]] => [[UbuntuWiki:]] text=text.replaceAll("\\[\\[6]\\]","UbuntuWiki:$1"); //[[7]] => [[UbuntuHelp:]] text=text.replaceAll("\\[\\[8]\\]","UbuntuHelp:$1"); String foot="\r\n"; if (text.trim().startsWith("#REDIRECT")){ System.out.print(dict + " is redirect :"+text.trim()); return text+head+foot; } if (text.trim().length()<10){ return ""; } return head+text+foot; } public String replaceList(String text){ text=text.replaceAll("\r\n \\. (.*?)","\r\n* $1"); text=text.replaceAll("\r\n \\. (.*?)","\r\n** $1"); text=text.replaceAll("\r\n \\. (.*?)","\r\n*** $1"); text=text.replaceAll("\r\n \\. (.*?)","\r\n**** $1"); text=text.replaceAll("\r\n \\. (.*?)","\r\n***** $1"); text=text.replaceAll("\r\n \\. (.*?)","\r\n****** $1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n*$1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n**$1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n***$1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n****$1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n*****$1"); text=text.replaceAll("\r\n \\*(.*?)","\r\n******$1");
// text=text.replaceAll("\r\n \\. (.*?)","\r\n#: $1"); // text=text.replaceAll("\r\n \\. (.*?)","\r\n##: $1"); // text=text.replaceAll("\r\n \\. (.*?)","\r\n###: $1"); // text=text.replaceAll("\r\n \\. (.*?)","\r\n####: $1"); // text=text.replaceAll("\r\n \\. (.*?)","\r\n#####: $1"); // text=text.replaceAll("\r\n \\. (.*?)","\r\n######: $1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n#$1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n##$1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n###$1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n####$1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n#####$1"); // text=text.replaceAll("\r\n \\*(.*?)","\r\n######$1");
for (int i=1;i<20;i++){
// text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n#$1"); // text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n##$1"); // text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n###$1"); // text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n####$1"); // text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n#####$1"); // text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n######$1");
text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n*$1"); text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n**$1"); text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n***$1"); text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n****$1"); text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n*****$1"); text=text.replaceAll("\r\n "+i+"\\.(.*?)","\r\n******$1"); } return text; } public String replaceUrl(String text,String baseurl,String dict) throws UnsupportedEncodingException{ //attachment:IconsPage/info.png -> while (true){ Pattern pattern= Pattern.compile("attachment:(.*?)/(\\S*)"); Matcher matcher=pattern.matcher(text); String replace; if (matcher.find()) { replace=baseurl+matcher.group(1)+"?action=AttachFile&do=get&target="+URLEncoder.encode(matcher.group(2), "UTF-8"); text=text.substring(0,matcher.start(0))+replace+text.substring(matcher.end(0)); continue; } pattern= Pattern.compile("attachment:(\\S*)"); matcher=pattern.matcher(text); if(matcher.find()) { replace=baseurl+dict+"?action=AttachFile&do=get&target="+URLEncoder.encode(matcher.group(1), "UTF-8"); text=text.substring(0,matcher.start(0))+replace+text.substring(matcher.end(0)); continue; } break; } return text; } public String tableConv(String html){ //||a||b||c|| -> {| //||d||e||f|| |a||b||c // |- // |d||e||f // |} String[] lines=html.split("\r\n"); String block = ""; StringBuffer bf= new StringBuffer(); boolean start=false; for (int i=0;i<lines.length;i++){ String line=lines[i].trim(); if (line.startsWith("||")){ if (! 
start){ start=true; String str=line.substring(1,line.length()-2); str=str.replaceAll("<bgcolor.*?>",""); str=str.replaceAll("<style.*?>",""); str=str.replaceAll("<rowbgcolor.*?>",""); block="{|border=\"1\" cellspacing=\"0\"\r\n"+str; }else{ String str=line.substring(1,line.length()-2); str=str.replaceAll("<bgcolor.*?>",""); str=str.replaceAll("<style.*?>",""); str=str.replaceAll("<rowbgcolor.*?>",""); block=block+"\r\n|-\r\n"+str; } }else{ if (start){ block=block+"\r\n|}\r\n"; bf.append(block); start=false; } bf.append(lines[i]+"\r\n"); } } if (start){ block=block+"\r\n|}\r\n"; bf.append(block); start=false; } return bf.toString(); } public void putText(String dict,String html,String surl) throws MalformedURLException, IOException{ URL url; URLConnection conn; InputStream in; BufferedReader read; StringBuffer sb; if (html.length()<5){ System.out.print(" is short:"+html); return; }
// if (html.length()<300){ // if (html.toUpperCase().trim().indexOf("REFRESH")>0) { // System.out.println(dict+" is REFRESH."); // return; // } // if (html.toUpperCase().trim().indexOf("REDIRECT")>0) { // System.out.println(dict+" is REDIRECT."); // return; // } // }
String ex="UbuntuHelp"; if (surl.startsWith("https://wiki")){ ex="UbuntuWiki"; } try{ url=new URL("http://wiki.ubuntu.org.cn/"+ex+":"+dict+"?action=raw"); conn = url.openConnection(); conn.setReadTimeout(60000); conn.setRequestProperty("Cookie", cookie); in=conn.getInputStream(); read=new BufferedReader(new InputStreamReader(in)); sb=new StringBuffer(); while (true){ String line=read.readLine(); if (line==null) break; sb.append(line+"\r\n"); } if (sb.toString().trim().length()==html.trim().length()) { System.out.print(" no changes"); return; } }catch(Exception ex0){ //nothing } url=new URL("http://wiki.ubuntu.org.cn/"+ex+":"+dict+"?action=edit"); conn = url.openConnection(); conn.setReadTimeout(60000); conn.setRequestProperty("Connection", "Keep-Alive"); conn.setRequestProperty("Cookie", cookie); conn.setRequestProperty("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)"); conn.setRequestProperty("Accept","text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"); in=conn.getInputStream(); read=new BufferedReader(new InputStreamReader(in)); sb=new StringBuffer(); while (true){ String line=read.readLine(); if (line==null) break; sb.append(line+"\r\n"); } int start=sb.indexOf("<form id=\"editform\"");int end=sb.indexOf("
String from; try{ from=sb.substring(start,end); }catch(Exception ex0){ System.out.println(dict+" error,please set cookie!"); System.out.println(sb); return; } Map<String,String> map=getPostDate(from); Iterator<String> iterator=map.keySet().iterator(); url = new URL("http://wiki.ubuntu.org.cn/index.php?title="+ex+":"+URLEncoder.encode(dict, "UTF-8")+"&action=submit"); HttpURLConnection conn2 = (HttpURLConnection) url.openConnection(); String boundary="---------------------------167593640336579986891120154"; conn2.setReadTimeout(60000); conn2.setDoOutput(true); conn2.setAllowUserInteraction(false); conn2.setRequestMethod("POST"); conn2.setRequestProperty("Cookie", cookie); conn2.setRequestProperty("Content-Type", "multipart/form-data; boundary="+boundary); conn2.setRequestProperty("User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.3) Gecko/20061201 Firefox/2.0.0.3 (Ubuntu-feisty)"); conn2.setRequestProperty("Accept","text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"); conn2.setRequestProperty("Accept-Language", "zh-cn,zh;q=0.5"); conn2.setRequestProperty("Referer", "http://wiki.ubuntu.org.cn/index.php?title=UbuntuHelp:"+URLEncoder.encode(dict, "UTF-8")+"&action=edit"); conn2.setRequestProperty("Accept-Charset", "UTF-8,*"); conn2.setRequestProperty("Connection", "Keep-Alive"); StringBuffer content=new StringBuffer(); while (iterator.hasNext()){ String name=iterator.next(); if (name.equals("wpPreview")) continue; if (name.equals("wpDiff")) continue; if (name.equals("wpWatchthis")) continue; String value=""; if (name.equals("wpTextbox1")){ value=html; }else { value=map.get(name); } content.append("--"+boundary+"\r\n"); content.append("Content-Disposition: form-data; name=\""+name+"\"\r\n\r\n"); content.append(value+"\r\n"); } content.append("--"+boundary+"--\r\n\r\n"); byte[] data=content.toString().getBytes(); conn2.setRequestProperty("Content-Length", String.valueOf(data.length)); OutputStream out = 
conn2.getOutputStream(); out.write(data); out.flush(); // Get the response try{ BufferedReader rd = new BufferedReader(new InputStreamReader(conn2.getInputStream())); String line; while ((line = rd.readLine()) != null) { //System.out.println(line); // Nothing break; } rd.close(); }catch(Exception ex0){ System.out.println(" add. but has a error:"+ex0.getMessage()); out.close(); return; } out.close();
// System.out.println(dict+" add.");
}; private Map<String,String> getPostDate(String from){ Map<String,String> map=new HashMap<String,String>(); Pattern pattern= Pattern.compile("<input(.*?)/>"); Matcher matcher=pattern.matcher(from); Pattern pname=Pattern.compile("name=[\"|'](.*?)[\"|']"); Pattern pvalue=Pattern.compile("value=[\"|'](.*?)[\"|']"); while(matcher.find()) { String name,value; String input=matcher.group(1); Matcher mname=pname.matcher(input); Matcher mvalue=pvalue.matcher(input); if (mname.find()){ name=mname.group(1); }else{ continue; }; if (mvalue.find()){ value=mvalue.group(1); }else{ if (input.indexOf("checkbox")>0){ value="0"; }else{ value=""; } }; map.put(name,value); }
// int start=from.indexOf("cols='80' style=\"width:100%\" >"); // int end=from.indexOf("</textarea>"); // // map.put("wpTextbox1",from.substring(start,end));
map.put("wpTextbox1",""); return map; } public void helpstart() throws IOException{ clearDict();
// String dict="community/"; // String out = getUrl("https://help.ubuntu.com/"+dict+"?action=raw",dict); // getDicts(out); // putText(dict,out,"https://help.ubuntu.com/community/");
InputStream in=null; File saveFile=new File("/tmp/helpindex.html"); if (saveFile.exists()){ in=new FileInputStream(saveFile); }else{ URL url=new URL("https://help.ubuntu.com/community/TitleIndex"); in=url.openConnection().getInputStream(); } BufferedReader read=new BufferedReader(new InputStreamReader(in)); StringBuffer sb=new StringBuffer(); while (true){ String line=read.readLine(); if (line==null) break; sb.append(line+"\r\n"); } read.close(); if (!saveFile.exists()){ FileOutputStream out=new FileOutputStream(saveFile); out.write(sb.toString().getBytes()); out.flush(); out.close(); }
// URL url=new URL("https://help.ubuntu.com/community/TitleIndex"); // InputStream in=url.openConnection().getInputStream(); // BufferedReader read=new BufferedReader(new InputStreamReader(in)); // StringBuffer sb=new StringBuffer(); // while (true){ // String line=read.readLine(); // if (line==null) break; // sb.append(line+"\r\n"); // }
String html=sb.substring(sb.indexOf("<a name=\"3\">"),sb.indexOf("<a name=\"%5b\">")); Pattern pattern= Pattern.compile("<a href=\"/community/(.*?)\">"); Matcher matcher=pattern.matcher(html); while(matcher.find()) { String input=matcher.group(1); addDict(input); System.out.println(input); } String dict; String out; System.out.println("一共需要转换 "+String.valueOf(addDict.size())+" 篇文章。"); while (addDict.size()>0){
// if (addDict.size()==0) break; // dict=addDict.get(addDict.size()-1);
dict=addDict.get(0); System.out.print(String.valueOf(addDict.size())+" "+dict); try{
// if (addDict.size()>1500) continue;
try { System.out.print(" read"); out=getUrl("https://help.ubuntu.com/community/"+dict+"?action=raw",dict); System.out.print(" ."); }catch(Exception ex){ try { System.out.print(" read again"); out=getUrl("https://help.ubuntu.com/community/"+dict+"?action=raw",dict); System.out.print(" ."); }catch(Exception ex2){ continue; } } try { System.out.print(" get dict"); getDicts(out); System.out.print(" . put"); putText(dict,out,"https://help.ubuntu.com/community/"+dict); System.out.print(" .\r\n"); }catch(Exception ex){ System.out.println(dict+" error:"+ex.getMessage()); continue; } }finally{ delDict(0); } } } public void wikistart() throws MalformedURLException, IOException{ clearDict(); InputStream in=null; File saveFile=new File("/tmp/wikiindex.html"); if (saveFile.exists()){ in=new FileInputStream(saveFile); }else{ URL url=new URL("https://wiki.ubuntu.com/TitleIndex"); in=url.openConnection().getInputStream(); } BufferedReader read=new BufferedReader(new InputStreamReader(in)); StringBuffer sb=new StringBuffer(); while (true){ String line=read.readLine(); if (line==null) break; sb.append(line+"\r\n"); } read.close(); if (!saveFile.exists()){ FileOutputStream out=new FileOutputStream(saveFile); out.write(sb.toString().getBytes()); out.flush(); out.close(); } String html=sb.substring(sb.indexOf("<a name=\"0\">"),sb.indexOf("<a name=\"%5b\">")); Pattern pattern= Pattern.compile("<a href=\"/(.*?)\">"); Matcher matcher=pattern.matcher(html); while(matcher.find()) { String input=matcher.group(1); addDict(input); System.out.println(input); } String dict; String out; System.out.println("一共需要转换 "+String.valueOf(addDict.size())+" 篇文章。"); while (addDict.size()>0){ dict=addDict.get(0); System.out.print(String.valueOf(addDict.size())+" "+dict); try{
// if (addDict.size()>12285) continue;
try { System.out.print(" read"); out=getUrl("https://wiki.ubuntu.com/"+dict+"?action=raw",dict); System.out.print(" ."); }catch(Exception ex){ ex.printStackTrace(); try { System.out.print(" read again"); out=getUrl("https://wiki.ubuntu.com/"+dict+"?action=raw",dict); System.out.print(" ."); }catch(Exception ex2){ ex2.printStackTrace(); continue; } } try { System.out.print(" get dict"); getDicts(out); System.out.print(" . put"); putText(dict,out,"https://wiki.ubuntu.com/"+dict); System.out.print(" .\r\n"); }catch(Exception ex){ System.out.println(dict+" error:"+ex.getMessage()); } }finally{ delDict(0); } } } public void test() throws IOException{
// String sss="\r\ndd attachment:IconsPage/info.png ClamAV can only\r\n"; // sss=sss.replaceAll("attachment:(.*?)/(.*?) ","https://help.ubuntu.com/community/$1?action=AttachFile&do=get&target=$2 "); // System.out.println(sss); // String dict="RestrictedFormats"; // String out=getUrl("https://help.ubuntu.com/community/"+dict+"?action=raw",dict); // System.out.println(out); // String text="d CategoryHome dd"; // text=text.replaceAll("Category(\\S*)",""); // System.out.println(text); // String out=tableConv("dddd\r\n||xxx||nnn||ddd||\r\n||dd||xxdee||dd||\r\nxdd"); // System.out.println(out); // String text="#title User Documentation\r\n##Please discuss major/structural changes to this page on the Documentation team mailing list at: http://lists.ubuntu.com/mailman/listinfo/ubuntu-doc\r\n##If you want to get involved with editing and organising the Wiki please visit DocumentationTeam.\r\n## This page is designed to remain mostly static - make and propose changes to the pages that are linked to from this page\r\n## For help on contributing to the wiki, see the WikiGuide\r\n||<tablestyl"; // text=text.replaceAll("\r\n#(.*)",""); // text=text.replaceAll("^#(.*?)\r\n",""); // System.out.println(text); // String text="sss\r\ndddf{{{dxx\r\n}}}\r\n . {{{ddd}}}\r\n .{{{ddd}}}\r\n{{{de}}}";
// //link convert - \r\n {{{ * -> \r\n* // text=text.replaceAll("\r\n([ \\.]*?)\\{\\{\\{(.*)","\r\n<pre><nowiki>$2"); //link convert - {{{ * -> <pre><nowiki> * // System.out.println(text); // text=text.replaceAll("\r\n([ \\.]*?)\\{\\{\\{(.*)","<pre><nowiki>$2"); //link convert - {{{ * -> <pre><nowiki> * // text=text.replaceAll("\\{\\{\\{(.*)","\r\n<pre><nowiki>$1"); // //link convert - * }}} -> * <\pre><\nowiki> // text=text.replaceAll("(.*?)\\}\\}\\}","$1");
// // System.out.println(text); // getCookie();
// String url="http://bingniu.3322.org/mywiki/OpenLDAPAdminGuide/SecurityConsideration"; // String out=getUrl(url+"?action=raw","UbuntuManual"); // System.out.println(out);
// BufferedReader read=new BufferedReader(new FileReader("/home/wangpian/a1.txt")); // String s=""; // StringBuffer str=new StringBuffer(); // while (true){ // s=read.readLine(); // if (s==null)break; // str.append(s+"\r\n"); // } // s=str.toString(); // s=s.replaceAll("\\[\\[\\[.*?\\]\\]\\]",""); // // System.out.println(s.replaceAll("\\[\\[UbuntuHelp(.*?)\\|(.*?)\\]\\]","$2"));
} /** * @param args the command line arguments */ public static void main(String[] args) { // TODO code application logic here Main main=new Main(); try {
// main.test(); // main.helpstart();
main.wikistart(); }catch (Exception ex){ ex.printStackTrace(); } }
}
</code>