Lightbox Info Board : SQLの窓

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;

namespace RegexTest
{
    /// <summary>
    /// Downloads the front page of http://gigazine.net/, extracts every
    /// URL-like substring with a regular expression, and writes the matches
    /// to <c>result.txt</c> encoded as Shift_JIS.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Internet access: fetch the page as a single string.
            // The WebClient is disposed even if the download throws.
            string result;
            using (WebClient client = new WebClient())
            {
                // The target page is decoded as UTF-8. For pages served in
                // another encoding, use Encoding.GetEncoding("shift_jis") or
                // Encoding.GetEncoding("euc-jp") here instead.
                client.Encoding = Encoding.UTF8;
                result = client.DownloadString("http://gigazine.net/");
            }

            // Prepare the output text file. The using statements flush and
            // close the writer and the underlying stream exactly once, on both
            // the normal and the exceptional path.
            using (FileStream fs = new FileStream("result.txt", FileMode.Create, FileAccess.Write))
            using (StreamWriter sw = new StreamWriter(fs, Encoding.GetEncoding("shift_jis")))
            {
                sw.WriteLine("SHIFT_JIS で書き込んでいます");

                // Group 1 captures the URL; the trailing classes consume an
                // optional closing quote and one delimiter ( ; ) > or whitespace ).
                MatchCollection mc = Regex.Matches(result, "(https?://.+?)[\"']?[;)>\\s]");
                foreach (Match match in mc)
                {
                    sw.WriteLine(match.Groups[1].Value);
                }
            }
        }
    }
}

<?php
// Downloads the front page of http://gigazine.net/, extracts every URL-like
// substring with a regular expression, and prints the matches inside a <pre>
// element, one per line.

header( "Content-Type: text/html; Charset=utf-8" );
// Ask clients and proxies not to cache the generated page.
header( "pragma: no-cache" );
header( "Expires: Wed, 31 May 2000 14:59:58 GMT" );
header( "Cache-control: no-cache" );
// Stricter alternative:
//Cache-Control: private, no-store, no-cache, must-revalidate

$result = file_get_contents("http://gigazine.net/");
if ( $result === false ) {
	// file_get_contents() returns false on failure; report it instead of
	// running the regex over a boolean.
	header( "HTTP/1.1 502 Bad Gateway" );
	print "<pre>Failed to download http://gigazine.net/</pre>";
	exit;
}

// Group 1 captures the URL; the trailing classes consume an optional closing
// quote and one delimiter ( ; ) > or whitespace ).
$matches = array();
$found = preg_match_all("|(https?://.+?)[\"']?[;)>\\s]|u", $result, $matches, PREG_PATTERN_ORDER );

// preg_match_all() returns false on error (e.g. invalid UTF-8 with the /u
// modifier); treat that as "no matches".
$urls = ( $found !== false && isset( $matches[1] ) ) ? $matches[1] : array();

print "<pre>";
foreach( $urls as $value ) {
	// The matched text comes from a remote page, so escape it before
	// embedding it in HTML.
	print( htmlspecialchars( $value, ENT_QUOTES, "UTF-8" ) . "\n" );
}
print "</pre>";
?>

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Downloads the front page of http://gigazine.net/, extracts every URL-like
 * substring with a regular expression, and writes the matches both to
 * standard output and to <code>result.txt</code> encoded as Shift_JIS.
 */
public class HttpGetAndRegex {

	/**
	 * Program entry point. Any exception is caught and its stack trace printed.
	 *
	 * @param args command-line arguments (not used)
	 */
	public static void main(String[] args) {

		try {
			URL url = new URL("http://gigazine.net/");
			// Connection object
			HttpURLConnection http = (HttpURLConnection)url.openConnection();

			// StringBuilder avoids the quadratic cost of repeated String
			// concatenation while accumulating the page.
			StringBuilder page = new StringBuilder();
			try {
				http.setRequestMethod("GET");
				// Connect
				http.connect();

				// Read the response line by line, decoding it as UTF-8.
				// try-with-resources closes the reader and the stream even
				// when reading fails.
				try (InputStream in = http.getInputStream();
						BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
					String line;
					// readLine() returns null at end of input
					while ((line = reader.readLine()) != null) {
						// readLine() strips the line terminator; put one back so
						// a URL at the end of a line is still terminated by
						// whitespace and does not fuse with the next line.
						page.append(line).append('\n');
					}
				}
			}
			finally {
				http.disconnect();
			}

			// **************************************************
			// Regular-expression search
			// Group 1 captures the URL; the trailing classes consume an
			// optional closing quote and one delimiter ( ; ) > or whitespace ).
			// **************************************************
			String regex = "(https?://.+?)[\"']?[;)>\\s]";
			Pattern pattern = Pattern.compile(regex);

			// **************************************************
			// Prepare the output text file
			// http://docs.oracle.com/javase/jp/7/technotes/guides/intl/encoding.doc.html
			// (use "EUC-JP" instead of "SHIFT_JIS" to write EUC-JP)
			// **************************************************
			try (PrintWriter pw = new PrintWriter(".\\result.txt", "SHIFT_JIS")) {
				pw.println( "SHIFT_JIS で書き込んでいます" );

				Matcher matcher = pattern.matcher(page);
				while (matcher.find()) {
					System.out.println(matcher.group(1));
					pw.println( matcher.group(1) );
				}

				pw.flush();
			}

		}
		catch( Exception e ) {
			e.printStackTrace();
		}

	}

}