'' order by cl_sub_cd"); for ($i=0;$i input parameter <-- // output : raw, mil // sortby : freq, perc, alph // p : word(s) // w2 : context // wl : left context // wr : rifht context // freq1 : support frequentcy //init sum stat $tmp=query("select sum(wrd_cnt) as sum_wrd_cnt from wrd_stat"); $sum_wrd_cnt['ALL']=$tmp[0]['sum_wrd_cnt']; if (request('output')=='mil'){ //show stat in MIL $tmp=query("select cl_genre, sum(wrd_cnt) as sum_wrd_cnt from wrd_stat group by cl_genre;"); for ($i=0;$i$val){ if ($data[$wrd_id]['TOT']>=$freq1){ if ($_POST['sortby']=="alph") $aux[$wrd_id]=$data[$wrd_id]['wrd_txt']; elseif ($_POST['sortby']=="perc") $aux[$wrd_id]=$data[$wrd_id]['TOT']; } } //step C : sort if ($_POST['sortby']=="alph") asort($aux); elseif ($_POST['sortby']=="perc") arsort($aux); //print_x($aux);exit; //step D : sum grand total + filter out LIMIT $i=0; foreach ($aux as $wrd_id=>$val){ if (++$i<=$limit){ foreach ($data[$wrd_id] as $a=>$b){ $data['GRAND'][$a]+=$data[$wrd_id][$a]; } }else{ unset ($aux[$wrd_id]); } } } //******************* //case of context else{ $tmp=query("select wrd_id from wrd where wrd_txt like '$p'"); $context_flg=true; if (count($tmp)>1){ //unaccepted case echo "result > 1, error"; } if (count($tmp)==0){// not found data case echo "data not found"; } if (count($tmp)==1) { //valid case $wrd_id=$tmp[0]['wrd_id']; //get total occurrence $sql="select sum(wrd_cnt) as count from wrd_stat b where true ".$_SESSION['filter_sql_1']." ".$_SESSION['filter_sql_2']; $tmp=query1($sql); $total_occurrence=$tmp['count']+0.000001; //get total word //$sql="select count(distinct(wrd_id)) as count from wrd_stat b //where true ".$_SESSION['filter_sql_1']." ".$_SESSION['filter_sql_2']; //$tmp=query1($sql); //$total_word=$tmp['count']+0.000001; //get stat of main word $sql="select sum(wrd_cnt) as count from wrd a left join wrd_stat b using (wrd_id) where wrd_txt like '$p' ".$_SESSION['filter_sql_1']." 
".$_SESSION['filter_sql_2']; $tmp=query1($sql); $main_count=$tmp['count']+0.000001; //get stat of collocations $sql=" select b.ctx_id, a.wrd_txt wrd_txt, b.cl_genre, b.ctx_freq total from precache_ctx b, wrd a where b.wrd_id=$wrd_id and b.wrd_dis in(-$wl,$wr) and b.ctx_id=a.wrd_id ". $_SESSION['filter_sql_1']." ".$_SESSION['filter_sql_2']." and a.wrd_txt like '$w2'"; //echo $sql,"
";exit; $tmp = mysql_query($sql) or die("Could not connect: " . mysql_error()); //$tmp=query($sql); //print_x($tmp); //step A : select count for each genre + sum total while ($tmp2=mysql_fetch_array($tmp,MYSQL_ASSOC)){ $data[$tmp2['ctx_id']]['wrd_txt']=$tmp2['wrd_txt']; $data[$tmp2['ctx_id']][$tmp2['cl_genre']]+=$tmp2['total']; $data[$tmp2['ctx_id']]['TOT']+=$tmp2['total']; } //step B : filter out FREQ1 + calculate relavent foreach ($data as $wrd_id=>$val){ if ($data[$wrd_id]['TOT']>=$freq1){ $tmp=query1("select sum(wrd_cnt) as x from wrd_stat b where wrd_id=$wrd_id ". $_SESSION['filter_sql_1']." ".$_SESSION['filter_sql_2']); $data[$wrd_id]['ALL']=$tmp['x']; //CO Collocations $data[$wrd_id]['CO']=$data[$wrd_id]['TOT']/$tmp['x']; //MI n(a and b) * total_word / ( n(a) * n(b) * span ) $data[$wrd_id]['MI']=log( $data[$wrd_id]['TOT']*$total_occurrence/($tmp['x']*$main_count*($wl+$wr)) , 2); //Dunning's Likelihood $data[$wrd_id]['DL']=0; $C12 = $data[$wrd_id]['TOT']; $C1 = $tmp['x']; $C2 = $main_count; $a = $C12; $b = $C1 - $C12; $c = $C2 - $C12; //$d = $total_word - $C1 - $C2 + $C12; $d = $total_occurrence - $C1 - $C2 + $C12; $ll = $a*log($a) + $b*log($b) + $c*log($c) + $d*log($d) - ($a+$b)*log($a+$b) - ($a+$c)*log($a+$c) - ($b+$d)*log($b+$d) - ($c+$d)*log($c+$d) + ($a+$b+$c+$d)*log($a+$b+$c+$d); $data[$wrd_id]['DL'] = 2*$ll; // $p = $C2/$total_word; // $p1 = $C12/$C1; // $p2 = ($C2-$C12)/($total_word - $C1); // if ($p == 1) { $p = 0.9999999; } // if ($p1 == 1) { $p1 = 0.9999999; } // if ($p2 == 0) { $p2 = 0.0000001; } // $likelihood = ($C12*log($p) + ($C1-$C12) * log (1-$p)) + // ( ($C2-$C12) * log($p)+ ($Totalword-$C1-$C2+$C12)*log(1-$p) ) - // ($C12*log($p1) + ($C1-$C12)*log(1-$p1)) - // ( ($C2-$C12) * log($p2)+ ($Totalword-$C1-$C2+$C12)*log(1-$p2) ) ; // $likelihood = -2 * $likelihood; // $data[$wrd_id]['DL']=$likelihood; //choose stat if ($_REQUEST['stat']=='Mutual Information'){ $data[$wrd_id]['REL']=$data[$wrd_id]['MI']; }else{ 
$data[$wrd_id]['REL']=$data[$wrd_id]['DL'];
}
// Remember the sort key for this collocate: its text for alphabetical
// ordering, or the selected relevance statistic ('REL') for "perc" ordering.
if ($_POST['sortby']=="alph") $aux[$wrd_id]=$data[$wrd_id]['wrd_txt'];
elseif ($_POST['sortby']=="perc") $aux[$wrd_id]=$data[$wrd_id]['REL'];
}else{
    // Total frequency is below $freq1: drop this collocate from the result.
    unset($data[$wrd_id]);
}
}
//step C : sort (asort keeps the wrd_id keys; arsort puts the highest value first)
if ($_POST['sortby']=="alph") asort($aux);
elseif ($_POST['sortby']=="perc") arsort($aux);
//step D : sum grand total + filter out LIMIT
$i=0;
foreach ($aux as $wrd_id=>$val){
    if (++$i<=$limit){
        // Within the first $limit rows: add every column of this row
        // to the 'GRAND' totals row.
        foreach ($data[$wrd_id] as $a=>$b){
            $data['GRAND'][$a]+=$data[$wrd_id][$a];
        }
    }else{
        // Past the limit: remove the row from the sorted key list.
        unset ($aux[$wrd_id]);
    }
}
//print_x($data);
}
//exit;
}

/**
 * Comparison callback ordering rows by their "relevance" entry, highest first.
 * Presumably intended for usort()/uasort(); no caller is visible in this part
 * of the file.
 *
 * Returns 0 when both rows have equal relevance so that the comparison is
 * consistent; previously ties returned 1 regardless of argument order.
 *
 * @param array $a row with a "relevance" key
 * @param array $b row with a "relevance" key
 * @return int -1 if $a sorts first, 1 if $b sorts first, 0 on a tie
 */
function sort_perc($a, $b) {
    if ($a["relevance"] == $b["relevance"]) return 0;
    return ($b["relevance"]<$a["relevance"])?-1:1;
}

/**
 * Comparison callback ordering rows by their "word" entry using strcoll(),
 * i.e. according to the collation rules of the current locale.
 *
 * @param array $a row with a "word" key
 * @param array $b row with a "word" key
 * @return int negative, zero or positive, as returned by strcoll()
 */
function sort_alph($a, $b) {
    return strcoll($a["word"],$b["word"]);
}
?>

 

Type your word (in Thai) in the textbox below

 

COLLOCATE (Please click to enable)

MIN FREQ:

Filter

GENRE:

DOMAIN:

Display

Output:

Sort:

 


Result

 

$cl_genre_nm) {?> $val){ if (!$context_flg){ $param="p=".$data[$wrd_id]['wrd_txt']."&r="; }else{ $param="p=$p&w2=".$data[$wrd_id]['wrd_txt']."&wl=$wl&wr=$wr&r="; $param_all="p=".$data[$wrd_id]['wrd_txt']."&r="; } ?>
TOT ALL % DL