Main Page | File List

store.c

00001 #include <db.h> 
00002 #include <stdio.h>
00003 #include <stdlib.h>
00004 #include <string.h>
00005 #include <math.h>
00006 #include "store.h"
00007 
00008 //permanent database
00009 DB *database=NULL;
00010 //temporary learning database
00011 DB *tempDB = NULL;
00012 //stored returned values
00013 unsigned char ret[4];
00014 unsigned int valeur[2];
00015 
00016 /*
00017 * Open the permanent database.
00018 * @param dbname name of the permanent database
00019 */
00020 void store_opendb(char *dbname)
00021 {
00022   int ret;
00023   u_int32_t flags;
00024   
00025   if(database==NULL){
00026 
00027     ret = db_create(&database, NULL, 0);
00028     if (ret != 0){
00029       fprintf(stderr, "Error while creating database\n");
00030       exit(0);
00031     }
00032     
00033     database->set_cachesize(database,0,(unsigned int)270384*270384,1);
00034     flags = DB_CREATE;
00035     
00036     ret = database->open(database,       
00037                          NULL,       
00038                          dbname, 
00039                          0,       
00040                          DB_BTREE,   
00041                          flags,     
00042                          0);  
00043     if(ret != 0){
00044       fprintf(stderr, "Error while opening database");
00045     }
00046   }
00047 }
00048 
00049 /*
00050 * Open the temporary database and then call store_opendb to open the permanent one.
00051 * @param dbname name of the permanent database
00052 */
00053 void store_opendbs(char *dbname)
00054 {
00055     int ret;
00056     u_int32_t flags;
00057 
00058     flags = DB_CREATE|DB_TRUNCATE;
00059     ret = db_create(&tempDB, NULL, 0);
00060     if (ret != 0){
00061     fprintf(stderr, "Error while creating database\n");
00062     exit(0);
00063     }
00064 
00065     tempDB->set_cachesize(tempDB,0,(unsigned int)270384*270384,1);
00066     ret = tempDB->open(tempDB,
00067                        NULL,
00068                        NULL,
00069                        0,
00070                        DB_BTREE,
00071                        flags,
00072                        0);
00073 
00074     if(ret != 0){
00075       fprintf(stderr, "Error while opening basetemp");
00076     }
00077 
00078     store_opendb(dbname);
00079 }
00080 
00081 /*
00082 * Display all the tokens stored in the permanent database.
00083 */
00084 void store_displayTokens()
00085 {     
00086   int ret;
00087   unsigned int *porn;
00088   DBT key, data;
00089   DBC *cursorp;
00090   
00091   database->cursor(database, NULL, &cursorp, 0);  
00092   
00093   memset(&key, 0, sizeof(DBT));
00094   memset(&data, 0, sizeof(DBT));    
00095   
00096   while ((ret = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
00097     porn = to_int(data.data);
00098     printf("%-45s : porn : %5u non porn %5u\n", (char*)key.data, porn[0], porn[1]);
00099   }
00100   
00101   if (cursorp != NULL) 
00102     cursorp->c_close(cursorp); 
00103 }
00104 
00105 /*
00106 * Get the number of "bad" and "good" occurences of a token in the permanent database.
00107 * @param token the token to get the score from
00108 * @return data.data the number of occurences
00109 */
00110 unsigned int* store_getScore(char *token)
00111 {     
00112   int ret;
00113   DBT key, data;
00114   
00115   memset(&key, 0, sizeof(DBT));
00116   memset(&data, 0, sizeof(DBT));    
00117  
00118   key.data = token;
00119   key.size = strlen(token)+1;
00120 
00121   ret = database->get(database, NULL, &key, &data, 0);
00122 
00123   if(ret == DB_NOTFOUND){
00124      valeur[0] = 0;
00125      valeur[1] = 0;
00126      return (valeur);
00127   }else
00128     return (to_int(data.data));
00129 }
00130 
00131 /*
00132 * Store a token in thye temporary learning database (or learning tokens are counted just once per page).
00133 * @param token the token to store
00134 * @param type the type of token (tag, word, biword or domain)
00135 */
00136 void store_storeTempToken(char *token, int type)
00137 {
00138  int ret;
00139  DBT key, data;
00140  unsigned char *val=NULL;
00141  char *debut = NULL;
00142  char *chaine = NULL;
00143 
00144  switch(type){
00145  case TAGS :
00146    debut = malloc(sizeof(char)*(strlen("tag:")+1));
00147    strcpy(debut, "tag:");
00148    break;
00149  case WORDS :
00150    debut = malloc(sizeof(char)*(strlen("word:")+1));
00151    strcpy(debut, "word:");
00152    break;
00153  case BIWORDS :
00154    debut = malloc(sizeof(char)*(strlen("biword:")+1));
00155    strcpy(debut, "biword:");
00156    break;
00157  case DOMAINS :
00158    debut = malloc(sizeof(char)*(strlen("domain:")+1));
00159    strcpy(debut, "domain:");
00160    break;
00161  default :
00162    debut =  malloc(sizeof(char)*(strlen("")+1));
00163    strcpy(debut, "");
00164    break;
00165  }
00166 
00167  memset(&key, 0, sizeof(DBT));
00168  memset(&data, 0, sizeof(DBT));
00169 
00170  chaine = malloc(sizeof(char)*(strlen(token)+strlen(debut)+1));
00171  strcpy(chaine, debut);
00172  strcat(chaine, token);
00173 
00174  key.data = chaine;
00175  key.size = strlen(chaine)+1;
00176 
00177  val = to_hex(1, 0);
00178 
00179  data.data = val;
00180  data.size = 4;
00181 
00182  ret = tempDB->put(tempDB, NULL, &key, &data, DB_NOOVERWRITE);
00183 
00184  if (debut != NULL) {free(debut); debut=NULL;}
00185  else printf("var debut NULL in store_storeToken");
00186  if(chaine != NULL) {free(chaine); chaine=NULL;}
00187  else printf("var chaine NULL in store_storeToken\n");
00188 }
00189 
00190 /*
00191 * Store all the token from the temporary learning database to the permanent one.
00192 * @param isporn (1 = "is porn", 0 = "is not porn")
00193 */
00194 void store_storeAll(int isporn)
00195 {
00196   int retb;
00197   DBT key, data;
00198   DBC *cursorp;
00199 
00200   tempDB->cursor(tempDB, NULL, &cursorp, 0);
00201   memset(&key, 0, sizeof(DBT));
00202   memset(&data, 0, sizeof(DBT));
00203 
00204   while ((retb = cursorp->c_get(cursorp, &key, &data, DB_NEXT)) == 0) {
00205         store_storeToken(key.data, isporn);
00206         cursorp->c_del(cursorp, 0);
00207   }
00208 
00209   if (cursorp != NULL)
00210     cursorp->c_close(cursorp);
00211 
00212 }
00213 
00214 /*
00215 * Store a token in the permanent database.
00216 * @param token the token to store
00217 * @param isporn (1 = "is porn", 0 = "is not porn") 
00218 */
00219 void store_storeToken(char *token, int isporn)
00220 {
00221  int ret; 
00222  DBT key, data;
00223  unsigned int *tmp; 
00224  unsigned char *val=NULL;     
00225 
00226  memset(&key, 0, sizeof(DBT));
00227  memset(&data, 0, sizeof(DBT));    
00228  
00229  key.data = token;
00230  key.size = strlen(token)+1;
00231  
00232  val = to_hex(isporn, 1 - isporn);
00233 
00234  data.data = val;
00235  data.size = 4; 
00236   
00237  ret = database->put(database, NULL, &key, &data, DB_NOOVERWRITE);
00238  if (ret == DB_KEYEXIST) {
00239    database->get(database, NULL, &key, &data, 0);
00240    tmp = to_int(data.data);
00241    val = to_hex(tmp[0]+isporn, tmp[1]+1-isporn);
00242    data.data = val;
00243    data.size = 4;
00244    database->put(database, NULL, &key, &data, 0);
00245  }
00246 }
00247 
00248 /*
00249 * Close the permanent and temporary databases
00250 */
00251 void store_closedb()
00252 {  
00253   if (database != NULL)
00254     database->close(database, 0);
00255   if (tempDB != NULL)
00256     tempDB->close(tempDB, 0); 
00257 }
00258 
00259 /*
00260 * Convert a pair of int values to an hex one (4 bytes) to store in the database
00261 * @param nbp number of porn occurences of a token
00262 * @param nbn number of non porn occurences of the same token
00263 * @return ret the converted value
00264 */
00265 unsigned char* to_hex(unsigned int nbp, unsigned int nbn)
00266 {
00267   ret[0] = (nbp/256);
00268   ret[1] = (nbp%256);
00269 
00270   ret[2] = (nbn/256);
00271   ret[3] = (nbn%256);
00272 
00273   return ret;
00274 }
00275 
00276 /*
00277 * Convert a hex value (4 bytes) to a pair of int values
00278 * @param conv hex value to convert
00279 * @return valeur returned array
00280 */
00281 unsigned int* to_int(unsigned char *conv)
00282 {
00283   valeur[0] = conv[0]*256 + conv[1];
00284   valeur[1] = conv[2]*256 + conv[3];
00285 
00286   return(valeur);
00287 }

Generated on Tue May 31 14:22:44 2005 for filterFlex by  doxygen 1.3.9.1