Mirror of https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git (synced 2025-11-03 20:02:47 +00:00)

Commit: New analysis stuff
@@ -44,7 +44,6 @@ class CQiWrapper(CQiClient):
         '''
         p_attrs = self.corpus_positional_attributes(self.corpus_name)
         struct_attrs = self.corpus_structural_attributes(self.corpus_name)
-        logger.warning('struct_attrs: {}'.format(struct_attrs))
         self.attr_strings = {}
         self.attr_strings['positional_attrs'] = {}
         self.attr_strings['struct_attrs'] = {}
@@ -137,9 +136,15 @@ class CQiWrapper(CQiClient):
                                     self.attr_strings['positional_attrs']['word']
                                )
         self.nr_matches = min(result_len, self.match_count)
-        if self.nr_matches == 0:
+        if self.match_count == 0:
             logger.warning('Query resulted in 0 matches.')
-            return None
+            self.results = {'code': 0,
+                            'result': {'matches': [],
+                                       'match_count': self.match_count,
+                                       'cpos_lookup': {},
+                                       'text_lookup': {}}
+                            }
+            return self.results
         else:
             # Get match cpos boundries
             # match_boundries shows the start and end cpos of one match as a
@@ -200,7 +205,7 @@ class CQiWrapper(CQiClient):
                         'result': {'matches': all_matches,
                                    'match_count': self.match_count,
                                    'cpos_lookup': all_cpos_infos,
-                                   'text_lookup': text_lookup,}
+                                   'text_lookup': text_lookup}
                         }
         return self.results
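Note: the hunks above change what CQiWrapper.show_query_results() hands back. Instead of returning None when a query has no matches, the wrapper now always returns a dict with a numeric code and a result payload, so callers can treat every outcome uniformly. A minimal sketch of that envelope for the zero-match case (the helper name empty_query_result is illustrative, not part of the commit):

# Sketch of the response envelope introduced above; the keys mirror the diff,
# the helper itself exists only for illustration.
def empty_query_result(match_count=0):
    return {'code': 0,                      # query ran, possibly with zero matches
            'result': {'matches': [],       # per-match dicts with 'lc'/'hit'/'rc' cpos lists
                       'match_count': match_count,
                       'cpos_lookup': {},   # cpos -> token attributes, e.g. 'word'
                       'text_lookup': {}}}  # text id -> corpus file metadata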
@@ -20,7 +20,7 @@ corpus_analysis_clients = {}
 @socketio.on('corpus_analysis_init')
 @login_required
 def init_corpus_analysis(corpus_id):
-    logger.warning("a")
+    logger.warning("Initiating corpus analysis.")
     socketio.start_background_task(corpus_analysis_session_handler,
                                    current_app._get_current_object(),
                                    corpus_id, current_user.id, request.sid)
@@ -45,27 +45,32 @@ def corpus_analysis(message):
         client.query_subcorpus(query)
     except Exception as e:
         logger.warning(e)
-        socketio.emit('corpus_analysis', str(e), room=request.sid)
+        response = str(e)
+        if response == "CQI_CQP_ERROR_GENERAL":
+            response = {'code': 1,
+                        'result': {'matches': [],
+                                   'match_count': 0,
+                                   'cpos_lookup': {},
+                                   'text_lookup': {}}
+                        }
+            socketio.emit('corpus_analysis_query', response, room=request.sid)
     else:
         logger.warning('====== Initial query {} ======'.format(query))
-        results = client.show_query_results(result_len=result_len,
-                                            context_len=context_len,
-                                            result_offset=result_offset)
-        result_offset += result_len  # initial offfset is plus result len because client.show_query_results has been already executed once
-        socketio.emit('corpus_analysis', results, room=request.sid)
+        response = client.show_query_results(result_len=result_len,
+                                             context_len=context_len,
+                                             result_offset=result_offset)
+        result_offset += result_len  # initial offset is plus result len because client.show_query_results has already been executed once
+        socketio.emit('corpus_analysis_query', response, room=request.sid)
         while result_offset < client.match_count:
             logger.warning('====== While loop start for {} ======'.format(query))
             logger.warning('result_offset: {}'.format(result_offset))
-            results = client.show_query_results(result_len=result_len,
-                                                context_len=context_len,
-                                                result_offset=result_offset)
-            # results['matches'].extend(results_append['matches'])
-            # results['cpos_lookup'].update(results_append['cpos_lookup'])
-            # results['text_lookup'].update(results_append['text_lookup'])
+            response = client.show_query_results(result_len=result_len,
+                                                 context_len=context_len,
+                                                 result_offset=result_offset)
             result_offset += result_len
             result_offset = min(result_offset, client.match_count)
             logger.warning('result_offset end of while loop: {}'.format(result_offset))
-            socketio.emit('corpus_analysis', results, room=request.sid)
+            socketio.emit('corpus_analysis_query', response, room=request.sid)
 
 
 @socketio.on('inspect_match')
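Note: the handler keeps its chunked delivery but renames the event to 'corpus_analysis_query' and emits structured response dicts throughout: one chunk is fetched and emitted before the loop, then the offset advances by result_len until it reaches client.match_count. A standalone sketch of that pagination pattern (fetch_chunk and send are placeholders, not part of the commit):

# Sketch of the chunked-delivery loop used by the handler above.
# fetch_chunk(offset, length) stands in for client.show_query_results(...),
# send(chunk) for socketio.emit('corpus_analysis_query', ...).
def stream_query_results(match_count, result_len, fetch_chunk, send):
    result_offset = 0
    send(fetch_chunk(result_offset, result_len))  # initial chunk before the loop
    result_offset += result_len
    while result_offset < match_count:
        send(fetch_chunk(result_offset, result_len))
        # never let the offset overshoot the total number of matches
        result_offset = min(result_offset + result_len, match_count)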
@@ -224,12 +224,17 @@
                                 {"dismissible": true});
     M.Collapsible.init(elem, {accordion: false});
     loadingModal.open();
-    nopaque.socket.emit("request_corpus_analysis", {{ corpus_id }});
+    nopaque.socket.emit("corpus_analysis_init", {{ corpus_id }});
   });
 
   // close loading modal if container for analysis has started
-  nopaque.socket.on("request_corpus_analysis", function(msg) {
-    if (msg === "[201]: Created") {loadingModal.close();}
+  nopaque.socket.on("corpus_analysis_init", function(response) {
+    if (response.code === 201) {
+      loadingModal.close();
+    } else {
+      console.log("[ERROR] corpus_analysis_init");
+      console.log(response);
+    }
   });
 
   // exper view stuff reuse maybe and REMOVE later
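Note: the template now expects the 'corpus_analysis_init' acknowledgement to carry a numeric code instead of the old "[201]: Created" status string. The server side that emits this acknowledgement is not part of this diff; a hypothetical Flask-SocketIO sketch of such an emit could look like the following (every key except 'code' is a placeholder):

# Hypothetical acknowledgement for 'corpus_analysis_init' (not shown in this diff).
# The template's handler above only relies on the numeric 'code' field.
response = {'code': 201, 'msg': 'Created'}
socketio.emit('corpus_analysis_init', response, room=request.sid)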
@@ -275,9 +280,9 @@
   queryFormElement.addEventListener("submit", function(event) {
     event.preventDefault();
     let formData = new FormData(queryFormElement);
-    let queryData = {"context": formData.get("context"),
-                     "hits_per_page": formData.get("hits_per_page"),
-                     "query": formData.get("query")};
+    queryData = {"context": formData.get("context"), // global declaration
+                 "hits_per_page": formData.get("hits_per_page"),
+                 "query": formData.get("query")};
     hitsPerPage = formData.get("hits_per_page");
     nopaque.socket.emit("corpus_analysis", queryData);
     // full results object declaration, kind of global maybe store it later?
@@ -318,7 +323,24 @@
 
 
   // socket.on triggered when result chunks are recieved
-  nopaque.socket.on("corpus_analysis", function(response) {
+  nopaque.socket.on("corpus_analysis_query", function(response) {
+    // ERROR code checking
+    if (response["code"] === 0) {
+      console.log("[ERROR] corpus_analysis_init");
+      console.log("Code:" + response["code"]);
+      // further code execution of this code block starting in line 342
+    } else if (response["code"] === 1) {
+      queryResultsTableElement.classList.add("hide");
+      queryLoadingElement.classList.add("hide");
+      nopaque.toast("Invalid query entered!", "red");
+      console.log("[SUCCESS] corpus_analysis_init");
+      console.log("Code:" + response["code"]);
+      return; // no further code execution of this code block
+    } else {
+      console.log("[ERROR] corpus_analysis_init");
+      console.log("Code:" + response["code"]);
+      return; // no further code execution of this code block
+    }
     // logs the current recieved chunk
     chunk = response["result"];
     console.log("### corpus_analysis chunk ###");
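Note: read together, the hunks in this commit imply a small numeric status convention for the corpus-analysis socket events. Summarised as a Python mapping (inferred from the diff; this dict does not exist in the repository):

# Status codes as they appear across this commit (documentation only).
CORPUS_ANALYSIS_CODES = {
    0: "query executed, result payload follows (may contain zero matches)",
    1: "invalid query (CQI_CQP_ERROR_GENERAL), empty result payload",
    201: "corpus analysis session created (acknowledges corpus_analysis_init)",
}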
@@ -330,62 +352,56 @@
     Object.assign(result["cpos_lookup"], chunk["cpos_lookup"]);
     Object.assign(result["text_lookup"], chunk["text_lookup"]);
     result["match_count"] = chunk["match_count"];
-    result["query"] = chunk["query"];
+    result["query"] = queryData["query"];
     console.log(result);
     // Some hiding and showing of loading animations
     queryLoadingElement.classList.add("hide");
     queryResultsTableElement.classList.remove("hide");
     queryResultsElement.innerHTML = "";
 
-    // some checks for erroneous or empty query results
-    // No results for this query
-    if (chunk === null) {
+    // check if query has any results
+    if (chunk["matches"].length === 0) {
       queryResultsTableElement.classList.add("hide");
       nopaque.toast("No results for this query!");
       return;
-    // Query was invalid
-    } else if (chunk === "CQI_CQP_ERROR_GENERAL") {
-        queryResultsTableElement.classList.add("hide");
-        nopaque.toast("Invalid query entered!", "red");
-        return;
-    // List building/appending the chunks when query had results
-      } else {
-        // write metadata query information into HTML elements
-        // like nr. of all matches in how many files etc.
-        // TODO: count_corpus_files müssen aus full results genommen werden.
-        match_count = chunk["match_count"];
-        let count_corpus_files = Object.keys(result["text_lookup"]).length;
-        queryResultsMetadataElement.innerHTML = chunk["match_count"] + " matches in " + count_corpus_files + " corpus files.";
-        queryResultsMetadataElement.appendChild(exportQueryResults);
-        exportQueryResults.classList.remove("hide");
-
-        var toAdd = [];
-        for (let [index, match] of chunk["matches"].entries()) {
-          lc_tokens = "";
-          for (cpos of match["lc"]) {
-            word = chunk["cpos_lookup"][cpos]["word"];
-            lc_tokens += " " + word;
-          }
-          // console.log(lc_tokens);
-          hit_tokens = "";
-          for (cpos of match["hit"]) {
-            word = chunk["cpos_lookup"][cpos]["word"];
-            hit_tokens += " " + word;
-          }
-          // console.log(hit_tokens);
-          rc_tokens = "";
-          for (cpos of match["rc"]) {
-            word = chunk["cpos_lookup"][cpos]["word"];
-            rc_tokens += " " + word;
-          }
-          // console.log(rc_tokens);
-          item = { titles: "test", lc: lc_tokens, hit: hit_tokens, rc: rc_tokens };
-          toAdd.push(item);
-        }
-        resultList.add(toAdd, function(toAdd) {console.log('All '
-                                                           + toAdd.length
-                                                           +  ' results were added!')});
-        }
+    }
+
+    // List building/appending the chunks when query had results
+    // write metadata query information into HTML elements
+    // like nr. of all matches in how many files etc.
+    // TODO: count_corpus_files müssen aus full results genommen werden.
+    match_count = chunk["match_count"];
+    let count_corpus_files = Object.keys(result["text_lookup"]).length;
+    queryResultsMetadataElement.innerHTML = chunk["match_count"] + " matches in " + count_corpus_files + " corpus files.";
+    queryResultsMetadataElement.appendChild(exportQueryResults);
+    exportQueryResults.classList.remove("hide");
 
+    var toAdd = [];
+    for (let [index, match] of chunk["matches"].entries()) {
+      lc_tokens = "";
+      for (cpos of match["lc"]) {
+        word = chunk["cpos_lookup"][cpos]["word"];
+        lc_tokens += " " + word;
+      }
+      // console.log(lc_tokens);
+      hit_tokens = "";
+      for (cpos of match["hit"]) {
+        word = chunk["cpos_lookup"][cpos]["word"];
+        hit_tokens += " " + word;
+      }
+      // console.log(hit_tokens);
+      rc_tokens = "";
+      for (cpos of match["rc"]) {
+        word = chunk["cpos_lookup"][cpos]["word"];
+        rc_tokens += " " + word;
+      }
+      // console.log(rc_tokens);
+      item = { titles: "test", lc: lc_tokens, hit: hit_tokens, rc: rc_tokens };
+      toAdd.push(item);
+    }
+    resultList.add(toAdd, function(toAdd) {console.log('All '
+                                                       + toAdd.length
+                                                       +  ' results were added!')});
   });
 
   // Function to download data to a file
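Note: on the client, each incoming chunk is merged into a running result object: the cpos and text lookups are merged via Object.assign, match_count is taken from the latest chunk, and the rendered result list is extended per chunk. A minimal Python sketch of the same accumulation, e.g. for a non-browser consumer of the 'corpus_analysis_query' stream (the function name is illustrative):

# Sketch: merge one streamed chunk into an accumulated result dict,
# mirroring the client-side merge in the template above.
def merge_chunk(result, chunk):
    result.setdefault('matches', []).extend(chunk['matches'])
    result.setdefault('cpos_lookup', {}).update(chunk['cpos_lookup'])
    result.setdefault('text_lookup', {}).update(chunk['text_lookup'])
    result['match_count'] = chunk['match_count']  # total count, not per-chunk
    return result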