Add more metadata to corpus files

This commit is contained in:
Stephan Porada 2020-01-08 16:02:42 +01:00
parent 5e1ab758bc
commit 8e02c2fd14
4 changed files with 102 additions and 9 deletions

View File

@ -5,12 +5,20 @@ from wtforms.validators import DataRequired, Length
class AddCorpusFileForm(FlaskForm): class AddCorpusFileForm(FlaskForm):
author = StringField('Author', validators=[DataRequired(), Length(1, 64)]) address = StringField('Adress', validators=[Length(0, 255)])
author = StringField('Author', validators=[DataRequired(), Length(1, 255)])
booktitle = StringField('Booktitle', validators=[Length(0, 255)])
chapter = StringField('Chapter', validators=[Length(0, 255)])
editor = StringField('Editor', validators=[Length(0, 255)])
file = FileField('File', validators=[DataRequired()]) file = FileField('File', validators=[DataRequired()])
publishing_year = IntegerField('Publishing year', institution = StringField('institution', validators=[Length(0, 255)])
validators=[DataRequired()]) journal = StringField('Journal', validators=[Length(0, 255)])
pages = StringField('pages', validators=[Length(0, 255)])
publisher = StringField('Publisher', validators=[Length(0, 255)])
publishing_year = IntegerField('Publishing year', validators=[DataRequired()])
school = StringField('School', validators=[Length(0, 255)])
submit = SubmitField() submit = SubmitField()
title = StringField('Title', validators=[DataRequired(), Length(1, 64)]) title = StringField('Title', validators=[DataRequired(), Length(1, 255)])
def validate_file(form, field): def validate_file(form, field):
if not field.data.filename.lower().endswith('.vrt'): if not field.data.filename.lower().endswith('.vrt'):

View File

@ -369,11 +369,20 @@ class CorpusFile(db.Model):
__tablename__ = 'corpus_files' __tablename__ = 'corpus_files'
# Primary key # Primary key
id = db.Column(db.Integer, primary_key=True) id = db.Column(db.Integer, primary_key=True)
author = db.Column(db.String(64)) address = db.Column(db.String(255))
author = db.Column(db.String(255))
booktitle = db.Column(db.String(255))
chapter = db.Column(db.String(255))
dir = db.Column(db.String(255)) dir = db.Column(db.String(255))
editor = db.Column(db.String(255))
filename = db.Column(db.String(255)) filename = db.Column(db.String(255))
institution = db.Column(db.String(255))
journal = db.Column(db.String(255))
pages = db.Column(db.String(255))
publisher = db.Column(db.String(255))
publishing_year = db.Column(db.Integer) publishing_year = db.Column(db.Integer)
title = db.Column(db.String(64)) school = db.Column(db.String(255))
title = db.Column(db.String(255))
corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id')) corpus_id = db.Column(db.Integer, db.ForeignKey('corpora.id'))
def delete(self): def delete(self):
@ -394,8 +403,17 @@ class CorpusFile(db.Model):
self.dir, self.filename) self.dir, self.filename)
element_tree = ET.parse(file) element_tree = ET.parse(file)
text_node = element_tree.find('text') text_node = element_tree.find('text')
text_node.set('address', self.address)
text_node.set('author', self.author) text_node.set('author', self.author)
text_node.set('booktitle', self.booktitle)
text_node.set('chapter', self.chapter)
text_node.set('editor', self.editor)
text_node.set('institution', self.institution)
text_node.set('journal', self.journal)
text_node.set('pages', self.pages)
text_node.set('publisher', self.publisher)
text_node.set('publishing_year', str(self.publishing_year)) text_node.set('publishing_year', str(self.publishing_year))
text_node.set('school', self.school)
text_node.set('title', self.title) text_node.set('title', self.title)
element_tree.write(file) element_tree.write(file)
self.corpus.status = 'unprepared' self.corpus.status = 'unprepared'

View File

@ -7,9 +7,10 @@
</div> </div>
<div class="col s12 m8"> <div class="col s12 m8">
<form method="POST" enctype="multipart/form-data">
<div class="card"> <div class="card">
<form method="POST" enctype="multipart/form-data">
<div class="card-content"> <div class="card-content">
<span class="card-title">Required metadata</span>
{{ add_corpus_file_form.hidden_tag() }} {{ add_corpus_file_form.hidden_tag() }}
<div class="row"> <div class="row">
<div class="col s12 m4"> <div class="col s12 m4">
@ -61,7 +62,29 @@
<div class="card-action right-align"> <div class="card-action right-align">
<button class="btn waves-effect waves-light" id="submit" name="submit" type="submit">Submit<i class="material-icons right">send</i></button> <button class="btn waves-effect waves-light" id="submit" name="submit" type="submit">Submit<i class="material-icons right">send</i></button>
</div> </div>
</form> </div>
</div> <br>
<ul class="collapsible hoverable">
<li>
<div class="collapsible-header"><i class="material-icons">add</i>Add additional metadata</div>
<div class="collapsible-body">
<span>
<div class="row">
<div class="col s12">
<div class="input-field">
<i class="material-icons prefix">person</i>
{{ add_corpus_file_form.author(data_length='64') }}
{{ add_corpus_file_form.author.label }}
{% for error in add_corpus_file_form.author.errors %}
<span class="helper-text red-text">{{ error }}</span>
{% endfor %}
</div>
</div>
</div>
</span>
</div>
</li>
</ul>
</form>
</div> </div>
{% endblock %} {% endblock %}

View File

@ -0,0 +1,44 @@
"""Add additional metadata columns to corpus_files
Revision ID: ded5a37f8a7b
Revises: 776761fb7466
Create Date: 2020-01-08 14:39:32.182439
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = 'ded5a37f8a7b'
down_revision = '776761fb7466'
branch_labels = None
depends_on = None
def upgrade():
    """Add the additional metadata columns to the ``corpus_files`` table.

    Every new column is a nullable ``String(255)``. The columns are added
    one at a time, in the order listed below.
    """
    new_columns = (
        'address',
        'booktitle',
        'chapter',
        'editor',
        'institution',
        'journal',
        'pages',
        'publisher',
        'school',
    )
    for column_name in new_columns:
        op.add_column(
            'corpus_files',
            sa.Column(column_name, sa.String(length=255), nullable=True),
        )
def downgrade():
    """Drop the additional metadata columns from the ``corpus_files`` table.

    The columns are removed one at a time, in the order listed below.
    """
    removed_columns = (
        'school',
        'publisher',
        'pages',
        'journal',
        'institution',
        'editor',
        'chapter',
        'booktitle',
        'address',
    )
    for column_name in removed_columns:
        op.drop_column('corpus_files', column_name)