mirror of
https://gitlab.ub.uni-bielefeld.de/sfb1288inf/nopaque.git
synced 2025-06-11 08:30:41 +00:00
A better application structure
This commit is contained in:
22
app/converters/cli.py
Normal file
22
app/converters/cli.py
Normal file
@ -0,0 +1,22 @@
|
||||
import click
|
||||
from . import bp
|
||||
from .sandpaper import SandpaperConverter
|
||||
|
||||
|
||||
# Root click group for the converter commands, registered through `bp.cli`
# under the name "converter". The docstring doubles as the CLI help text.
@bp.cli.group('converter')
def converter():
    ''' Converter commands. '''
|
||||
|
||||
# Nested click group "sandpaper", attached to the `converter` group. The
# docstring doubles as the CLI help text.
@converter.group('sandpaper')
def sandpaper_converter():
    ''' Sandpaper converter commands. '''
|
||||
|
||||
# "run" command of the sandpaper group; both positional arguments are handed
# to SandpaperConverter unchanged.
@sandpaper_converter.command('run')
@click.argument('json_db_file')
@click.argument('data_dir')
def run_sandpaper_converter(json_db_file, data_dir):
    ''' Run the sandpaper converter. '''
    # Build and start the converter in a single expression, so no local name
    # hides the `sandpaper_converter` command group defined in this module.
    SandpaperConverter(json_db_file, data_dir).run()
|
@ -7,101 +7,106 @@ import os
|
||||
import shutil
|
||||
|
||||
|
||||
def convert(json_db_file, data_dir):
|
||||
with open(json_db_file, 'r') as f:
|
||||
json_db = json.loads(f.read())
|
||||
class SandpaperConverter:
|
||||
def __init__(self, json_db_file, data_dir):
|
||||
self.json_db_file = json_db_file
|
||||
self.data_dir = data_dir
|
||||
|
||||
for json_user in json_db:
|
||||
if not json_user['confirmed']:
|
||||
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
|
||||
continue
|
||||
user_dir = os.path.join(data_dir, str(json_user['id']))
|
||||
convert_user(json_user, user_dir)
|
||||
db.session.commit()
|
||||
# Entry point of the converter: parse the JSON database file and pass every
# confirmed user on to convert_user().
# NOTE(review): indentation is not visible in this view, so it cannot be told
# from here whether db.session.commit() runs once per user or once after the
# loop - confirm against the real file before relying on either.
def run(self):
|
||||
# Load the whole JSON document from the path stored by __init__.
with open(self.json_db_file, 'r') as f:
|
||||
json_db = json.loads(f.read())
|
||||
|
||||
# Iterating the parsed document yields one user record per step.
for json_user in json_db:
|
||||
# Users whose 'confirmed' value is falsy are logged and not converted.
if not json_user['confirmed']:
|
||||
current_app.logger.info(f'Skip unconfirmed user {json_user["username"]}')
|
||||
continue
|
||||
# The user's directory is data_dir joined with the user's id as a string.
user_dir = os.path.join(self.data_dir, str(json_user['id']))
|
||||
self.convert_user(json_user, user_dir)
|
||||
# Persist the work done on the database session.
db.session.commit()
|
||||
|
||||
|
||||
def convert_user(json_user, user_dir):
    ''' Create a User from one JSON user record and convert its corpora.

    The new user is added to the database session, flushed and refreshed,
    then user.makedirs() is called. Every corpus of the record whose 'files'
    mapping is not empty is handed to convert_corpus(); the others are
    logged and skipped.

    json_user: mapping with the user's attributes and a 'corpora' mapping.
    user_dir: directory below which 'corpora/<corpus id>' is looked up.

    Raises Exception('Internal Server Error') if user.makedirs() fails with
    an OSError; the session is rolled back before raising.
    '''
    logger = current_app.logger
    logger.info(f'Create User {json_user["username"]}...')
    user_attributes = dict(
        confirmed=json_user['confirmed'],
        email=json_user['email'],
        last_seen=datetime.fromtimestamp(json_user['last_seen']),
        member_since=datetime.fromtimestamp(json_user['member_since']),
        password_hash=json_user['password_hash'],  # TODO: Needs to be added manually
        username=json_user['username']
    )
    user = User(**user_attributes)
    db.session.add(user)
    db.session.flush(objects=[user])
    db.session.refresh(user)
    try:
        user.makedirs()
    except OSError as e:
        current_app.logger.error(e)
        db.session.rollback()
        raise Exception('Internal Server Error')
    for json_corpus in json_user['corpora'].values():
        if json_corpus['files'].values():
            corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
            convert_corpus(json_corpus, user, corpus_dir)
        else:
            current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
    current_app.logger.info('Done')
|
||||
|
||||
|
||||
def convert_corpus(json_corpus, user, corpus_dir):
    ''' Create a Corpus for `user` from one JSON corpus record.

    The corpus is added to the database session, flushed and refreshed,
    then corpus.makedirs() is called. Afterwards every entry of the
    record's 'files' mapping is handed to convert_corpus_file().

    json_corpus: mapping with the corpus attributes and a 'files' mapping.
    user: the User object the corpus is attached to.
    corpus_dir: directory passed through to convert_corpus_file().

    Raises Exception('Internal Server Error') if corpus.makedirs() fails
    with an OSError; the session is rolled back before raising.
    '''
    current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
    corpus_attributes = dict(
        user=user,
        creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
        description=json_corpus['description'],
        title=json_corpus['title']
    )
    corpus = Corpus(**corpus_attributes)
    db.session.add(corpus)
    db.session.flush(objects=[corpus])
    db.session.refresh(corpus)
    try:
        corpus.makedirs()
    except OSError as e:
        current_app.logger.error(e)
        db.session.rollback()
        raise Exception('Internal Server Error')
    json_corpus_files = json_corpus['files'].values()
    for json_corpus_file in json_corpus_files:
        convert_corpus_file(json_corpus_file, corpus, corpus_dir)
    current_app.logger.info('Done')
|
||||
|
||||
|
||||
def convert_corpus_file(json_corpus_file, corpus, corpus_dir):
|
||||
current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
|
||||
corpus_file = CorpusFile(
|
||||
corpus=corpus,
|
||||
address=json_corpus_file['address'],
|
||||
author=json_corpus_file['author'],
|
||||
booktitle=json_corpus_file['booktitle'],
|
||||
chapter=json_corpus_file['chapter'],
|
||||
editor=json_corpus_file['editor'],
|
||||
filename=json_corpus_file['filename'],
|
||||
institution=json_corpus_file['institution'],
|
||||
journal=json_corpus_file['journal'],
|
||||
mimetype='application/vrt+xml',
|
||||
pages=json_corpus_file['pages'],
|
||||
publisher=json_corpus_file['publisher'],
|
||||
publishing_year=json_corpus_file['publishing_year'],
|
||||
school=json_corpus_file['school'],
|
||||
title=json_corpus_file['title']
|
||||
)
|
||||
db.session.add(corpus_file)
|
||||
db.session.flush(objects=[corpus_file])
|
||||
db.session.refresh(corpus_file)
|
||||
try:
|
||||
shutil.copy2(
|
||||
os.path.join(corpus_dir, json_corpus_file['filename']),
|
||||
corpus_file.path
|
||||
def convert_user(self, json_user, user_dir):
|
||||
current_app.logger.info(f'Create User {json_user["username"]}...')
|
||||
user = User(
|
||||
confirmed=json_user['confirmed'],
|
||||
email=json_user['email'],
|
||||
last_seen=datetime.fromtimestamp(json_user['last_seen']),
|
||||
member_since=datetime.fromtimestamp(json_user['member_since']),
|
||||
password_hash=json_user['password_hash'], # TODO: Needs to be added manually
|
||||
username=json_user['username']
|
||||
)
|
||||
except:
|
||||
current_app.logger.warning(
|
||||
'Can not convert corpus file: '
|
||||
f'{os.path.join(corpus_dir, json_corpus_file["filename"])}'
|
||||
' -> '
|
||||
f'{corpus_file.path}'
|
||||
db.session.add(user)
|
||||
db.session.flush(objects=[user])
|
||||
db.session.refresh(user)
|
||||
try:
|
||||
user.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
raise Exception('Internal Server Error')
|
||||
for json_corpus in json_user['corpora'].values():
|
||||
if not json_corpus['files'].values():
|
||||
current_app.logger.info(f'Skip empty corpus {json_corpus["title"]}')
|
||||
continue
|
||||
corpus_dir = os.path.join(user_dir, 'corpora', str(json_corpus['id']))
|
||||
self.convert_corpus(json_corpus, user, corpus_dir)
|
||||
current_app.logger.info('Done')
|
||||
|
||||
|
||||
def convert_corpus(self, json_corpus, user, corpus_dir):
|
||||
current_app.logger.info(f'Create Corpus {json_corpus["title"]}...')
|
||||
corpus = Corpus(
|
||||
user=user,
|
||||
creation_date=datetime.fromtimestamp(json_corpus['creation_date']),
|
||||
description=json_corpus['description'],
|
||||
title=json_corpus['title']
|
||||
)
|
||||
current_app.logger.info('Done')
|
||||
db.session.add(corpus)
|
||||
db.session.flush(objects=[corpus])
|
||||
db.session.refresh(corpus)
|
||||
try:
|
||||
corpus.makedirs()
|
||||
except OSError as e:
|
||||
current_app.logger.error(e)
|
||||
db.session.rollback()
|
||||
raise Exception('Internal Server Error')
|
||||
for json_corpus_file in json_corpus['files'].values():
|
||||
self.convert_corpus_file(json_corpus_file, corpus, corpus_dir)
|
||||
current_app.logger.info('Done')
|
||||
|
||||
|
||||
def convert_corpus_file(self, json_corpus_file, corpus, corpus_dir):
    ''' Create a CorpusFile for `corpus` and copy its file into place.

    The corpus file is built from the JSON record (the mimetype is always
    'application/vrt+xml'), added to the database session, flushed and
    refreshed. The file named by the record's 'filename' is then copied
    from `corpus_dir` to corpus_file.path with shutil.copy2().

    json_corpus_file: mapping with the metadata fields listed below plus
        'filename' and 'title'.
    corpus: the Corpus object the file is attached to.
    corpus_dir: directory that contains the source file.

    A failed copy is deliberately not fatal: it is logged as a warning and
    the conversion continues. Raises KeyError if a metadata field is
    missing from the record.
    '''
    current_app.logger.info(f'Create CorpusFile {json_corpus_file["title"]}...')
    # Fields that are copied one-to-one from the JSON record.
    copied_fields = (
        'address',
        'author',
        'booktitle',
        'chapter',
        'editor',
        'filename',
        'institution',
        'journal',
        'pages',
        'publisher',
        'publishing_year',
        'school',
        'title',
    )
    corpus_file = CorpusFile(
        corpus=corpus,
        mimetype='application/vrt+xml',
        **{field: json_corpus_file[field] for field in copied_fields}
    )
    db.session.add(corpus_file)
    db.session.flush(objects=[corpus_file])
    db.session.refresh(corpus_file)
    # Computed once; used for the copy and for the warning message.
    source_path = os.path.join(corpus_dir, json_corpus_file['filename'])
    try:
        shutil.copy2(source_path, corpus_file.path)
    except Exception:
        # Best effort: keep going when a single file cannot be copied.
        # `Exception` instead of a bare `except` so that KeyboardInterrupt
        # and SystemExit still abort the run.
        current_app.logger.warning(
            'Can not convert corpus file: '
            f'{source_path}'
            ' -> '
            f'{corpus_file.path}'
        )
    current_app.logger.info('Done')
|
||||
|
Reference in New Issue
Block a user