"""Convert Flickr30k jsonline annotation files into per-subset caption JSON.

For every subset listed in ``subsets`` the matching jsonline file from
``set2jsonline`` is read record by record. Each record is expected to carry
the keys ``"img_path"``, ``"id"`` and ``"sentences"``. The result is written
to the path given by ``savename`` as a single JSON object with two lists:

* ``"images"``      -- one ``{"filename", "id"}`` entry per record
* ``"annotations"`` -- one ``{"image_id", "id", "caption"}`` entry per sentence

Caption ids are consecutive integers that restart at 0 for every subset.
"""
import json
import os
from collections import defaultdict

import jsonlines

subsets = ['train', 'val', 'test']
savepath = "flickr30k/annotations"

# Input jsonline file for each subset.
set2jsonline = {
    'train': 'flickr30k/all_data_final_train_2014.jsonline',
    'val': 'flickr30k/all_data_final_val_set0_2014.jsonline',
    'test': 'flickr30k/all_data_final_test_set0_2014.jsonline',
}

# exist_ok avoids the check-then-create race of a separate os.path.exists().
# NOTE(review): the output files below are written directly under
# "flickr30k/", not inside `savepath` -- confirm whether that is intended.
os.makedirs(savepath, exist_ok=True)

# Output JSON file for each subset.
savename = {
    'train': "flickr30k/captions_train.json",
    'val': "flickr30k/captions_val.json",
    'test': "flickr30k/captions_test.json",
}

for subset in subsets:
    images = []
    annotations = []
    caption_id = 0  # running caption id, restarted for each subset

    with jsonlines.open(set2jsonline[subset]) as reader:
        for record in reader:
            images.append({
                "filename": record["img_path"],
                "id": record['id'],
            })
            # One annotation entry per caption, linked back to its image.
            for sentence in record["sentences"]:
                annotations.append({
                    "image_id": record['id'],
                    "id": caption_id,
                    "caption": sentence,
                })
                caption_id += 1

    data = {
        "images": images,
        "annotations": annotations,
    }
    # A context manager guarantees the file is flushed and closed; the
    # original passed a bare open() to json.dump and never closed it.
    with open(savename[subset], "w", encoding="utf-8") as out_file:
        json.dump(data, out_file)