{"@context":"http://iiif.io/api/presentation/3/context.json","id":"https://2020clirevents.aviaryplatform.com/iiif/cn6xw48940/manifest","type":"Manifest","label":{"en":["Combo Session: #sustainability #technology"]},"logo":"https://d9jk7wjtjpu5g.cloudfront.net/organizations/logo_images/000/000/118/original/CLIRforSocial_round.jpg?1604439777","metadata":[{"label":{"en":["Agent"]},"value":{"en":["Yinlin Chen","James Tuttle","Alex Kinnaman","Bethany Scott","Zhiwu Xie"]}},{"label":{"en":["Date"]},"value":{"en":["2020-11-12"]}},{"label":{"en":["Language"]},"value":{"en":["English (Primary)"]}},{"label":{"en":["Description"]},"value":{"en":["\u003cp\u003eVirginia Tech University Libraries is developing a cloud-based, serverless, microservice application to support digital asset management, preservation, and access. This presentation will outline the balance of cost-effectiveness and creating a trustworthy platform while relying on the cloud.\u003c/p\u003e","\u003cp\u003eThe University of Houston Libraries has completed an estimate of its digital preservation program's electricity usage and carbon footprint. Building on this research, I investigate the carbon footprint and general sustainability issues for various offsite/cloud storage providers, and highlight areas for future research and advocacy.\u003c/p\u003e","\u003cp\u003eWe analyze the reuse inefficiency of digital preservation systems and trace the root causes to data file formats and their design rationales. 
The archival formats need to be less human-centric and more efficient for machine consumption.\u003c/p\u003e"]}},{"label":{"en":["Presentation Slides"]},"value":{"en":["\u003cp\u003e\u003ca href=\"https://drive.google.com/file/d/1GFrPzw4KrFLUBzkAgZMmubHEVX8iyY2z/view?usp=drivesdk\"\u003e00-Chen_Kinnaman_Tuttle_DigiPres2020.pptx\u003c/a\u003e\u003cbr\u003e\u003ca href=\"https://drive.google.com/file/d/1FouwSsbOj5VNNoGcX3Bn08LaKpVsUeQO/view?usp=drivesdk\"\u003e01-DigiPres-2020-bscott.pptx\u003c/a\u003e\u003cbr\u003e\u003ca href=\"https://drive.google.com/file/d/1FvlGhMpnB9RY_NT3-s4OyyCxhjXPKfXz/view?usp=drivesdk\"\u003e02-2020-DigiPres-Xie.pptx\u003c/a\u003e\u003c/p\u003e"]}},{"label":{"en":["External Transcripts"]},"value":{"en":["\u003cp\u003e\u003ca href=\"https://drive.google.com/file/d/1FoIW5UXlAImfs-RUXUb1xckjdQeHYCBr/view?usp=drivesdk\"\u003e00-Chen_Kinnaman_Tuttle_DigiPres2020.docx\u003c/a\u003e\u003cbr\u003e\u003ca href=\"https://drive.google.com/file/d/1FVzgG818dXkNBVMc1iRSj9CA83Yd05Kz/view?usp=drivesdk\"\u003e01-digipres_2020_bscott.docx\u003c/a\u003e\u003cbr\u003e\u003ca href=\"https://drive.google.com/file/d/1FcsgXwKv3f7D7oeFGyA24NXSK47DHLXF/view?usp=drivesdk\"\u003e02-2020-DigiPres-Xie.docx\u003c/a\u003e\u003c/p\u003e"]}}],"summary":{"en":["\u003cp\u003eVirginia Tech University Libraries is developing a cloud-based, serverless, microservice application to support digital asset management, preservation, and access. This presentation will outline the balance of cost-effectiveness and creating a trustworthy platform while relying on the cloud.\u003c/p\u003e","\u003cp\u003eThe University of Houston Libraries has completed an estimate of its digital preservation program's electricity usage and carbon footprint. 
Building on this research, I investigate the carbon footprint and general sustainability issues for various offsite/cloud storage providers, and highlight areas for future research and advocacy.\u003c/p\u003e","\u003cp\u003eWe analyze the reuse inefficiency of digital preservation systems and trace the root causes to data file formats and their design rationales. The archival formats need to be less human-centric and more efficient for machine consumption.\u003c/p\u003e"]},"provider":[{"id":"https://2020clirevents.aviaryplatform.com/aboutus","type":"Agent","label":{"en":["CLIR"]},"homepage":[{"id":"https://2020clirevents.aviaryplatform.com/","type":"Text","label":{"en":["CLIR"]},"format":"text/html"}],"logo":[{"id":"https://d9jk7wjtjpu5g.cloudfront.net/organizations/logo_images/000/000/118/original/CLIRforSocial_round.jpg?1604439777","type":"Image"}]}],"thumbnail":[{"id":"https://d9jk7wjtjpu5g.cloudfront.net/collection_resource_files/thumbnails/000/100/695/small/247-00.png?1604535279","type":"Image","format":"image/png"}],"items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695","type":"Canvas","label":{"en":["Media File 1 of 3 - 
1605126988_00-Chen_Kinnaman_Tuttle_DigiPres2020-stream.mp4"]},"duration":802.132,"width":640,"height":360,"thumbnail":[{"id":"https://d9jk7wjtjpu5g.cloudfront.net/collection_resource_files/thumbnails/000/100/695/small/247-00.png?1604535279","type":"Image","format":"image/png"}],"items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/content/1","type":"AnnotationPage","items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/content/1/annotation/1","type":"Annotation","motivation":"painting","body":{"id":"https://aviary-p-2020clirevents.s3.wasabisys.com/collection_resource_files/resource_files/000/100/695/original/1605126988_00-Chen_Kinnaman_Tuttle_DigiPres2020-stream.mp4?1605108988","type":"Video","format":"video/mp4","duration":802.132,"width":640,"height":360},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695","metadata":[]}]}],"annotations":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768","type":"AnnotationPage","label":{"en":["English [Transcript]"]},"items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/1","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Hello everyone. Thank you for listening to our \ntalk on a cloud-based serverless microservices  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=3.52,8.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/2","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"application for digital preservation. 
\nThis is presented by Yinlin Chen and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=8.56,14.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/3","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Alex Kinnaman from Virginia Tech with support \nfrom James Tuttle also at Virginia Tech.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=14.72,18.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/4","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So today, just a quick run-through of what we'll \nbe chatting about we'll give an introduction to  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=24.56,30.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/5","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"what we're doing at Virginia Tech. 
A bit about \nour problem statement and then our solution to  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=30.24,35.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/6","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"those problems; an overview of our entire \ninfrastructure, as well as an overview of  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=35.6,42.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/7","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"the performance and cost of our decisions; and \nthen finally a conclusion and future work.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=42.08,47.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/8","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So digital preservation, as we \nall know, combines policies,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=50.32,53.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/9","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"strategies, and actions that enable \naccess to digital content over time.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=53.36,58.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/10","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And the digital preservation strategies and \nactions address content creation, integrity,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=58.08,62.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/11","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and maintenance. Since content integrity \nincludes verification methods and routine audits,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=62.64,68.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/12","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"we want to make sure that we can check the fixity \nof the content held on our preservation storage  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=68.72,72.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/13","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"systems at regular intervals. 
This helps us to \nmaintain logs of fixity information and supply  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=72.8,79.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/14","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"audits on demand. And also it helps us, helps our \nability at home to detect any corrupt data.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=79.2,85.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/15","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So this is an overview of the Virginia Tech \nDigital Library Platform, which we're in the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=88.08,92.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/16","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"process of building. And you can see that it is \nbuilt on several different types of microservices.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=92.88,99.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/17","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And for this talk will be focusing specifically \non the fixity service. 
And we'll get a little  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=99.92,105.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/18","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"bit more into this diagram as the presentation \nmoves on. But this gives you kind of an idea of  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=105.2,110.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/19","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"the overall system that we're working within.\n \nSo our issue is that we needed to move our data  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=110.96,118.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/20","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"from on-premise servers to cloud vendors in order \nto reduce all of our maintenance obligations and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=118.72,124.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/21","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"significantly reduce expense. 
Cloud vendors such \nas AWS S3 and Azure Storage advertise a 99.9999,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=124.32,133.52"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/22","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"et cetera, of data durability of objects over \na given year. However, we want to make sure  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=133.52,139.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/23","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"that we can verify the data from these black box \nvendor systems. We want to make sure there's file  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=140.08,146.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/24","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"integrity from our version to their version and \nalso get notifications when custom events occur.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=146.08,151.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/25","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So our solution is to develop a cloud-based, \nserverless and microservice application. 
This  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=154.48,160.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/26","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"allows us to run large amounts of fixity creation \nand validation asynchronously. And it also allows  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=160.32,166.48"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/27","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"routine checking based on the policies that we \nhave defined in our digital libraries department.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=166.48,171.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/28","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"The system can scale up and down depending \non the amount of content and we need to move.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=172.0,176.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/29","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And this way, what's really helpful to us is \nthat we only have to use the resources that are  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=177.6,184.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/30","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"required. We don't have to buy one package \nand only use a few things. We can tailor  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=184.32,188.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/31","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"individual components. And of course, we \nwant to make this as automatic as possible  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=188.72,194.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/32","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"without any human intervention or as much \nsystem maintenance. 
So now I will pass  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=194.24,199.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/33","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"it to Yinlin Chen to get a deeper sense of \nthe system and the performance and cost.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=199.28,204.48"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/34","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"The entire infrastructure is serverless. \nWe use all our AWS managed services.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=211.04,218.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/35","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"In this infrastructure we have two parts. 
One \npart is we implement several microservices  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=220.64,229.52"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/36","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"to do the fixity work to retrieve the \nfiles from the S3 and do the fixity check  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=230.24,236.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/37","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"using microservice. Here we implement three \nLambda to do that. Another part is to base  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=237.12,245.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/38","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"on the policy we implemented, we \ncan define different rules and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=245.76,250.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/39","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"generate a report using AWS Athena. Finally, \nbased on the result we can notify our users.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=251.28,262.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/40","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Inside this serverless fixity work, we have three \nLambda functions implemented in microservice.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=265.52,272.48"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/41","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"They connect to each other and put inside AWS \nstep functions. These functions read the files  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=272.48,286.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/42","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"in the S3 bucket, then first they retrieve files \nfrom S3. Then after the file is retrieved then  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=286.64,296.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/43","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"it will compute the checksum, we use MD5. 
Then \nfinally we validate the checksum with the original  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=296.72,304.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/44","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"MD5 and to make sure it's working or not, and \nthen we save all results into another S3 bucket  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=305.36,314.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/45","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"to further analyze these local data. We can \nuse the Command Line to trigger this process,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=315.44,324.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/46","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"or we can use the Web Interface \nto trigger this process too.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=324.0,329.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/47","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"This is our step function workflow. 
So \nhere, we can see these green colors are  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=331.76,340.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/48","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"functions. So we can see we retrieve files \nand if the file is retrieved successfully,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=341.28,348.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/49","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"then we will do the checksum compute, then \nfinally do a validation of the checksum.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=348.8,357.68"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/50","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"You can see here we can wait four minutes, \n12 hours, or four hours. 
This is because  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=358.8,364.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/51","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"we have all kinds of different files and \nsome files have storage being AWS standard  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=364.96,374.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/52","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"storage, which we can retrieve very quickly. \nSome files because these are archive files,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=376.32,382.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/53","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"so we store in S3 Glacier. So these files will \ntake at least 12 hours to retrieve. We support  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=382.24,392.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/54","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"multiple scenarios. 
So no matter \nif this file is stored in S3  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=392.0,397.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/55","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Standard or Glacier, we're able to retrieve this \nand do the fixity check. So this is the workflow  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=398.96,405.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/56","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"for the fixity check.\nWe can see in S3 we then \nhave different storage classes from Standard to S3  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=405.84,417.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/57","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Glacier Deep Archive. For the Standard we can \nretrieve very quickly, in less than four minutes,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=417.2,426.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/58","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"for the Glacier we will need 12 hours. You \ncan also see the pricing here. 
The files  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=426.4,436.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/59","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"stored in Glacier are cheaper than, for instance, \nStandard. So based on different kinds of files,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=436.56,445.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/60","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"we store in different S3 storage classes. We all \ncan do the fixity check for all these files.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=445.2,452.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/61","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"This is a history of all the step function events. \nYou can see every operation will have a timestamp,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=455.6,465.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/62","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and you can see the total execution time. For this \nexample it's 39 seconds. 
We can see all the Lambda  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=466.56,477.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/63","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"functions being executed, and also we can see all \nthe Lambda logs through the CloudWatch logs. Every  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=477.04,486.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/64","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"single step is recorded, so we can get all \nthe information we need to do the analysis.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=487.04,494.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/65","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"This is another part. We have different \npreservation rules. We can base on what rule we  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=497.52,503.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/66","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"implement Lambda. 
Which rule is the condition \ninside this Lambda, and we connect this with the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=503.44,511.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/67","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"CloudWatch. So based on rules we choose the Lambda \nfunctions and they will query the data in the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=512.0,522.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/68","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Amazon Athena. From the log \nfiles we can do the queries and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=523.44,529.68"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/69","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"based on that rule we decide to trigger the \nstep function and to do the fixity work.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=530.8,537.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/70","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"We also apply the Amazon SQS so we can \nsupport to process thousands of files  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=540.0,549.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/71","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"all at the same time, concurrently. So no \nmatter if today we need to do a fixity for  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=552.4,560.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/72","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"100 files or 1,000 files or 10,000 files, \nwe use this approach to support that. 
We can  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=560.16,567.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/73","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"do fixity checks for thousands of files very \nquickly.\nWe record everything, all the fixity  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=567.84,577.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/74","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"results in the S3 and we use \nAWS Athena service so we can  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=580.24,586.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/75","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"use basic SQL queries to gather all the \ninformation we need. Even with very complex  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=588.96,595.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/76","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"scenarios, we can be able to write a SQL query to \nget a report we need. 
You can see here we recorded  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=595.36,603.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/77","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"how long it will take, and all the information \nif this fixity check is matched or not, when is  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=607.6,615.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/78","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"fixity check being done. This table shows all the \ninformation. Underlying this is just a text file  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=615.44,625.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/79","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"stored in S3, but we can be using the AWS Athena \nservice. We can see it's like the database, and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=625.84,633.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/80","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"we can use a simple query to query all of these \nthat we need.\nThis is the performance. 
We test  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=633.92,641.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/81","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"from size 3 megabyte to 800 megabyte. Our \ncurrent setup is focused on image data.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=643.04,652.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/82","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"The largest ones are about 800mb. Then in the \nfuture we will try to do these on video data. So  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=653.28,661.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/83","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"video data can be GB and then we will do that in \nthe future. You can see here we process over 1,000  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=661.28,667.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/84","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"data just in 40 seconds. 
We can do this very \nquickly.\nThen the cost, because right now the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=667.76,676.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/85","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"state of our preservation data is around 500 \ngigabyte. Most of this is just free to us because  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=678.64,687.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/86","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"AWS has free tiers, so most of it costs nothing \nto us. We do several tests based on this data  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=687.36,696.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/87","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"to do multiple different kinds of survey tests, \nlike multiple rounds, and all the cost we'll need  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=698.56,706.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/88","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"something like $2. It's nothing to us because we \nuse serverless infrastructure. 
If we don't use the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=706.08,716.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/89","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"resources, we don't need to pay anything. We \ntried to compare this if we use instances, but we  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=716.0,724.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/90","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"thought that we don't want to waste this kind of \nmoney because instances are charged by hours. Also  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=724.8,731.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/91","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"we need to write a program to support thousands of \nfiles concurrently and it will take a lot of work.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=731.28,738.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/92","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"We don't want to spend the time or money on that. 
\nBut you can see the costs are very low to us.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=738.32,744.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/93","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Conclusion. You can see using this approach \nwe save a lot of time, and based on our rules  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=745.92,752.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/94","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"we have a lot of flexibility because it's just \nmultiple microservices communicating together.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=752.16,757.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/95","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"We can tune performance and cost for each single \nmicroservice. 
There are many combinations which  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=758.64,768.48"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/96","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"we can choose and we can test to reduce the cost \nof that improvement performance.\nIn the future,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=768.48,775.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/97","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"we will try to support more preserving data, like \nvideos, and continue improving our performance,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=775.12,782.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/98","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and reduce costs, and then write reporting \ntools for AWS Athena, and support more  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=783.52,790.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/99","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"different kinds of rules. 
Right now we just \nsupport some basic rules but in the future  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=790.88,794.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/100","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"we will support more kinds of rules and policies \nfor the preservation. That's it. Thank you.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695#t=794.96,799.84"}]},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768","type":"AnnotationPage","label":{"en":["English [Transcript]"]},"items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100695/transcript/20768/annotation/101","type":"Annotation","motivation":"subtitling","body":{"type":"TextualBody","value":"https://d9jk7wjtjpu5g.cloudfront.net/file_transcripts/associated_files/000/020/768/original/00-247_captions.vtt?1604535283","format":"text/vtt","language":"en"},"target":"https://d9jk7wjtjpu5g.cloudfront.net/file_transcripts/associated_files/000/020/768/original/00-247_captions.vtt?1604535283"}]}]},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696","type":"Canvas","label":{"en":["Media File 2 of 3 - 
open-uri20201105-6716-dayerd.mp4"]},"duration":905.56,"width":640,"height":360,"thumbnail":[{"id":"https://d9jk7wjtjpu5g.cloudfront.net/collection_resource_files/thumbnails/000/100/696/small/247-01.png?1604535284","type":"Image","format":"image/png"}],"items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/content/1","type":"AnnotationPage","items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/content/2/annotation/1","type":"Annotation","motivation":"painting","body":{"id":"https://aviary-p-2020clirevents.s3.wasabisys.com/collection_resource_files/resource_files/000/100/696/original/open-uri20201105-6716-dayerd.mp4?1604535284","type":"Video","format":"video/mp4","duration":905.56,"width":640,"height":360},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696","metadata":[]}]}],"annotations":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769","type":"AnnotationPage","label":{"en":["English [Transcript]"]},"items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/1","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Hi, everyone. My name is Bethany Scott and \nI am the digital projects coordinator at the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=2.24,8.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/2","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"University of Houston libraries. 
I'm going to be \ntalking today about the process of assessing and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=8.08,15.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/3","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"ultimately improving our electricity consumption \nfor our digital preservation activities.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=15.28,21.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/4","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So I'll start off by giving a little bit of \nbackground about digital preservation at UH.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=23.76,30.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/5","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So, our first digital preservation policy \nwas drafted and approved in 2014 to 2015.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=31.44,37.68"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/6","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And we started using Archivematica in production \nin 2018 for our preservation storage. 
We now have  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=38.32,47.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/7","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"nearly 5000 packages at about 30 terabytes in \nstorage. And the content that we have stored in  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=47.28,55.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/8","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Archivematica is almost all digitized material \ncreated by the UH library's digitization unit  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=55.6,63.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/9","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"for our online access repositories.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=65.28,67.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/10","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"We also have quite a bit of film and video \ncontent, which was done through outside  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=69.12,74.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/11","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"vendors. And of course, that accounts for \nsome of the largest file sizes in storage.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=74.8,80.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/12","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"But despite making some progress with just even \nsetting up our digital preservation program,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=82.64,88.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/13","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"until recently, we had not yet considered \nthe long term sustainability of the program  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=89.6,95.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/14","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"from an environmental standpoint. To address \nthis, I gathered information on our technology  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=96.0,102.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/15","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"infrastructure, and its energy expenditures. \nAnd I evaluated the requirements of our digital  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=102.72,109.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/16","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"preservation policy to identify areas where \nthe overall sustainability of the program  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=109.36,115.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/17","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"may be improved in the future by \nmodifying our current 
practices.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=116.0,120.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/18","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So there are three components to the carbon \nfootprint analysis that I conducted. First,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=122.96,129.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/19","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"I did an inventory of the hardware and equipment \nused for digital preservation activities.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=129.92,135.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/20","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Then I calculated the electricity \nused by the local equipment.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=136.56,140.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/21","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And finally, I investigated offsite storage \nproviders' sustainability policies and practices.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=141.44,147.52"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/22","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And I actually recently wrote a couple \nof blog posts for the Society of American  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=149.2,154.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/23","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Archivists Electronic Records Section that \ngoes over the hardware inventory and the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=154.16,160.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/24","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"electricity calculation. It goes into a lot of \ndetail there. 
I will give a quick summary right  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=161.04,169.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/25","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"now, and that way, we can spend more time on the \noffsite storage providers in this presentation.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=169.28,175.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/26","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"But if you would like to read \nabout the electricity consumption  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=176.32,180.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/27","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"estimation, please check out the \nblog posts, they're linked here.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=180.24,184.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/28","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Part one is available at bit.ly slash erspost1, \nand part two is available bit.ly slash erspost2.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=185.6,197.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/29","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So my first step on understanding our \ncarbon footprint was to get an idea of  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=206.32,213.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/30","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"the components of the local infrastructure \nthat support digital preservation at UH.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=213.2,218.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/31","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So I worked with our system administrator to \nfigure out what pieces of equipment are used for  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=219.76,226.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/32","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"processing, storage, networking, and \nother elements of the overall digital  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=226.24,231.68"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/33","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"preservation program, we came up with this \ninventory. 
So from the inventory list,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=231.68,238.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/34","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"we looked at the technical documentation \nfor these specific hardware components  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=238.72,244.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/35","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"to determine the likely maximum power \nconsumption for the hardware. We also  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=245.76,251.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/36","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"estimated the power draw while idle, and then a \nrange of partial power draw scenarios in between.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=251.84,259.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/37","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So here are the 
findings.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=262.24,264.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/38","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And as you can see, the estimated \nmaximum greenhouse gas emissions derived  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=265.0,272.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/39","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"from power use for the digital preservation \nhardware is around 124,000 pounds annually.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=272.08,280.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/40","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And that equates to 56.3 metric tons. So that \nis the greenhouse gas emissions from our digital  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=281.36,291.52"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/41","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"access and preservation hardware annually. 
To \nput this in perspective, it's equivalent to  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=291.52,299.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/42","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"the greenhouse gas emissions from about 140,000 \nmiles driven by an average passenger vehicle.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=299.12,306.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/43","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"It's also equivalent to the carbon dioxide \nemissions from over 62,000 pounds of coal burned.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=307.36,313.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/44","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Now, this is just an estimate for the time being. 
\nBut when we are able to go back to work on campus,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=314.08,321.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/45","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"I am planning to get some actual power draw \nnumbers directly from the equipment and the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=321.12,325.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/46","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"server room to continue this research. And \nI'll talk more about that in a little bit.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=325.6,330.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/47","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"But of course, this energy use only accounts for \nour local storage. So we also have to account for  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=334.0,341.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/48","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"offsite storage, which would be another factor \nin an organization's overall carbon footprint.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=341.12,346.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/49","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"As we all know, lots of institutions' \ndigital preservation policies require  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=348.24,353.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/50","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"them to keep multiple copies of their digital \nassets in different geographic locations. And  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=353.44,359.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/51","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"there are several offsite storage providers \nthat we can use to address those requirements.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=359.76,364.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/52","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"The Big Three commercial providers are Amazon AWS, \nGoogle Cloud, and Microsoft Azure. 
And then there  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=365.44,375.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/53","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"are also non-commercial providers from the \ncultural heritage sector, such as Chronopolis at  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=375.76,382.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/54","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"UC San Diego, and the MetaArchive Cooperative, \nwhich is supported by the Educopia Institute.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=382.0,388.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/55","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Since the cultural heritage options were created \nby and for digital preservation practitioners,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=390.88,396.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/56","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"there tends to be a bit more transparency \nabout their standards and procedures.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=396.96,402.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/57","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And there could be additional services that they \nprovide, such as geographic distribution built in  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=402.8,410.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/58","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"as well. And also, the cultural heritage options \ntend to be hosted out of higher ed institutions,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=410.64,418.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/59","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"many of which are already moving towards \ncarbon neutral sources of energy on campus,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=419.36,424.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/60","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and which have a lot of documentation about their \nsustainability practices at the campus level.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=425.44,431.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/61","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So for example, Chronopolis is hosted out \nof UC San Diego, and data in the Chronopolis  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=432.08,438.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/62","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"network is also replicated at the University of \nMaryland Institute for Advanced Computer Studies,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=438.64,445.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/63","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and also the Texas Digital Library. Each of these \ninstitutions employ sustainability officers, and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=445.84,454.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/64","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"have stated commitments and action plans to move \ntowards carbon neutrality in the coming years.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=454.4,460.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/65","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"For instance, UC San Diego is committed to \nreducing its energy use intensity on campus,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=461.84,467.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/66","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and obtaining 100% clean electricity \non campus by 2025. Not too far off.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=468.64,475.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/67","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Regarding the commercial providers, \nall three have also taken actions  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=477.44,482.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/68","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"to reduce their carbon footprint, some more \neffective than others. 
As an entire company,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=482.56,489.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/69","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Microsoft has been carbon neutral since 2012. And \nin fact, I believe they are now carbon negative,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=489.6,496.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/70","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"actually. Google is carbon neutral too which they \nachieved through improving the energy efficiency  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=496.72,504.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/71","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"of their data centers, and through constructing \nor purchasing renewable energy sources.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=504.88,510.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/72","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And Google actually provides a fair amount \nof transparency too through their annual  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=512.32,518.48"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/73","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"sustainability and environmental reports that they \nrelease every year. And then that leaves Amazon.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=518.48,525.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/74","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Unfortunately, Amazon remains one of the \nleast transparent companies in the world  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=526.88,531.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/75","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"in terms of environmental performance, but \nit is by far the biggest provider which  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=531.92,537.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/76","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"is demonstrated on this chart. So this is \nvisualizing the power consumption for data  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=538.0,544.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/77","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"service providers in Virginia, where the Amazon \nUS East region is located. This is the core of  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=544.32,552.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/78","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"the AWS global infrastructure. As you can see \nfrom the chart, Amazon consumes three times more  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=552.0,560.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/79","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"electricity as the next highest service provider. 
\nAnd actually Amazon's power demands in Virginia,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=560.24,567.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/80","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"increased by 60%, between 2017 and 2019, but \nthey did not add any new renewable energy.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=568.24,577.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/81","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So as a result, the data centers in Virginia are \nonly 12% renewable powered, and this is in pretty  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=578.0,586.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/82","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"stark contrast to their stated commitment \nto be 100% renewable powered by 2040.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=586.4,592.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/83","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And in an open letter from September \n2019 Amazon Employees For Climate  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=595.04,600.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/84","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Justice highlighted their complaints \nabout the company's performance, citing  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=600.72,605.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/85","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"the intensive use of fossil fuels throughout the \nbusiness, polluting communities, actively helping  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=606.56,613.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/86","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"oil and gas companies accelerate extraction \nof new reserves, and funding climate denying  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=613.28,619.52"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/87","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"think tanks and 68 members of Congress who voted \nagainst climate legislation 100% of the 
time.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=619.52,626.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/88","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Now, some of us may have to use AWS for backups \ndue to its low cost. I know we fall into that  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=628.56,636.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/89","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"category. So I want to give a little silver lining \nhere: there are five AWS regions that they have  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=636.72,644.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/90","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"stated to be carbon neutral already, including \nthe US West in Oregon, and two regions in Europe.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=644.56,652.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/91","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So if you have to use AWS, you could \nconsider choosing one of the carbon  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=653.92,658.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/92","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"neutral regions if you are setting up \nyour AWS storage for the first time,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=658.96,663.68"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/93","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"or you could switch from US \nEast to US West as an example.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=664.48,668.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/94","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So as I mentioned before, there are some \nadditional things that I would like to  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=671.76,676.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/95","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"research further regarding our carbon footprint, \nand how we might be able to reduce it over time.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=676.08,681.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/96","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"First off, along with our digitization unit, I \nam looking into a tiered approach to selecting  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=684.32,690.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/97","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"appropriate preservation file formats. 
So for \nexample, we might choose a high res full color  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=690.88,698.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/98","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"TIFF as the preservation master for an object like \na medieval manuscript, where researchers need to  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=698.96,706.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/99","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"investigate the minute details in the parchment \ntexture, the ink and decoration and so on.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=706.08,712.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/100","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"But for something like a folder of \n20th century typed correspondence,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=714.4,718.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/101","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"where the value is more informational, rather \nthan artifactual, we could decide that a lower  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=719.28,726.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/102","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"resolution file or even a PDF is sufficient. \nThis would reduce the amount of storage space,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=726.4,733.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/103","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"therefore allowing us to digitize more content \nat the same level of energy use for storage.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=734.32,740.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/104","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Also, when we get back to work on campus, I \nwould like to capture actual point in time  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=743.36,750.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/105","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"power consumption figures from our local hardware \ninfrastructure. 
And then I'd like to look at  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=750.08,756.48"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/106","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"whether we can reduce our power consumption \nby changing our preservation processes.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=756.48,761.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/107","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So as an example, when we do resource intensive \nactions or computations, like a fixity check,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=762.64,770.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/108","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"does that actually cause our power consumption \nto go up significantly? 
And if so, what is the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=770.56,778.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/109","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"balance between an appropriate schedule of fixity \nchecking to confirm the integrity of our files,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=778.08,785.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/110","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"while also ensuring that our carbon footprint \nand our energy use doesn't spike too much.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=785.44,791.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/111","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So finally, to wrap up, here are a couple \nof ideas about what you can do if you're  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=794.32,799.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/112","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"interested in these issues. First off, contact \nyour sustainability office. 
Many institutions of  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=799.36,806.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/113","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"higher ed have a sustainability office and a team \nwho's already working on this, who can provide  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=806.24,812.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/114","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"information on current and future sustainability \nefforts going on at your institution.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=812.16,817.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/115","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"They also may be able to provide you with \ninsights on the sources of electricity on campus.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=818.24,824.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/116","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"So even if your campus is not \nyet 100% renewable powered,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=824.96,830.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/117","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"there might be portions of the grid that are, \nyou might be able to encourage renewable energy  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=830.24,836.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/118","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"sources for buildings where your local \nstorage infrastructure is housed.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=837.12,842.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/119","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"And finally, for AWS users, there is a free tool \ncalled Green Cost Explorer, which provides a  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=844.24,852.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/120","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"breakdown of what percentage of your AWS service \nis green or carbon neutral versus not carbon  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=852.32,859.68"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/121","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"neutral. 
And so this could give you some insights \ninto whether your use of AWS is carbon heavy,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=859.68,867.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/122","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and where you might be able to improve by \nswitching to a different region or service of AWS.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=867.28,874.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/123","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"All right, well, thank you very much for your \nattention. I look forward to chatting about  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=877.6,882.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/124","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"it with y'all on slack. And if you have any \nadditional questions or feedback on this at a  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=882.8,888.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/125","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"later date, please feel free to email me anytime. \nMy email address is bscott3@uh.edu. 
Thanks again.","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696#t=888.72,905.44"}]},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769","type":"AnnotationPage","label":{"en":["English [Transcript]"]},"items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100696/transcript/20769/annotation/126","type":"Annotation","motivation":"subtitling","body":{"type":"TextualBody","value":"https://d9jk7wjtjpu5g.cloudfront.net/file_transcripts/associated_files/000/020/769/original/01-247_captions.vtt?1604535287","format":"text/vtt","language":"en"},"target":"https://d9jk7wjtjpu5g.cloudfront.net/file_transcripts/associated_files/000/020/769/original/01-247_captions.vtt?1604535287"}]}]},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697","type":"Canvas","label":{"en":["Media File 3 of 3 - 
open-uri20201105-6716-1qh2fr.mp4"]},"duration":910.72,"width":640,"height":360,"thumbnail":[{"id":"https://d9jk7wjtjpu5g.cloudfront.net/collection_resource_files/thumbnails/000/100/697/small/247-02.png?1604535289","type":"Image","format":"image/png"}],"items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/content/1","type":"AnnotationPage","items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/content/3/annotation/1","type":"Annotation","motivation":"painting","body":{"id":"https://aviary-p-2020clirevents.s3.wasabisys.com/collection_resource_files/resource_files/000/100/697/original/open-uri20201105-6716-1qh2fr.mp4?1604535289","type":"Video","format":"video/mp4","duration":910.72,"width":640,"height":360},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697","metadata":[]}]}],"annotations":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770","type":"AnnotationPage","label":{"en":["English [Transcript]"]},"items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/1","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Hello, my name is Zhiwu Xie. I work at \nVirginia Tech Libraries as a professor  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=2.96,8.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/2","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and chief strategy officer. 
The title of my \ntalk is Choosing Archival Format By Machine  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=8.08,14.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/3","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Consumption Efficiency. This talk sums up major \nfindings of our IMLS funded research project  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=14.24,20.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/4","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Developing Library Cyberinfrastructure \nStrategy for Big Data Sharing and Reuse.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=21.52,27.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/5","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"The central thesis of my talk can be summed up as \nfollows: to preserve big data, we must carefully  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=28.32,34.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/6","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"choose an efficient archival format for reuse. 
\nThis typically leads to a binary data format  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=34.96,40.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/7","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"that is optimized for efficient processing in \nbig data systems. An efficient format is usually  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=40.8,47.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/8","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"not for human consumption, therefore eliminates \nall textual data formats from XML, JSON, to CSV.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=47.6,54.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/9","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Although counterintuitive, the \nperformance gain of the efficient  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=55.76,59.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/10","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"archival format can be a life-and-death decision \nfor an archival system. Why? 
Because with the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=59.92,66.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/11","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"rapid growth of archival data sizes, the \nuse cases are changing, and poor performance  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=66.4,72.48"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/12","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"can render traditional archival systems \nunusable.\nNow let's dive into more details.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=72.48,78.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/13","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Today, big data preservation is no longer \nmerely a buzzword in libraries and archives.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=80.64,86.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/14","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"The flagship scholarly repositories have grown \nsignificantly in sizes in the past decade.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=86.72,92.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/15","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Moreover, not just the big science projects, but \nsmall science teams and regular guys like you and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=93.52,99.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/16","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"me can now easily collect tens to hundreds TB of \nresearch data that worth preserving and reusing.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=99.12,107.68"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/17","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"The growth of the data size not only exceeds the \nstorage capacity of our laptops and desktops,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=108.88,114.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/18","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"the more challenging aspect is that even if we \ncan beef up our local storage, we usually don't  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=114.88,121.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/19","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"have sufficient processing power to make sense of \nthem. This leads to a paradigm shift in archival  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=121.84,128.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/20","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"systems, from a repository centric view to a \ndata centric view.\nLet us look at the OAIS Model.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=128.24,136.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/21","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Here, consumers gain access to the archive through \nan Access module that essentially do two things:  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=137.44,144.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/22","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"1) responds to user queries and 2) delivers \nDissemination Information Package (DIP).  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=145.04,152.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/23","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"A traditional repository takes a narrower \ninterpretation of this model, and will only  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=153.12,158.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/24","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"respond to prescribed, simple queries like \nthose against the predetermined metadata.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=158.16,163.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/25","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"If you want answers beyond those simple queries, \nsorry here's the DIP, download and go figure it  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=164.32,171.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/26","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"out by yourself.\nA traditional repository works \nfine if the size of the DIP is within the end  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=171.04,179.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/27","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"users data processing capacity. But in big \ndata scenarios, this is usually not the case.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=179.04,185.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/28","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"For example, only in Sep 2020, Common \nCrawl has collected 3.45 billion web pages.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=186.08,193.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/29","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"The whole data set is available for download, \nbut who can easily spin up a cluster to query  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=193.76,199.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/30","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"3.45 billion web pages? 
Not me.\nIt is therefore \nnecessary to shift to a data centric view, where  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=199.28,208.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/31","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"the preservation systems not only need to provide \narchival storage and basic queries facilities,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=208.0,214.48"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/32","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"but also user programmable computational resources \nto manipulate the archival data in place.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=214.48,220.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/33","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Along this new paradigm, Common Crawl hosts their \ndata sets on the Amazon Cloud, which provides  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=223.44,229.52"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/34","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"both the computational and storage capabilities \nto for consumers to conduct customized queries  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=229.52,237.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/35","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"beyond those simple, metadata queries. Now, we \ncan spin up virtual machines and write our own  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=237.84,243.52"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/36","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"code to answer questions like: how many times 9-11 \nis mentioned between Sep 10 6AM to Sep 12 6PM,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=243.52,253.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/37","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"by webpages in the ac.uk domain. 
Such queries have \nbeen impossible for traditional repositories.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=253.84,261.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/38","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Similarly, although HTRC does not provide DIP \ndownloads, it does provide the computational  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=263.68,270.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/39","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"infrastructure to run customized queries \nagainst the copyrighted book scan corpus.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=270.72,276.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/40","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Now that we have some basic understanding on \nbig data preservation system, let us turn to  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=278.64,284.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/41","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"data format. Why is data format important? 
\nBecause we found out through experiments,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=284.08,290.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/42","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"even equipped with similar big data \nprocessing systems, the same query  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=290.4,295.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/43","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"could take 40 seconds to complete if the data \nare preserved in one format but would take  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=295.12,300.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/44","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"almost 4000 second to complete if archived \nin a different format.\nOur research project  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=301.44,309.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/45","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"was initially intended to benchmark different \nbig data archival and processing systems and  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=309.2,314.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/46","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"optimize their performance by adjusting settings \nand configurations. To our surprise, however,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=314.8,321.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/47","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"stunning performance gain can be achieved not \nby changing the system settings but by changing  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=321.92,327.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/48","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"archival data formats. And the culprit is usually \nthe friendly, human readable textual format,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=327.6,333.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/49","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"including but are not limited to XML, JSON, \nand CSV. Why? 
I'll show you a few examples\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=335.76,342.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/50","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Look at this timestamp. If encoded in text/string,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=345.2,350.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/51","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"here is how it's translated into binary \nand stored in a computer. But if it  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=350.0,356.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/52","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"'s encoded in integer which signifies \nthe number of second from a epoch time,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=358.0,363.28"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/53","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"it's stored in this integer, which translates \ninto this binary form, which is much shorter.\n  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=363.28,369.52"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/54","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"The size of the storage is not the only issue. \nA computer can easily compare two integers and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=371.68,378.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/55","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"knows which is larger therefore which a later \ntimestamp. It cannot easily tell which string  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=378.4,385.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/56","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"signifies a later timestamp unless the \nstring is converted into an integer.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=385.44,389.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/57","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Not only do we waste the storage space, but \nalso waste computational cycles to convert  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=390.56,396.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/58","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"human readable text to machine understandable \nintegers.\nHere s another example.\nIn the free  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=396.64,406.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/59","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"text, try to imagine how a computer finds the 4th \nsentence in the 2nd paragraph, or the 4th column  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=406.56,419.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/60","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"of the 2nd row in this table? 
Which is easier for \ncomputer?\nTo summarize, data format stipulates  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=421.28,433.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/61","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"how information is encoded, organized, and \nstored. Human readable formats can be difficult  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=433.92,439.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/62","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"for computer to make sense, and typically take \nmore storage and computational cycles to query.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=439.92,446.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/63","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Although the computer becomes faster and \nstorage becomes cheaper, by definition  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=447.76,452.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/64","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"these growths are outpaced by the deluge of big \ndata. 
Small performance savings here and there  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=452.4,458.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/65","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"can therefore translated into major performance \ngains, e.g., from 4000 seconds to 40 seconds.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=459.36,466.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/66","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"To understand why data formats \nmake such big a difference,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=469.92,473.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/67","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"it is worth reviewing the rationale \nbehind modern big data systems.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=473.6,477.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/68","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"First, although these systems can \nquickly process large amount of data,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=478.64,483.12"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/69","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"they assume the data are comprised of structurally \nrepetitive records with low interdependency.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=483.12,489.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/70","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Under this assumption, large datasets can \nbe efficiently partitioned and replicated,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=489.92,495.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/71","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and processed in a scalable manner on commodity \ncomputers interconnected only by the network.\n  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=495.36,500.96"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/72","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Nonetheless, basic physics law still applies \nhere. For example, network transmit is always  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=502.64,510.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/73","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"much slower than local data access, and \nspinning disk is much slower than memory.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=510.08,514.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/74","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Scanning large contiguous data blocks on disk is \nalso more efficient than combining many random  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=515.68,521.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/75","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"accesses together. 
Modern big data systems \nrespect these constraints and work coherently  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=521.76,528.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/76","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"to leverage these constraints. That's why we have \ndata replication systems like Hadoop File System,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=528.24,534.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/77","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"in-memory processing systems like Spark, and \ncolumnar databases like HBase. All these means  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=534.24,540.64"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/78","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"that these systems rely heavily on data locality, \nspace efficiency, and caching to work properly.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=540.64,548.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/79","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"If data are not formatted to these rationale, \nthen the big data systems cannot properly  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=548.32,553.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/80","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"leverage their built-in mechanism to effectively \nuse the computing resources, therefore the low  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=553.44,559.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/81","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"performance.\nTo better understand the logic behind \nour focus on data format, allow me to make an  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=559.44,567.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/82","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"analogy. Compare the differences between a modern \nsupply chain and my home organizing system. 
In my  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=567.36,575.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/83","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"home I much prefer organize all my stuff as in the \nlower figures. I can easily browse my belongings,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=575.04,583.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/84","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and find things I want. However, the human \nscale is not necessarily the most efficient one.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=583.2,589.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/85","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Modern supply chain components such as the \ncontainer port, the industry warehouse,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=594.4,599.52"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/86","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and the bar code, QR code are not designed to be \nhuman friendly. 
I can not easily understand the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=599.52,605.68"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/87","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"bar code, the QR code, or move a container. That's \nmeant to be handled by specialized machine.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=605.68,611.6"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/88","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Indeed, modern container port reportedly employs \nonly a handful of workers. Everything else is  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=612.4,619.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/89","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"automated for machines. 
But if I insist everything \nis labelled in plain English for my convenience,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=619.2,625.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/90","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"the modern supply chain could have \nto be slowed down significantly.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=625.76,629.68"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/91","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"For the same reason, we should not preserve \narchival data in a format that do not take  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=631.36,636.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/92","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"advantage of the efficiency requirements of modern \nbig data systems. That's like trying to move my  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=636.0,641.36"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/93","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"home closet and pantry across the pacific without \npacking them into packages and then containers.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=641.36,648.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/94","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Ultimately the inefficient data format would \nbe not only awkward but also rather wasteful.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=649.6,656.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/95","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Turns out we are also not the only ones \nconcerned with data formats and performance.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=658.8,663.92"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/96","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"In recent years many widely adopted data formats \nhave been challenged for their efficiency and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=664.56,670.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/97","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"performance when growing larger, the list \nincludes FITS, HDF5, to more widely adopted  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=670.0,685.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/98","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"JSON and CSV. Take JSON as an example, it has been \nshown that big data systems processing JSON data  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=685.84,694.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/99","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"spend more than 80% of the time on data parsing \nand loading.\nUsing the supply chain analogy,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=694.0,700.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/100","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"this equals to spending the majority of the \ntime packing my closet and pantry to fit  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=700.72,705.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/101","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"stuffs in shipping containers, and then spend \nless than 20% of the time on shipping them.  
","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=705.84,710.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/102","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"If that is the case, why don t we just store all \nour belongings in shipping containers to cut the  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=711.6,716.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/103","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"cost? This is indeed our proposal. But what if we \nneed our shirts and cooking utensils handy without  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=716.0,724.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/104","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"having to unpack them from the shipping container? \nTurns out this is not the case for archives.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=724.24,729.84"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/105","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Most of the archival data are the so-called \ncold data. 
They live in preservation systems  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=730.64,739.44"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/106","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and are accessed much less frequently \nthan data backing google search.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=739.44,746.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/107","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Now we quantify the performance gains of more \nefficient data format using web archives as  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=749.28,756.72"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/108","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"examples. We extract 3 typical web archiving reuse \nworkload, approximately mapped to data filtering,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=756.72,764.24"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/109","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"analysis, and aggregation. 
We then compare \nthe performance of querying data formatted  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=764.24,770.08"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/110","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"in WARC or WARC with CDX index with the \nsame data re-formatted in Parquet and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=771.52,776.8"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/111","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Avro. WARC is an ISO standard and widely used by \nweb archives. It is, however, a textual format.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=776.8,784.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/112","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"On the other hand, both Parquet and Avro are open \ndata formats. 
Parquet originated from the Google  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=785.68,793.04"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/113","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Dremel project, which defines a highly efficient \nalgorithm to flatten nested data structure  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=793.04,798.88"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/114","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"such as XML and JSON into a columnar store. The \nso-called columnar store organizes data in a way  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=798.88,806.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/115","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"that the values of the same column of all records \nare stored together for easier columnar queries.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=806.32,814.48"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/116","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Avro, on the other hand, is a row store format. 
\nBig data systems have been designed to take  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=815.92,821.52"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/117","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"advantage of the predictable data structure \nand shortcuts afforded by both the columnar  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=821.52,828.0"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/118","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"and row stores.\nSo both Parquet and Avro are \nbinary formats, meaning many primitive data  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=828.0,836.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/119","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"types have been defined such that those data can \nbe stored in an efficient, machine native way,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=836.16,842.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/120","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"analogous to using bar code or QR codes in \nmodern supply chain. 
The repeated parsing,  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=842.16,848.16"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/121","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"packing and unpacking is therefore eliminated. \nIn our benchmarking, we are able to achieve  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=848.16,853.76"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/122","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"performance gain in 1-2 orders of magnitude faster \nsimply by reformatting textual data formats.  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=853.76,860.56"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/123","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Our results are by no means surprising. 
As \nearly as 2013, a group at UC Berkeley and  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=861.2,868.4"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/124","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"Cloudera achieved 28X speed up by reformatting \ngenomics data from SAM/BAM to a combination  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=868.4,875.68"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/125","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"of Parquet and avro. performance.\nTherefore, The \nreformatting basically rearrange the data in a way  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=875.68,883.2"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/126","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"that big data systems can leverage to achieve \nhigh performance.\nThis concludes my talk.\n  ","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=883.2,890.32"},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/127","type":"Annotation","motivation":"transcribing","body":{"type":"TextualBody","value":"This project is made possible by an IMLS \nand an NSF grant.\nThank you very 
much","format":"text/plain"},"target":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697#t=893.28,910.56"}]},{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770","type":"AnnotationPage","label":{"en":["English [Transcript]"]},"items":[{"id":"https://2020clirevents.aviaryplatform.com/collections/1191/collection_resources/31950/file/100697/transcript/20770/annotation/128","type":"Annotation","motivation":"subtitling","body":{"type":"TextualBody","value":"https://d9jk7wjtjpu5g.cloudfront.net/file_transcripts/associated_files/000/020/770/original/02-247_captions.vtt?1604535291","format":"text/vtt","language":"en"},"target":"https://d9jk7wjtjpu5g.cloudfront.net/file_transcripts/associated_files/000/020/770/original/02-247_captions.vtt?1604535291"}]}]}]}