"""Voice-chat inference example for Qwen2-Audio-7B-Instruct.

The conversation carries audio-only user turns. Every referenced clip is
downloaded, decoded at the feature extractor's sampling rate, and passed to
the model together with the chat-templated prompt. The decoded reply ends up
in ``response``.
"""
from io import BytesIO
from urllib.request import urlopen

import librosa
from transformers import Qwen2AudioForConditionalGeneration, AutoProcessor


def _load_audio(url, sampling_rate):
    """Download the audio at *url* and return it decoded at *sampling_rate*."""
    # Close the HTTP response deterministically instead of leaking the socket.
    with urlopen(url) as resp:
        payload = resp.read()
    # librosa.load returns (waveform, sample_rate); only the waveform is used.
    waveform, _ = librosa.load(BytesIO(payload), sr=sampling_rate)
    return waveform


processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct")
model = Qwen2AudioForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-Audio-7B-Instruct", device_map="auto"
)

conversation = [
    {"role": "user", "content": [
        {"type": "audio", "audio_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/guess_age_gender.wav"},
    ]},
    {"role": "assistant", "content": "Yes, the speaker is female and in her twenties."},
    {"role": "user", "content": [
        {"type": "audio", "audio_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/translate_to_chinese.wav"},
    ]},
]
text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)

# Collect the waveforms in conversation order; plain-string contents
# (e.g. assistant replies) carry no audio and are skipped.
audios = []
for message in conversation:
    if isinstance(message["content"], list):
        for ele in message["content"]:
            if ele["type"] == "audio":
                audios.append(
                    _load_audio(ele["audio_url"], processor.feature_extractor.sampling_rate)
                )

inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True)
# Move every tensor in the batch to the model's device. Assigning to
# `inputs.input_ids` only sets an attribute on the batch object and leaves the
# mapping unpacked by `**inputs` (and the other tensors) untouched.
inputs = inputs.to(model.device)

# NOTE(review): max_length bounds prompt + generated tokens together; consider
# max_new_tokens if long prompts cut the reply short.
generate_ids = model.generate(**inputs, max_length=256)
# Drop the prompt tokens so that only the newly generated reply is decoded.
generate_ids = generate_ids[:, inputs.input_ids.size(1):]

response = processor.batch_decode(
    generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
"""Audio-analysis inference example for Qwen2-Audio-7B-Instruct.

The conversation mixes audio clips with text instructions. Every referenced
clip is downloaded, decoded at the feature extractor's sampling rate, and
passed to the model together with the chat-templated prompt. The decoded
reply ends up in ``response``.
"""
from io import BytesIO
from urllib.request import urlopen

import librosa
from transformers import Qwen2AudioForConditionalGeneration, AutoProcessor


def _load_audio(url, sampling_rate):
    """Download the audio at *url* and return it decoded at *sampling_rate*."""
    # Close the HTTP response deterministically instead of leaking the socket.
    with urlopen(url) as resp:
        payload = resp.read()
    # librosa.load returns (waveform, sample_rate); only the waveform is used.
    waveform, _ = librosa.load(BytesIO(payload), sr=sampling_rate)
    return waveform


processor = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct")
model = Qwen2AudioForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2-Audio-7B-Instruct", device_map="auto"
)

conversation = [
    {'role': 'system', 'content': 'You are a helpful assistant.'},
    {"role": "user", "content": [
        {"type": "audio", "audio_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/glass-breaking-151256.mp3"},
        {"type": "text", "text": "What's that sound?"},
    ]},
    {"role": "assistant", "content": "It is the sound of glass shattering."},
    {"role": "user", "content": [
        {"type": "text", "text": "What can you do when you hear that?"},
    ]},
    {"role": "assistant", "content": "Stay alert and cautious, and check if anyone is hurt or if there is any damage to property."},
    {"role": "user", "content": [
        {"type": "audio", "audio_url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/1272-128104-0000.flac"},
        {"type": "text", "text": "What does the person say?"},
    ]},
]
text = processor.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)

# Collect the waveforms in conversation order; plain-string contents
# (system prompt, assistant replies) and text elements carry no audio.
audios = []
for message in conversation:
    if isinstance(message["content"], list):
        for ele in message["content"]:
            if ele["type"] == "audio":
                audios.append(
                    _load_audio(ele["audio_url"], processor.feature_extractor.sampling_rate)
                )

inputs = processor(text=text, audios=audios, return_tensors="pt", padding=True)
# Move every tensor in the batch to the model's device. Assigning to
# `inputs.input_ids` only sets an attribute on the batch object and leaves the
# mapping unpacked by `**inputs` (and the other tensors) untouched.
inputs = inputs.to(model.device)

# NOTE(review): max_length bounds prompt + generated tokens together; consider
# max_new_tokens if long prompts cut the reply short.
generate_ids = model.generate(**inputs, max_length=256)
# Drop the prompt tokens so that only the newly generated reply is decoded.
generate_ids = generate_ids[:, inputs.input_ids.size(1):]

response = processor.batch_decode(
    generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]